In [None]:
import smash
import numpy as np
import pandas as pd
from datetime import timedelta

In [1]:
# Path of the HDF5 model file that the loading cell passes to smash.
pathmodel = "models/model-train-P1.hdf5"
# Destination CSV file for the table exported at the end of the notebook.
outfile = "data-P1.csv"

In [None]:
def timestep_1year(st, dt, n_ts):
    """Return the position-in-year of ``n_ts`` consecutive time steps.

    The year is modelled as a fixed 365-day cycle of ``int(365*24*3600/dt)``
    steps, numbered from 1, whose origin is 1 August 00:00 of the calendar
    year of ``st``. Leap days are not accounted for, and the numbering simply
    wraps around when the end of the cycle is reached.

    Parameters
    ----------
    st : str or datetime-like
        Start time of the series (anything accepted by ``pd.Timestamp``).
    dt : int or float
        Time step length in seconds. Python and numpy numeric types are
        both accepted.
    n_ts : int
        Number of consecutive time steps to label.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``n_ts`` with values in
        ``1 .. int(365*24*3600/dt)``.

    Raises
    ------
    ValueError
        If ``dt`` is not strictly positive or is longer than 365 days.
    """
    if not dt > 0:
        raise ValueError(f"dt must be a positive number of seconds, got {dt!r}")

    steps_per_year = int(365 * 24 * 60 * 60 / dt)
    if steps_per_year < 1:
        raise ValueError(f"dt={dt!r} s is longer than the 365-day cycle")

    timestep = np.arange(1, steps_per_year + 1)

    start = pd.Timestamp(st)
    origin = pd.Timestamp(year=start.year, month=8, day=1)
    # float() lets numpy integer scalars through; Timedelta // Timedelta
    # yields the number of whole steps, with no need to build a date range.
    step = pd.Timedelta(seconds=float(dt))

    if start < origin:
        # Before 1 August: count back from the end of the cycle.
        s_ind = steps_per_year - int((origin - start) // step)
    else:
        # On or after 1 August: count forward from the origin.
        s_ind = int((start - origin) // step)

    # Vectorised wrap-around lookup instead of a per-element Python loop.
    return timestep[(s_ind + np.arange(n_ts)) % steps_per_year]

In [None]:
# Load the model file. ``smash.read_model`` is tried first; on any ordinary
# failure the file is re-read with ``smash.read_model_ddt``.
try:
    model = smash.read_model(pathmodel)
except Exception:  # narrowed from a bare ``except`` so Ctrl-C still interrupts
    try:
        model = smash.read_model_ddt(pathmodel)
    except IOError as err:
        # Same exception type and message as before, with the cause preserved.
        raise IOError("Failed to read .hdf5 file") from err

# The extraction below uses key access (model["dt"], ...). It used to run only
# in the fallback branch, so a successful ``read_model`` left every variable
# undefined (NameError, or silently stale kernel state). Fail loudly instead.
# NOTE(review): extraction from the object returned by ``smash.read_model`` is
# not implemented here - add it if such files need to be supported.
if not hasattr(model, "__getitem__"):
    raise TypeError(
        f"{pathmodel!r} was loaded by smash.read_model as "
        f"{type(model).__name__}, which does not support the key access "
        "used to extract the data; expected the object returned by "
        "smash.read_model_ddt"
    )

dt = model["dt"]
st = model["start_time"]

code = model["code"]

qs = model["qsim"]

# Negative values are replaced by NaN (in place, on the arrays held by
# ``model``) - presumably they flag missing data; TODO confirm.
qo = model["qobs"]
qo[qo < 0] = np.nan

prcp = model["mean_prcp"]
prcp[prcp < 0] = np.nan

pet = model["mean_pet"]
pet[pet < 0] = np.nan

surf = model["area"]

bias = qo - qs

# assumes qs is laid out as (n_stations, n_timesteps) - TODO confirm
n_sta = qs.shape[0]
n_ts = qs.shape[-1]

# Column-major flattening makes the first axis vary fastest, so time-dependent
# labels are repeated per entry of the first axis while per-station attributes
# are tiled once per time step.
tstep = np.repeat(np.arange(n_ts), n_sta)
tsy = np.repeat(timestep_1year(st, dt, n_ts), n_sta)

code = np.tile(code, n_ts)
surf = np.tile(surf, n_ts)
qs = qs.flatten(order="F")
prcp = prcp.flatten(order="F")
pet = pet.flatten(order="F")

bias = bias.flatten(order="F")

df = pd.DataFrame(
    {
        "code": code,
        "timestep": tstep,
        "timestep_in_year": tsy,
        "surface": surf,
        "precipitation": prcp,
        "pet": pet,
        "discharge_sim": qs,
        "bias": bias,
    }
)
        

In [None]:
# Codes of the entries kept when the table is filtered on its "code" column.
list_code = [
    "Y4624010",
    "Y6434005",
    "Y5615030",
    "Y5325010",
    "Y5032010",
    "Y5202010",
    "Y4615020",
    "Y5424010",
    "Y5615010",
]

In [None]:
# Keep only the rows whose "code" is one of the selected codes, then show the
# resulting table as the cell output.
df = df.loc[df["code"].isin(list_code)]
df

In [None]:
# Pairwise scatter plots for the rows with code "Y5202010", with a kernel
# density estimate on the diagonal.
pd.plotting.scatter_matrix(
    df.loc[df["code"] == "Y5202010"],
    alpha=0.2,
    diagonal='kde',
)

In [None]:
# Write the filtered table to the configured CSV file, without the index column.
df.to_csv(path_or_buf=outfile, index=False)