This notebook is a guide to extracting calibration results from the models' HDF5 files, which are generally large, into smaller CSV and pickle files for analysis.

In [None]:
import smash
import pandas as pd
import multiprocessing as mp
from preprocessing import load_data
import os
import pickle

In [None]:
# Show the installed smash version (displayed as the cell output).
smash.__version__

## 1. For local calibration

Create score dataframe:

In [None]:
# Directory of locally calibrated models; every entry found here is later
# passed to smash.io.read_model by scores_local.
# Named `local_model_dir` rather than `dir` so the builtin dir() stays usable
# for the rest of the kernel session.
local_model_dir = "models/local/p1/Uniform"

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the resulting score table reproducible between runs and machines.
files = [
    os.path.join(local_model_dir, name)
    for name in sorted(os.listdir(local_model_dir))
]

# Number of worker processes used to score the models in parallel.
ncpu = 10

In [None]:
def scores_local(filepath, end_warmup):
    """Read one saved model and return its gauge code with NSE and KGE scores.

    Parameters
    ----------
    filepath
        Path of the model file, handed to ``smash.io.read_model``.
    end_warmup
        Passed unchanged as the third argument of ``smash.metrics``.

    Returns
    -------
    tuple
        ``(code, nse, kge)`` where ``code`` is ``model.mesh.code[0]`` and the
        two scores are the first element of the corresponding metric result.
        NOTE(review): only index 0 is kept, so this presumably expects
        single-gauge models -- confirm.
    """
    calibrated = smash.io.read_model(filepath)

    # Same call for both criteria; keep only the first entry of each result.
    nse, kge = (
        smash.metrics(calibrated, criterion, end_warmup)[0]
        for criterion in ("nse", "kge")
    )

    return calibrated.mesh.code[0], nse, kge

In [None]:
# End of the warm-up period, forwarded to scores_local for every model file.
END_WARMUP = "2017-07-31"

# Score every model file in parallel. The context manager releases the worker
# processes even when a task raises; starmap blocks until all results have been
# collected, so tearing the pool down on exit cannot drop any result.
# NOTE(review): scores_local is defined inside the notebook, which presumably
# only works with the "fork" start method -- confirm on Windows/macOS.
with mp.Pool(ncpu) as pool:
    res = pool.starmap(scores_local, [(f, END_WARMUP) for f in files])

In [None]:
# Each entry of `res` is the (code, nse, kge) tuple returned by scores_local.
local_score_columns = ["code", "NSE_Uniform", "KGE_Uniform"]

score = pd.DataFrame(res, columns=local_score_columns)
score.to_csv("scores-local-u.csv", index=False)

Repeat the same steps for the local distributed calibration to create ``scores-local-d.csv``, then merge the two files into ``scores-local.csv`` (for example with pandas in Python, or with Excel).

Next, we merge this new file with the catchment information dataframe and save the result as ``scores.csv``.

In [None]:
local_scores = pd.read_csv("scores-local.csv")
catchments = pd.read_csv("catchment_info.csv")

# Left join: keep every scored catchment and attach its "nature" when known.
merged_df = local_scores.merge(catchments[["code", "nature"]], how="left", on="code")

# Put the identifying columns first, followed by the scores in their existing order.
leading_columns = ["code", "nature"]
columns_order = leading_columns + [
    name for name in merged_df.columns if name not in leading_columns
]
merged_df = merged_df[columns_order]

In [None]:
# Write the merged local-calibration scores without the pandas index column.
merged_df.to_csv(path_or_buf="scores.csv", index=False)

## 2. For regionalization

### 2.1. Extracting results from hdf5 to pickle file

In [None]:
# Calibration setup name; interpolated into the model/output directories below
# and compared against the "nature" column of catchment_info.csv further down.
csetup = "upstream"
# Directory the regionalization models are read from (one "<method>.hdf5" file per method).
model_path = f"models/p1/reg-{csetup}"
# Regionalization methods; each name is used as the HDF5 file stem, as the
# prefix of the pickle files and as the suffix of the NSE_/KGE_ score columns.
methods = ["Uniform", "ANN", "Multi-linear"]

In [None]:
# Load one model per regionalization method from "<model_path>/<method>.hdf5".
models = {
    name: smash.io.read_model(os.path.join(model_path, name + ".hdf5"))
    for name in methods
}

In [None]:
# Signatures per method, computed on the observed and the simulated domain.
s_obs, s_sim = {}, {}
for name, loaded in models.items():
    s_obs[name], s_sim[name] = (
        smash.signatures(loaded, domain=domain) for domain in ("obs", "sim")
    )

In [None]:
# Directory receiving the extracted pickle files.
# NOTE(review): the models are read from `model_path` ("models/p1/reg-...")
# but the pickles go to "models/reg-..." -- confirm this difference is intended.
out_dir = f"models/reg-{csetup}"
# Create it up front so that open(..., "wb") does not fail when it is missing.
os.makedirs(out_dir, exist_ok=True)

for method, model in models.items():
    # One plain dict per output file "<method>_<kind>.pickle".
    payloads = {
        # Split the last axis of rr_parameters.values into one array per key.
        "parameters": {
            key: model.rr_parameters.values[..., i]
            for i, key in enumerate(model.rr_parameters.keys)
        },
        "signatures": {"obs": s_obs[method], "sim": s_sim[method]},
        "discharges": {"obs": model.response_data.q, "sim": model.response.q},
    }
    for kind, payload in payloads.items():
        with open(os.path.join(out_dir, f"{method}_{kind}.pickle"), "wb") as f:
            pickle.dump(payload, f)

### 2.2. Create score dataframe

In [None]:
# End of the warm-up period, passed to smash.metrics through scores_reg.
END_WARMUP = "2017-07-31"
# Catchment metadata; its "code" and "nature" columns are used in the cells below.
df_info = pd.read_csv("catchment_info.csv")

In [None]:
def scores_reg(model, end_warmup):
    """Return the NSE and KGE metric results of an already loaded model.

    Parameters
    ----------
    model
        Model object handed to ``smash.metrics``.
    end_warmup
        Passed unchanged as the third argument of ``smash.metrics``.

    Returns
    -------
    tuple
        ``(nse, kge)``: the unmodified results of the two ``smash.metrics``
        calls (unlike ``scores_local``, no element is selected).
    """
    nse, kge = (
        smash.metrics(model, criterion, end_warmup) for criterion in ("nse", "kge")
    )
    return nse, kge

In [None]:
# Gauge codes are taken from the first loaded model.
# NOTE(review): this assumes every method shares the same mesh/gauge order -- confirm.
reference_model = list(models.values())[0]
score_by_column = {"code": reference_model.mesh.code}

# Two columns per method: NSE_<method> and KGE_<method>.
for name in methods:
    nse_values, kge_values = scores_reg(models[name], END_WARMUP)
    score_by_column[f"NSE_{name}"] = nse_values.copy()
    score_by_column[f"KGE_{name}"] = kge_values.copy()

score = pd.DataFrame(score_by_column)

In [None]:
# Catchments whose "nature" equals the calibration setup are labelled "cal";
# every other catchment is labelled "val".
is_calibration = df_info["nature"] == csetup
df_info["domain"] = "val"
df_info.loc[is_calibration, "domain"] = "cal"

In [None]:
# Left join: keep every scored catchment and attach its nature and domain.
leading_columns = ["code", "nature", "domain"]
merged_df = score.merge(df_info[leading_columns], how="left", on="code")

# Identifying columns first, then the score columns in their existing order.
columns_order = leading_columns + [
    name for name in merged_df.columns if name not in leading_columns
]
merged_df = merged_df[columns_order]

In [None]:
# Write the regionalization scores without the pandas index column.
# NOTE(review): section 1 also writes "scores.csv", so running both sections
# overwrites the local-calibration table -- confirm this is intended.
merged_df.to_csv("scores.csv", index=False)

## 3. For validation

In [None]:
# Both values are path prefixes, not complete file names: create_model_p2
# appends "_parameters.pickle" to path_p1 and ".hdf5" to path_p2.
# NOTE(review): section 2.1 writes "<method>_parameters.pickle" files, so with
# this value the file read is "models/reg-upstream/p1_parameters.pickle" -- confirm it exists.
path_p1 = "models/reg-upstream/p1"  # prefix of the pickled parameters of the model calibrated on P1
path_p2 = "models/reg-upstream/p2"  # prefix of the HDF5 file created for the model on P2

In [None]:
# Build the setup and mesh used for the P2 model with the project's load_data helper.
p2_period = {"start_time": "2020-08-01", "end_time": "2022-07-31"}
setup, mesh = load_data("catchment_info.csv", **p2_period)

In [None]:
def create_model_p2(setup, mesh, path_p1, path_p2):
    """Build a model from pickled parameters, run it forward and save it.

    Parameters
    ----------
    setup, mesh
        Passed unchanged to ``smash.Model``.
    path_p1
        Path prefix; the parameters are read from ``path_p1 + "_parameters.pickle"``.
    path_p2
        Path prefix; the model is saved to ``path_p2 + ".hdf5"``.

    Returns
    -------
    None
        The result is only written to disk.

    Raises
    ------
    KeyError
        If the pickled mapping lacks one of the new model's rr-parameter keys.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use files
    # produced by a trusted run of this workflow.
    with open(path_p1 + "_parameters.pickle", "rb") as parameter_file:
        calibrated = pickle.load(parameter_file)

    model_p2 = smash.Model(setup, mesh)

    # Copy every rr parameter known to the new model from the pickled mapping;
    # extra keys in the pickle are ignored.
    for key in model_p2.rr_parameters.keys:
        model_p2.set_rr_parameters(key, calibrated[key])

    model_p2.forward_run()

    smash.io.save_model(model_p2, path_p2 + ".hdf5")

In [None]:
# Build the P2 model from the P1 parameters, run it and save it to path_p2 + ".hdf5".
create_model_p2(setup, mesh, path_p1, path_p2)