In [1]:
import os
import json
from pathlib import Path

import pandas as pd

from neuralhydrology.nh_run import eval_run

In [2]:
# Load the allowed GPU ids from gpu.json in the working directory.
# The context manager closes the file even if JSON parsing fails.
with open("gpu.json") as f:
    gpus = json.load(f)

In [3]:
# Epoch whose evaluation results are produced and inspected below.
epoch = 20
# Zero-pad to three digits ("005", "020", "100") for the "model_epoch<NNN>" directory name.
# The earlier `epoch / 100 > 1` check yielded "05" for one-digit epochs and "0100" for epoch 100.
epoch_str = f"{epoch:03d}"

In [4]:
# Use the entry of ./runs whose name sorts last.
# NOTE(review): presumably run folder names end in a timestamp, making this the newest run — confirm.
runs_root = "./runs"
latest_run_name = sorted(os.listdir(runs_root))[-1]
run_dir = Path(runs_root) / latest_run_name

In [5]:
def eval_results(period: str, epoch: str = "010", run_directory: "Path | None" = None) -> pd.DataFrame:
    """Load the per-basin metrics of one evaluated period and print summary NSE values.

    Reads ``<run directory>/<period>/model_epoch<epoch>/<period>_metrics.csv``, indexes the
    table by its ``basin`` column and prints the median and mean of the ``NSE`` column.

    Args:
        period: Name of the evaluated period; used as sub-directory name and as file prefix.
        epoch: Epoch label exactly as it appears in the ``model_epoch...`` directory name.
        run_directory: Directory to read from. When omitted, the notebook-level ``run_dir``
            is used, which keeps existing calls working unchanged.

    Returns:
        The metrics table indexed by basin id (ids are read as strings).
    """
    base_dir = run_dir if run_directory is None else Path(run_directory)
    metrics_file = base_dir / period / f"model_epoch{epoch}" / f"{period}_metrics.csv"

    # Read basin ids as strings so they are not converted to integers.
    df = pd.read_csv(metrics_file, dtype={'basin': str})
    df = df.set_index('basin')

    # Summary statistics over all basins; pandas skips NaN entries by default, so basins
    # without a valid NSE value do not contribute.
    print(f"Median NSE of the {period} period {df['NSE'].median():.3f}")
    print(f"Mean NSE of the {period} period {df['NSE'].mean():.3f}")

    return df

In [6]:
# Evaluate the train period on the first allowed GPU, then read back its per-basin metrics.
first_gpu = gpus[0]
eval_run(run_dir, period="train", epoch=epoch, gpu=first_gpu)
df_train = eval_results(period="train", epoch=epoch_str)

# Evaluation:  40%|████      | 17/42 [00:12<00:17,  1.42it/s]

Basin 13002 All simulated values are NaN, thus metrics will be NaN, too.


# Evaluation:  43%|████▎     | 18/42 [00:13<00:18,  1.30it/s]

Basin 13005 All simulated values are NaN, thus metrics will be NaN, too.


# Evaluation:  45%|████▌     | 19/42 [00:15<00:24,  1.07s/it]

The following basins had not enough valid target values to calculate a standard deviation: 13035. NSE loss values for this basin will be NaN.


# Evaluation:  67%|██████▋   | 28/42 [00:24<00:15,  1.14s/it]

The following basins had not enough valid target values to calculate a standard deviation: 19013. NSE loss values for this basin will be NaN.


# Evaluation:  83%|████████▎ | 35/42 [00:27<00:03,  2.07it/s]

Basin 19243 All simulated values are NaN, thus metrics will be NaN, too.


# Evaluation: 100%|██████████| 42/42 [00:33<00:00,  1.27it/s]
Median NSE of the train period 0.935
Mean NSE of the train period 0.871


In [7]:
# Evaluate the validation period on the first allowed GPU, then read back its per-basin metrics.
first_gpu = gpus[0]
eval_run(run_dir, period="validation", epoch=epoch, gpu=first_gpu)
df_val = eval_results(period="validation", epoch=epoch_str)

# Validation:   7%|▋         | 3/42 [00:00<00:03, 10.30it/s]

The following basins had not enough valid target values to calculate a standard deviation: 11163. NSE loss values for this basin will be NaN.


# Validation:  45%|████▌     | 19/42 [00:02<00:02,  9.82it/s]

The following basins had not enough valid target values to calculate a standard deviation: 13035. NSE loss values for this basin will be NaN.


# Validation:  67%|██████▋   | 28/42 [00:03<00:01,  9.53it/s]

The following basins had not enough valid target values to calculate a standard deviation: 19013. NSE loss values for this basin will be NaN.


# Validation: 100%|██████████| 42/42 [00:04<00:00,  8.69it/s]
Median NSE of the validation period 0.904
Mean NSE of the validation period 0.821


In [8]:
# Evaluate the test period on the first allowed GPU, then read back its per-basin metrics.
first_gpu = gpus[0]
eval_run(run_dir, period="test", epoch=epoch, gpu=first_gpu)
df_test = eval_results(period="test", epoch=epoch_str)

# Evaluation:  10%|▉         | 4/42 [00:00<00:04,  9.13it/s]

The following basins had not enough valid target values to calculate a standard deviation: 11163. NSE loss values for this basin will be NaN.


# Evaluation:  45%|████▌     | 19/42 [00:02<00:02,  8.34it/s]

The following basins had not enough valid target values to calculate a standard deviation: 13035. NSE loss values for this basin will be NaN.


# Evaluation:  67%|██████▋   | 28/42 [00:03<00:01,  9.42it/s]

The following basins had not enough valid target values to calculate a standard deviation: 19013. NSE loss values for this basin will be NaN.


# Evaluation: 100%|██████████| 42/42 [00:04<00:00,  8.64it/s]
Median NSE of the test period 0.808
Mean NSE of the test period -1.880


In [9]:
# Tag the NSE and KGE columns with their period before the three tables are merged.
df_train = df_train.rename(columns={metric: f"{metric}_train" for metric in ("NSE", "KGE")})
df_val = df_val.rename(columns={metric: f"{metric}_val" for metric in ("NSE", "KGE")})
df_test = df_test.rename(columns={metric: f"{metric}_test" for metric in ("NSE", "KGE")})

In [10]:
# Combine the three per-period tables on their shared basin index
# (default inner join: only basins present in every table are kept).
df_tmp = df_train.merge(df_val, left_index=True, right_index=True)
df = df_tmp.merge(df_test, left_index=True, right_index=True)
df.head()

Unnamed: 0_level_0,NSE_train,KGE_train,NSE_val,KGE_val,NSE_test,KGE_test
basin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
11001,0.986364,0.989054,0.980813,0.969651,0.945142,0.952948
11068,0.980227,0.988455,0.954066,0.96872,0.938136,0.968232
11126,0.977777,0.975617,0.980214,0.980604,0.970454,0.984694
11129,0.978555,0.974849,0.967298,0.981177,0.95802,0.968957
11163,0.95677,0.962357,,,,


The merged metrics table is saved as `eval.csv` in the current run directory.

In [11]:
# Write the merged metrics table next to the run's other outputs.
eval_csv_path = run_dir / "eval.csv"
df.to_csv(eval_csv_path)