In [1]:
from pathlib import Path

import torch
from torch.utils.data import DataLoader

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from neuralhydrology.modelzoo.cudalstm import CudaLSTM
from neuralhydrology.utils.config import Config
from neuralhydrology.datasetzoo import get_dataset
from neuralhydrology.datautils.utils import load_scaler

In [2]:
BASIN_ID = 11129
EPOCH = 20

In [3]:
run_dir = Path("./runs/lstm_1607_123349")
config = Config(run_dir / "config.yml")
config.as_dict()

{'batch_size': 512,
 'clip_gradient_norm': 1,
 'commit_hash': 'b2e3136',
 'data_dir': PosixPath('../data/CAMELS_KZ'),
 'dataset': 'generic',
 'device': 'cuda:0',
 'dynamic_inputs': ['prcp',
  'srad',
  't_max',
  't_min',
  'pp_mean',
  'sat_max',
  'hum_mean',
  'dew_min',
  'discharge_prev'],
 'epochs': 20,
 'experiment_name': 'lstm',
 'head': 'regression',
 'hidden_size': 128,
 'img_log_dir': PosixPath('/home/spectre/Projects/ISSAI/Internship/aqua_rate/ML/runs/lstm_1607_123349/img_log'),
 'initial_forget_bias': 3,
 'learning_rate': {0: 0.01, 10: 0.001, 30: 0.0001, 40: 1e-05},
 'log_interval': 5,
 'log_tensorboard': True,
 'loss': 'NSE',
 'metrics': ['NSE', 'KGE'],
 'model': 'cudalstm',
 'num_workers': 8,
 'number_of_basins': 42,
 'optimizer': 'Adam',
 'output_activation': 'linear',
 'output_dropout': 0.4,
 'package_version': '1.10.0',
 'predict_last_n': 1,
 'run_dir': PosixPath('/home/spectre/Projects/ISSAI/Internship/aqua_rate/ML/runs/lstm_1607_123349'),
 'save_validation_results':

In [4]:
model = CudaLSTM(cfg=config)
model = model.eval()
model

CudaLSTM(
  (embedding_net): InputLayer(
    (statics_embedding): Identity()
    (dynamics_embedding): Identity()
  )
  (lstm): LSTM(31, 128)
  (dropout): Dropout(p=0.4, inplace=False)
  (head): Regression(
    (net): Sequential(
      (0): Linear(in_features=128, out_features=1, bias=True)
    )
  )
)

In [5]:
# model.load_state_dict(torch.load(run_dir / f"model_epoch0{EPOCH}.pt"))
# model

In [6]:
# ds = get_dataset(cfg=config, is_train=False, basin=str(BASIN_ID), period="test", scaler=load_scaler(run_dir))
ds = get_dataset(cfg=config, is_train=False, period="test", scaler=load_scaler(run_dir))
loader = DataLoader(ds, batch_size=1, num_workers=0, collate_fn=ds.collate_fn)

The following basins had not enough valid target values to calculate a standard deviation: 11163. NSE loss values for this basin will be NaN.


In [7]:
for key, value in next(iter(loader)).items():
    print(f"{key}: {value.shape}")

x_d: torch.Size([1, 365, 9])
y: torch.Size([1, 365, 1])
date: (1, 365)
x_s: torch.Size([1, 22])
per_basin_target_stds: torch.Size([1, 1, 1])


In [8]:
next(iter(loader))

{'x_d': tensor([[[-0.3361, -0.4118, -1.6943,  ...,  0.8827, -1.9821, -0.0423],
          [-0.3361, -0.4448, -1.5761,  ...,  0.5486, -2.1448, -0.0417],
          [-0.3361, -0.3563, -1.6812,  ...,  0.7308, -2.0634, -0.0417],
          ...,
          [-0.3361, -0.4839, -1.2312,  ..., -1.0004, -1.4211, -0.0623],
          [-0.3361, -0.8706, -1.1984,  ..., -0.2715, -1.0914, -0.0617],
          [ 0.1848, -0.7841, -1.2542,  ...,  0.9434, -1.0572, -0.0611]]]),
 'y': tensor([[[    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [  

In [9]:
input_data = next(iter(loader))
# data preprocess
input_data = model.pre_model_hook(input_data, is_train=False)
# forward pass
pred = model(input_data)
pred

{'lstm_output': tensor([[[-0.4573, -0.0445, -0.0706,  ...,  0.1189,  0.1826,  0.3705],
          [-0.5648, -0.0598, -0.1302,  ...,  0.2131,  0.3271,  0.6374],
          [-0.5881, -0.0659, -0.1597,  ...,  0.2657,  0.4186,  0.7772],
          ...,
          [-0.6079, -0.0618, -0.1478,  ...,  0.2765,  0.5261,  0.9039],
          [-0.6078, -0.0669, -0.1553,  ...,  0.2858,  0.5448,  0.9064],
          [-0.5989, -0.0729, -0.1611,  ...,  0.2996,  0.5458,  0.9039]]],
        grad_fn=<TransposeBackward0>),
 'h_n': tensor([[[-0.5989, -0.0729, -0.1611, -0.6813, -0.0113,  0.9232, -0.8162,
            0.9621, -0.3944, -0.7837, -0.9732, -0.1138, -0.9553,  0.7547,
           -0.7745, -0.9249,  0.9028,  0.1640,  0.5071,  0.1963, -0.4468,
           -0.1228,  0.7752,  0.6702, -0.0994,  0.0407, -0.0784, -0.1311,
            0.3071, -0.9302,  0.7704, -0.0553,  0.9188, -0.9762,  0.6367,
           -0.7427, -0.7229, -0.0341,  0.5914,  0.5758,  0.6849, -0.9363,
            0.0365, -0.2984, -0.0937, -0.2349,

In [10]:
torch.cat([input_data["x_d"], input_data["x_s"].unsqueeze(0).expand(1, 365, 22)], dim=2)

tensor([[[-0.3361, -0.4118, -1.6943,  ...,  0.5518, 44.1814,  1.4554],
         [-0.3361, -0.4448, -1.5761,  ...,  0.5518, 44.1814,  1.4554],
         [-0.3361, -0.3563, -1.6812,  ...,  0.5518, 44.1814,  1.4554],
         ...,
         [-0.3361, -0.4839, -1.2312,  ...,  0.5518, 44.1814,  1.4554],
         [-0.3361, -0.8706, -1.1984,  ...,  0.5518, 44.1814,  1.4554],
         [ 0.1848, -0.7841, -1.2542,  ...,  0.5518, 44.1814,  1.4554]]])

In [11]:
torch.cat([input_data["x_d"], input_data["x_s"].unsqueeze(0).expand(1, 365, 22)], dim=2).shape

torch.Size([1, 365, 31])

In [12]:
for key, value in pred.items():
    print(f"{key}: {value.shape}")

lstm_output: torch.Size([1, 365, 128])
h_n: torch.Size([1, 1, 128])
c_n: torch.Size([1, 1, 128])
y_hat: torch.Size([1, 365, 1])


In [13]:
pred["lstm_output"][0][-1]

tensor([-0.5989, -0.0729, -0.1611, -0.6813, -0.0113,  0.9232, -0.8162,  0.9621,
        -0.3944, -0.7837, -0.9732, -0.1138, -0.9553,  0.7547, -0.7745, -0.9249,
         0.9028,  0.1640,  0.5071,  0.1963, -0.4468, -0.1228,  0.7752,  0.6702,
        -0.0994,  0.0407, -0.0784, -0.1311,  0.3071, -0.9302,  0.7704, -0.0553,
         0.9188, -0.9762,  0.6367, -0.7427, -0.7229, -0.0341,  0.5914,  0.5758,
         0.6849, -0.9363,  0.0365, -0.2984, -0.0937, -0.2349, -0.6957,  0.7718,
         0.1811,  0.9528, -0.9725,  0.4017, -0.8589, -0.9497,  0.2833, -0.6334,
         0.0281,  0.0351, -0.0918, -0.8935, -0.7875, -0.9622, -0.6862,  0.1580,
         0.9516, -0.1166,  0.0113, -0.0541,  0.0458, -0.0186,  0.2381,  0.9241,
         0.0183, -0.8126, -0.0095, -0.9430, -0.0345,  0.4028,  0.0221, -0.0459,
        -0.2387,  0.1017,  0.0329, -0.1244, -0.0598, -0.1087, -0.0517, -0.2915,
        -0.2966,  0.0821,  0.4491, -0.6525, -0.0279, -0.0904,  0.9211, -0.5593,
        -0.4162, -0.0089, -0.0234, -0.53

In [14]:
pred["h_n"][0][0]

tensor([-0.5989, -0.0729, -0.1611, -0.6813, -0.0113,  0.9232, -0.8162,  0.9621,
        -0.3944, -0.7837, -0.9732, -0.1138, -0.9553,  0.7547, -0.7745, -0.9249,
         0.9028,  0.1640,  0.5071,  0.1963, -0.4468, -0.1228,  0.7752,  0.6702,
        -0.0994,  0.0407, -0.0784, -0.1311,  0.3071, -0.9302,  0.7704, -0.0553,
         0.9188, -0.9762,  0.6367, -0.7427, -0.7229, -0.0341,  0.5914,  0.5758,
         0.6849, -0.9363,  0.0365, -0.2984, -0.0937, -0.2349, -0.6957,  0.7718,
         0.1811,  0.9528, -0.9725,  0.4017, -0.8589, -0.9497,  0.2833, -0.6334,
         0.0281,  0.0351, -0.0918, -0.8935, -0.7875, -0.9622, -0.6862,  0.1580,
         0.9516, -0.1166,  0.0113, -0.0541,  0.0458, -0.0186,  0.2381,  0.9241,
         0.0183, -0.8126, -0.0095, -0.9430, -0.0345,  0.4028,  0.0221, -0.0459,
        -0.2387,  0.1017,  0.0329, -0.1244, -0.0598, -0.1087, -0.0517, -0.2915,
        -0.2966,  0.0821,  0.4491, -0.6525, -0.0279, -0.0904,  0.9211, -0.5593,
        -0.4162, -0.0089, -0.0234, -0.53

In [15]:
ds._per_basin_target_stds

{'11001': tensor([[0.2768]]),
 '11068': tensor([[0.2847]]),
 '11126': tensor([[1.1639]]),
 '11129': tensor([[1.7891]]),
 '11163': tensor([[nan]]),
 '11164': tensor([[1.7698]]),
 '11275': tensor([[0.2989]]),
 '11293': tensor([[0.1148]]),
 '11395': tensor([[0.7345]]),
 '11397': tensor([[0.0529]]),
 '11421': tensor([[31.4752]]),
 '11433': tensor([[0.9503]]),
 '11469': tensor([[0.5810]]),
 '12002': tensor([[0.0804]]),
 '12008': tensor([[0.0070]]),
 '12032': tensor([[0.1136]]),
 '12072': tensor([[0.0450]]),
 '12075': tensor([[0.0667]]),
 '12564': tensor([[0.1507]]),
 '13002': tensor([[0.0118]]),
 '13005': tensor([[0.2477]]),
 '13038': tensor([[0.0009]]),
 '13048': tensor([[0.1841]]),
 '13064': tensor([[0.1512]]),
 '13090': tensor([[0.2235]]),
 '13091': tensor([[0.3406]]),
 '13115': tensor([[0.0473]]),
 '13128': tensor([[0.1171]]),
 '13221': tensor([[0.1861]]),
 '19022': tensor([[0.0144]]),
 '19195': tensor([[0.0735]]),
 '19196': tensor([[0.0385]]),
 '19205': tensor([[0.1231]]),
 '19208': te

In [16]:
stds = set()
for data in loader:
    stds.add(data["per_basin_target_stds"][0][0][0].item())

In [17]:
stds

{0.2767565846443176,
 0.28465375304222107,
 1.1639032363891602,
 1.7891430854797363,
 1.769752860069275,
 0.29886099696159363,
 0.11484294384717941,
 nan,
 0.7344523668289185,
 nan,
 0.05291290581226349,
 nan,
 0.9503335952758789,
 nan,
 0.580983579158783,
 nan,
 0.014383990317583084,
 nan,
 0.03850247338414192,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 31.475189208984375,
 nan,
 0.08041262626647949,
 nan,
 0.007003425620496273,
 nan,
 0.11358395218849182,
 nan,
 0.04498513415455818,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 0.024880042299628258,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 

In [18]:
len(stds)

1137

In [19]:
len(loader)

46032

In [20]:
ds_basin = get_dataset(cfg=config, is_train=False, basin=str(BASIN_ID), period="test", scaler=load_scaler(run_dir))
loader_basin = DataLoader(ds, batch_size=1, num_workers=0, collate_fn=ds.collate_fn)

In [21]:
next(iter(loader_basin))

{'x_d': tensor([[[-0.3361, -0.4118, -1.6943,  ...,  0.8827, -1.9821, -0.0423],
          [-0.3361, -0.4448, -1.5761,  ...,  0.5486, -2.1448, -0.0417],
          [-0.3361, -0.3563, -1.6812,  ...,  0.7308, -2.0634, -0.0417],
          ...,
          [-0.3361, -0.4839, -1.2312,  ..., -1.0004, -1.4211, -0.0623],
          [-0.3361, -0.8706, -1.1984,  ..., -0.2715, -1.0914, -0.0617],
          [ 0.1848, -0.7841, -1.2542,  ...,  0.9434, -1.0572, -0.0611]]]),
 'y': tensor([[[    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [    nan],
          [  