In [1]:
import streamlit as st

import torch
import torch.nn as nn
import torch.optim as optim

import pandas as pd

from torch.utils.data import Dataset

import numpy as np

import HydroErr

import pygad

In [3]:
# Restore the serialized LSTM decoder onto the CPU, whatever device it was saved from.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
decoder_path = "data/final_lstm_decoder_test.pt"
decoder = torch.load(decoder_path, map_location="cpu")

# eval() switches the module to inference mode and returns it, so the cell
# also displays the layer layout as its output.
decoder.eval()

LSTM_decoder(
  (lstm): LSTM(11, 177, batch_first=True)
  (fc_layers): TimeDistributed(
    (module): Sequential(
      (0): Linear(in_features=177, out_features=3, bias=True)
      (1): ReLU()
      (2): Dropout(p=0, inplace=False)
      (3): Linear(in_features=3, out_features=4, bias=True)
      (4): ReLU()
      (5): Dropout(p=0, inplace=False)
      (6): Linear(in_features=4, out_features=13, bias=True)
      (7): ReLU()
      (8): Dropout(p=0, inplace=False)
      (9): Linear(in_features=13, out_features=1, bias=True)
    )
  )
)

In [4]:
# dtype=str keeps every column as text, so gauge IDs such as "01022500"
# keep their leading zero instead of being parsed as integers.
catchments_csv = "./data/Caravan-CAMELS/catchments.csv"
catchments = pd.read_csv(catchments_csv, dtype=str)
catchments

Unnamed: 0,gauge_id,gauge_name,data_all,data_train,data_test
0,01022500,"Narraguagus River at Cherryfield, Maine",./data/Caravan-CAMELS/01022500.csv,./data/Caravan-CAMELS/01022500_train.csv,./data/Caravan-CAMELS/01022500_test.csv
1,01031500,"Piscataquis River near Dover-Foxcroft, Maine",./data/Caravan-CAMELS/01031500.csv,./data/Caravan-CAMELS/01031500_train.csv,./data/Caravan-CAMELS/01031500_test.csv
2,01047000,"Carrabassett River near North Anson, Maine",./data/Caravan-CAMELS/01047000.csv,./data/Caravan-CAMELS/01047000_train.csv,./data/Caravan-CAMELS/01047000_test.csv
3,01052500,"Diamond River near Wentworth Location, NH",./data/Caravan-CAMELS/01052500.csv,./data/Caravan-CAMELS/01052500_train.csv,./data/Caravan-CAMELS/01052500_test.csv
4,01054200,"Wild River at Gilead, Maine",./data/Caravan-CAMELS/01054200.csv,./data/Caravan-CAMELS/01054200_train.csv,./data/Caravan-CAMELS/01054200_test.csv
...,...,...,...,...,...
477,14308990,"COW CREEK ABV GALESVILLE RES, NR AZALEA, OR.",./data/Caravan-CAMELS/14308990.csv,./data/Caravan-CAMELS/14308990_train.csv,./data/Caravan-CAMELS/14308990_test.csv
478,14309500,"WEST FORK COW CREEK NEAR GLENDALE, OR",./data/Caravan-CAMELS/14309500.csv,./data/Caravan-CAMELS/14309500_train.csv,./data/Caravan-CAMELS/14309500_test.csv
479,14316700,"STEAMBOAT CREEK NEAR GLIDE, OR",./data/Caravan-CAMELS/14316700.csv,./data/Caravan-CAMELS/14316700_train.csv,./data/Caravan-CAMELS/14316700_test.csv
480,14325000,"SOUTH FORK COQUILLE RIVER AT POWERS, OR",./data/Caravan-CAMELS/14325000.csv,./data/Caravan-CAMELS/14325000_train.csv,./data/Caravan-CAMELS/14325000_test.csv


In [12]:
# Number of leading time steps passed to the decoder as `base_length`; the same
# number of leading observations is dropped before simulation and observation
# are compared.
warm_up = 365


class Objective_builder:
    """Evaluate candidate catchment codes ("solutions") against observations.

    A solution is a 1-D numeric vector. It is repeated for every sequence in
    ``x`` and passed, together with ``x``, to the decoder's ``decode`` method.
    The decoder output is flattened and compared with ``y`` after the first
    ``base_length`` observations have been dropped.

    Parameters
    ----------
    x : torch.Tensor
        Decoder inputs. In this notebook: shape ``(1, n_steps, 3)``.
    y : torch.Tensor
        Observed target. In this notebook: shape ``(1, n_steps)``.
    model : object, optional
        Anything exposing ``decode(codes, x, base_length=...)``. When omitted,
        the notebook-level ``decoder`` is looked up at call time.
    base_length : int, optional
        Warm-up length. When omitted, the notebook-level ``warm_up`` is looked
        up at call time.

    Notes
    -----
    NOTE(review): ``decode`` is presumably expected to return one value per
    time step after the warm-up period; that is implied by the observations
    being sliced with ``[base_length:]`` but is not visible here - confirm.
    """

    def __init__(self, x, y, model=None, base_length=None):
        self.x = x.contiguous()
        self.y = y.contiguous()
        self.model = model
        self.base_length = base_length

    def _settings(self):
        """Return ``(model, base_length)``, falling back to the notebook globals."""
        model = decoder if self.model is None else self.model
        base_length = warm_up if self.base_length is None else self.base_length
        return model, base_length

    def _simulate(self, solution):
        """Run the decoder for ``solution``; return ``(simulated, observed)`` 1-D arrays."""
        model, base_length = self._settings()

        # numpy -> float32 tensor, one identical copy of the code per input sequence
        codes = torch.from_numpy(np.asarray(solution, dtype=np.float32)).unsqueeze(0)
        codes = codes.expand(self.x.shape[0], -1)

        # Pure inference: skip building an autograd graph on every evaluation.
        with torch.no_grad():
            simulated = model.decode(codes, self.x, base_length=base_length)
        simulated = simulated.detach().reshape(-1).cpu().numpy()

        observed = self.y.detach().reshape(-1).cpu().numpy()[base_length:]
        return simulated, observed

    def eval(self, ga_instance, solution, solution_idx):
        """PyGAD fitness function: KGE (2009) of the simulation for ``solution``.

        ``ga_instance`` and ``solution_idx`` belong to the PyGAD callback
        signature and are not used.

        Returns
        -------
        float
            The KGE as a plain Python float. An undefined (NaN) KGE is mapped
            to ``-inf`` so that such a candidate ranks last instead of feeding
            NaN into the GA's fitness sorting.
        """
        simulated, observed = self._simulate(solution)
        score = float(
            HydroErr.kge_2009(simulated_array=simulated, observed_array=observed)
        )
        return -np.inf if np.isnan(score) else score

    def pred(self, solution):
        """Return simulation and observation for ``solution`` as a DataFrame.

        The frame has the columns ``"Simulated [mm/day]"`` and
        ``"Observation [mm/day]"``, one row per time step after the warm-up.
        """
        simulated, observed = self._simulate(solution)

        return pd.DataFrame(
            data={
                "Simulated [mm/day]": simulated.tolist(),
                "Observation [mm/day]": observed.tolist(),
            }
        )


In [18]:
# --- Genetic-algorithm settings (identical for every catchment) ---------------
num_generations = 500
num_parents_mating = 10
sol_per_pop = 100
num_genes = 8  # length of the catchment embedding searched for by the GA

init_range_low = -11
init_range_high = 11

parent_selection_type = "sss"
crossover_type = "single_point"
mutation_type = "random"
mutation_probability = 0.25

random_seed = 42  # makes the stochastic search repeatable across runs

# --- Result containers --------------------------------------------------------
# Pre-fill with NaN rather than 1.0: a value of 1 is a *perfect* KGE/NSE, so
# catchments that were never calibrated would otherwise look like perfect fits.
n_catchments = len(catchments)

calibrate_KGES = np.full(n_catchments, np.nan)
calibrate_NSES = np.full(n_catchments, np.nan)

test_KGES = np.full(n_catchments, np.nan)
test_NSES = np.full(n_catchments, np.nan)

camels_embeddings = np.full([n_catchments, num_genes], np.nan)

# Only the first catchment is calibrated for now; set to n_catchments for the full run.
n_to_calibrate = 1


def goodness_of_fit(metric, chart):
    """Apply a HydroErr metric to a chart from Objective_builder.pred, rounded to 3 decimals."""
    return round(
        metric(
            simulated_array=chart["Simulated [mm/day]"].to_numpy(),
            observed_array=chart["Observation [mm/day]"].to_numpy(),
        ),
        3,
    )


for i in range(n_to_calibrate):
    print(f'i={i} starts')

    # Columns 0-2 are the decoder inputs, column 3 is the observed target.
    data_train = np.genfromtxt(catchments["data_train"].iloc[i], delimiter=",")
    data_test = np.genfromtxt(catchments["data_test"].iloc[i], delimiter=",")

    # A NaN in the inputs (e.g. from a header line parsed by genfromtxt) is not
    # removed by HydroErr the way a missing observation is; it reaches the decoder.
    # NOTE(review): with an LSTM this typically turns every later output into NaN - confirm.
    if np.isnan(data_train[:, 0:3]).any() or np.isnan(data_test[:, 0:3]).any():
        print(f'i={i}: input columns contain NaN values; check the CSV files')

    x_cal = torch.from_numpy(data_train[:, 0:3]).unsqueeze(0).to(dtype=torch.float32)
    y_cal = torch.from_numpy(data_train[:, 3]).unsqueeze(0).to(dtype=torch.float32)

    x_test = torch.from_numpy(data_test[:, 0:3]).unsqueeze(0).to(dtype=torch.float32)
    y_test = torch.from_numpy(data_test[:, 3]).unsqueeze(0).to(dtype=torch.float32)

    fn_cal = Objective_builder(x_cal, y_cal)
    fn_test = Objective_builder(x_test, y_test)

    ga_instance = pygad.GA(
        num_generations=num_generations,
        num_parents_mating=num_parents_mating,
        fitness_func=fn_cal.eval,
        sol_per_pop=sol_per_pop,
        num_genes=num_genes,
        init_range_low=init_range_low,
        init_range_high=init_range_high,
        parent_selection_type=parent_selection_type,
        crossover_type=crossover_type,
        mutation_type=mutation_type,
        mutation_probability=mutation_probability,
        stop_criteria="saturate_10",
        random_seed=random_seed,
    )

    # run simulation
    ga_instance.run()

    # Fetch the best solution once, reusing the fitness values PyGAD already
    # holds for the final population instead of re-evaluating it on every call.
    best_solution, _, _ = ga_instance.best_solution(
        pop_fitness=ga_instance.last_generation_fitness
    )

    chart_cal = fn_cal.pred(best_solution)
    chart_test = fn_test.pred(best_solution)

    # goodness of fit on the calibration and test periods
    kge_cal = goodness_of_fit(HydroErr.kge_2009, chart_cal)
    kge_test = goodness_of_fit(HydroErr.kge_2009, chart_test)

    nse_cal = goodness_of_fit(HydroErr.nse, chart_cal)
    nse_test = goodness_of_fit(HydroErr.nse, chart_test)

    camels_embeddings[i, :] = best_solution

    calibrate_KGES[i] = kge_cal
    test_KGES[i] = kge_test
    calibrate_NSES[i] = nse_cal
    test_NSES[i] = nse_test

    print(f'fit={test_KGES[i]}')

i=0 starts


 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510
 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524
 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538
 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552
 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566
 6567 6568 6569 6570 6571 6572 6573 6574] contained NaN values and the row(s) have been removed (Rows are zero indexed).


ValueError: setting an array element with a sequence. The requested array would exceed the maximum number of dimension of 1.