In [259]:
import os
import sys
# "__file__" here is a quoted string literal, not the __file__ variable, so
# realpath() resolves that literal name against the current working directory.
# The two dirname() calls then strip the fake file name and one directory
# level, leaving the parent of the working directory as PROJECT_PATH.
PROJECT_PATH = os.path.dirname(os.path.dirname(os.path.realpath("__file__")))
# Put that directory on the import path (presumably so the `src.*` imports
# further down resolve).
sys.path.append(PROJECT_PATH)

import json
import pickle

import numpy as np
import pandas as pd
import torch
from torchsummary import summary

from src.data.downloader import Downloader
from src.model.baseline_model import BaselineModel
from src.model.linear_model import LinearModel
from src.model.mlp_model import MLPModel
from src.model.lstm_model import HydroForecast
from src.evaluation.evaluator import Evaluator

In [11]:
%%capture
# Base URL for Google Drive direct downloads; kept for reference, it is not
# used by the Downloader calls below.
gdrive_link = "https://drive.google.com/uc?export=download&id="

# First call: request the manifest file by its Google Drive id.
# NOTE(review): presumably Downloader stores it under <PROJECT_PATH>/data,
# which is where the manifest is read from below - confirm against Downloader.
Downloader(gdrive_id="1XMqFFSc65UVE3EYh_tgPHtVRnonshjEO", file_name="all_analysis_for_paper.json")

# Read the manifest inside a context manager so the file handle is closed
# deterministically (the previous `json.load(open(...))` leaked it).
with open(os.path.join(PROJECT_PATH, "data", "all_analysis_for_paper.json")) as manifest_file:
    manifest = json.load(manifest_file)

# Second call: hand the parsed manifest to Downloader as its `files` argument.
Downloader(files=manifest)

# Data loading and preprocessing

In [206]:
# Raw inputs: the two data splits plus the metadata table.
data_training = pd.read_csv("../data/data_training.csv")
data_validation = pd.read_csv('../data/data_validation.csv')
meta = pd.read_csv("../data/meta.csv")

# Stack both splits, parse the "Date" column and use it as the index.
data = (
    pd.concat([data_training, data_validation])
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .set_index('Date')
)

# Keep only the rows dated 2005 or later.
data = data.loc[data.index >= '2005']

data


Unnamed: 0_level_0,1515,1516,1518,1521,1719,1720,1722,1723,2040,2046,...,1732,1734,2049,2741,2742,2751,2545,744624,210888,210900
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-01-01,-80.0,45.0,9.0,387.0,513.0,460.0,274.0,322.0,489.0,320,...,-47.0,35.0,142.0,86.0,229.0,244.0,222.0,82.696130,307.000000,288.000000
2005-01-02,-111.0,5.0,-36.0,362.0,505.0,462.0,243.0,310.0,482.0,299,...,-46.0,30.0,139.0,85.0,227.0,241.0,198.0,69.923412,309.000000,306.000000
2005-01-03,-123.0,-9.0,-67.0,341.0,499.0,465.0,214.0,270.0,466.0,274,...,-47.0,27.0,137.0,75.0,225.0,242.0,198.0,72.505006,304.000000,306.000000
2005-01-04,-132.0,-11.0,-72.0,336.0,498.0,464.0,203.0,260.0,464.0,226,...,-50.0,27.0,134.0,109.0,246.0,241.0,198.0,63.899692,293.000000,293.000000
2005-01-05,-127.0,-6.0,-76.0,331.0,496.0,466.0,193.0,258.0,458.0,210,...,-38.0,27.0,133.0,149.0,246.0,303.0,202.0,63.039160,287.000000,282.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-26,-27.0,75.0,-69.0,309.0,478.0,460.0,138.0,204.0,451.0,-78,...,-70.0,52.0,136.0,74.0,194.0,198.0,-18.0,31.000000,227.059349,212.846617
2020-12-27,-94.0,21.0,-50.0,333.0,488.0,460.0,206.0,252.0,464.0,20,...,-88.0,42.0,133.0,82.0,194.0,216.0,-17.0,19.000000,235.703288,221.319022
2020-12-28,-137.0,-20.0,-92.0,311.0,481.0,459.0,201.0,250.0,461.0,120,...,-86.0,40.0,133.0,83.0,197.0,215.0,-22.0,24.000000,256.980675,242.174172
2020-12-29,-140.0,-31.0,-118.0,303.0,481.0,462.0,182.0,239.0,457.0,132,...,0.0,53.0,133.0,85.0,197.0,203.0,-24.0,32.000000,273.603634,258.467258


In [208]:
# Work on a copy so the transformations applied to `s_data` in the following
# cells leave the loaded `data` frame untouched.
s_data = data.copy()

In [209]:
# Input normalisation statistics; read with header=None, so the first line of
# each file is treated as data rather than as column names.
norm_mean = pd.read_csv('../data/params/paramsMean.csv', header=None)
norm_std = pd.read_csv('../data/params/paramsSTDS.csv', header=None)

# Target statistics; their [0, 0] entries are used further down to map the
# model output back to the original scale.
std_szeged = pd.read_csv('../data/params/paramsYSTDS.csv')
mean_szeged = pd.read_csv('../data/params/paramsYMean.csv')

# Standardise: subtract the transposed means, then divide by the transposed
# standard deviations.
s_data = s_data.sub(norm_mean.values.T).div(norm_std.values.T)

In [210]:
# Drop the last 7 rows (`0: -1 -6` is the slice `:-7`).
s_data = s_data.iloc[:-7]

# Slide a 15-row by 48-column window over the frame and flatten the window
# grid into a (num_windows, 15, 48) array.
windowed_data = np.lib.stride_tricks.sliding_window_view(
    s_data, window_shape=(15, 48)
).reshape(-1, 15, 48)

# float32 tensor that is fed to the models.
win_data_torch = torch.tensor(windowed_data, dtype=torch.float32)
win_data_torch.shape

torch.Size([5822, 15, 48])

In [211]:
# Ground-truth targets; the first CSV column becomes the index.
y_true = pd.read_csv(
    os.path.join(PROJECT_PATH, "data", "y_true_2005-01-01_2020-12-24.csv"),
    index_col=0,
    header=0,
)
y_true.index = pd.to_datetime(y_true.index)

# Remove the rows carrying the first 15 index labels.
y_true = y_true.drop(index=y_true.index[:15])

## Loading our LSTM model

In [11]:
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
# code - only load checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, under
# which loading a fully pickled module needs weights_only=False - confirm
# against the installed version.
#
# map_location="cpu" makes the checkpoint load on machines without a GPU and
# keeps the model on the same device as the CPU input tensor `win_data_torch`.
lstm = torch.load('../data/lstm_model.pth', map_location="cpu")
lstm

HydroForecast(
  (lstm): LSTM(48, 12, num_layers=5, batch_first=True)
  (linear): Linear(in_features=12, out_features=7, bias=True)
)

## Load CNN

In [None]:
# NOTE(review): '..' is a directory path, not a checkpoint file, and this cell
# has no execution count - it looks like an unfinished placeholder.
# TODO: point this at the real CNN checkpoint before running the notebook
# top to bottom.
cnn = torch.load('..')

## Get predictions

In [213]:
def get_predictions(model, st_id = "2275", inputs=None, reference=None):
    """Run ``model`` on a batch of windows and return its output as a DataFrame.

    The values are returned exactly as the model emits them; no
    de-normalisation is applied here.

    Parameters
    ----------
    model : callable
        Called as ``model(inputs)``; must return a CPU tensor with one row per
        row of ``reference`` and one column per column of ``reference``.
    st_id : str, optional
        Currently unused; kept so existing callers keep working.
    inputs : torch.Tensor, optional
        Batch fed to the model. Defaults to the notebook-level
        ``win_data_torch``.
    reference : pandas.DataFrame, optional
        Frame whose index and columns label the predictions. Defaults to the
        notebook-level ``y_true``.

    Returns
    -------
    pandas.DataFrame
        Model outputs labelled with ``reference.index`` and
        ``reference.columns``.
    """
    # Fall back to the notebook globals only when no explicit data is given,
    # so the function can also be used (and tested) without hidden state.
    if inputs is None:
        inputs = win_data_torch
    if reference is None:
        reference = y_true

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(inputs)

    return pd.DataFrame(
        data=outputs.detach().numpy(),
        index=reference.index,
        columns=reference.columns,
    )

In [243]:
# Model output for the LSTM, mapped back to the original scale with the target
# statistics (multiply by the std entry, then add the mean entry).
pred = get_predictions(model=lstm)
pred = pred * std_szeged.iloc[0, 0] + mean_szeged.iloc[0, 0]

# Discard the first prediction row and relabel the remaining rows with every
# y_true index label except the last one.
pred = pred.drop(index=pred.index[:1])
pred.index = y_true.index.tolist()[:-1]

In [255]:
# Mean absolute difference between predictions and targets, per column.
pred.sub(y_true).abs().mean()

1day    10.424544
2day    10.778769
3day    12.252495
4day    14.028819
5day    16.752850
6day    21.535234
7day    27.721312
dtype: float64

In [256]:
# Display the de-normalised prediction table.
pred

Unnamed: 0,1day,2day,3day,4day,5day,6day,7day
2005-01-16,237.218475,230.892242,222.210815,214.501266,211.315491,212.531555,218.088974
2005-01-17,214.809296,199.537888,186.197571,179.280502,180.448929,187.150635,197.257462
2005-01-18,208.240646,198.924164,188.705902,185.018951,188.999603,195.589691,206.675034
2005-01-19,192.461761,180.218506,168.993622,164.619553,168.151733,174.846207,186.167282
2005-01-20,180.693802,172.037964,163.428360,161.092438,165.828934,172.355362,183.546616
...,...,...,...,...,...,...,...
2020-12-19,117.023216,113.965782,110.781898,108.499237,107.526619,108.835838,110.496605
2020-12-20,114.820755,111.131889,109.138031,109.425301,111.166214,114.387962,117.375298
2020-12-21,113.346275,110.325478,109.524384,110.962044,113.535583,117.216187,120.581268
2020-12-22,109.478752,107.339523,108.768921,112.784172,117.553490,122.831802,127.140511


In [264]:
# `pred` was relabelled with all but the last y_true index label, so the row
# with that last label is removed from y_true before both go to the Evaluator.
evaluator = Evaluator(
    y_true.drop(index=y_true.index[-1:]),
    pred,
)

In [266]:
# Compute the evaluator's statistics table and write it to disk as CSV.
lstm_stats = evaluator.calculate_all_stats()
lstm_stats.to_csv('../data/metrics/lstm_metrics.csv')

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=

In [269]:
# Write the de-normalised LSTM predictions to disk as CSV.
pred.to_csv('../data/metrics/lstm_model_raw_prediction_mod.csv')