# v2.1 exploration: trying to make it work better

In [None]:
# Environment
import os
import os.path as osp
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
# Local modules
sys.path.append('..')
import reproducibility
import pandas as pd
from utils import print_dict_summary
from data_funcs import rmse
from moisture_rnn import RNNParams, RNNData, RNN, RNN_LSTM
from moisture_rnn_pkl import pkl2train
from tensorflow.keras.callbacks import Callback
from utils import hash2
import copy
import logging
import pickle
from utils import logging_setup, read_yml, read_pkl, hash_ndarray, hash_weights
import yaml
import copy

In [None]:
# Call the project's logging helper (imported from utils) before any other
# cell runs; presumably configures log level/format -- confirm in utils.
logging_setup()

## Test Learning Schedule

In [None]:
# Load the training data with the project's read_pkl helper (from utils).
train = read_pkl('train.pkl')
# Last expression of the cell: shows the top-level keys of the loaded object.
train.keys()

In [None]:
# Re-execute moisture_rnn so that edits made to the module on disk are picked
# up without restarting the kernel.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
# reload() does not refresh names that were bound earlier with
# `from moisture_rnn import ...`, so every class used by the following cells
# has to be re-bound here. RNNParams is included because the next cell
# instantiates it; without it that cell would use the stale pre-reload class.
from moisture_rnn import RNN, RNNData, RNNParams

In [None]:
# Read the "rnn" section of params.yaml and wrap it in RNNParams in one step.
params = RNNParams(read_yml("params.yaml", subkey="rnn"))

In [None]:
# Build the RNNData container from the single entry 'PLFI1_202401' of the
# training dictionary, taking the scaler and feature list from params.
rnn_dat = RNNData(
    train['PLFI1_202401'],
    scaler=params['scaler'],
    features_list=params['features_list'],
)

In [None]:
# Prepare the data in three steps: split, scale, reshape into batches.
# The three time fractions are presumably train/validation/test -- confirm
# against RNNData.train_test_split.
rnn_dat.train_test_split(time_fracs=[.9, .05, .05])
rnn_dat.scale_data()
rnn_dat.batch_reshape(
    timesteps=params['timesteps'],
    batch_size=params['batch_size'],
)

In [None]:
# Seed first so the run is repeatable, then enable the 'exp' batch schedule
# with bounds 20 .. rnn_dat.hours and turn on verbose fitting.
reproducibility.set_seed()
params.update({
    'batch_schedule_type': 'exp',
    'bmin': 20,
    'bmax': rnn_dat.hours,
})
params.update({'verbose_fit': True})

# Build the model from the updated params and run it; with return_epochs=True
# run_model is unpacked into three values here.
rnn = RNN(params)
m, errs, best = rnn.run_model(
    rnn_dat,
    plot_period="predict",
    return_epochs=True,
)

## Test Spatial Data

In [None]:
# Reload the training data from disk so the spatial test starts from a fresh
# copy of train.pkl.
train = read_pkl('train.pkl')

In [None]:
# Start again from the "rnn" section of params.yaml, discarding the updates
# applied to params in the earlier cells.
params = RNNParams(read_yml("params.yaml", subkey="rnn"))

In [None]:
# Number of top-level entries in the training dictionary.
len(train)

In [None]:
from itertools import islice

# Keep only the first 100 entries of train, in iteration order.
dat = {key: train[key] for key in islice(train, 100)}

In [None]:
# Show which keys ended up in the 100-entry subset.
dat.keys()

In [None]:
# combine_nested is a project helper from data_funcs; presumably it merges the
# per-key dictionaries in dat into one nested structure -- confirm there.
from data_funcs import combine_nested
dd = combine_nested(dat)

In [None]:
# Re-execute utils so edits made to the module on disk are picked up without
# restarting the kernel.
import importlib
import utils
importlib.reload(utils)
# Names imported with `from utils import ...` are not refreshed by reload(),
# so Dict is (re-)bound from the reloaded module here.
from utils import Dict

In [None]:
# Wrap the combined data in the project's Dict class (from utils) before
# handing it to RNNData; see utils.Dict for what the wrapper adds.
dd = Dict(dd)

In [None]:
# Re-execute moisture_rnn so edits made to the module on disk are picked up
# without restarting the kernel.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
# reload() does not refresh names bound via `from ... import`, so RNNData is
# re-bound from the reloaded module before it is used below.
from moisture_rnn import RNNData

In [None]:
# Build the RNNData container from the combined multi-location data, using
# the "standard" scaler and an explicit feature list that adds the static
# 'elev', 'lon' and 'lat' columns to 'Ed', 'Ew' and 'rain'.
rnn_dat = RNNData(
    dd,
    scaler="standard",
    features_list=['Ed', 'Ew', 'rain', 'elev', 'lon', 'lat'],
)

In [None]:
# Split along both time and space. An earlier trial used
# space_fracs = [.9, .05, .05] with the same time_fracs.
rnn_dat.train_test_split(
    time_fracs=[.9, .05, .05],
    space_fracs=[.8, .1, .1],
)

In [None]:
# Apply the scaler chosen when rnn_dat was constructed; runs after the
# train/test split and before batch_reshape.
rnn_dat.scale_data()

In [None]:
# Check which batch size the reshape in the next cell will use.
print(params['batch_size'])

In [None]:
# Reshape the scaled data into batches using the timesteps and batch size
# from params; verbose=True asks batch_reshape to report what it does.
rnn_dat.batch_reshape(
    timesteps=params['timesteps'],
    batch_size=params['batch_size'],
    verbose=True,
)

In [None]:
# Re-execute moisture_rnn so edits made to the module on disk are picked up
# without restarting the kernel.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
# reload() does not refresh names bound via `from ... import`, so RNN is
# re-bound from the reloaded module before the model is built.
from moisture_rnn import RNN

In [None]:
from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback

# Override the architecture and training hyperparameters for the spatial run;
# the feature list is taken from rnn_dat so model and data agree.
params.update({
    'epochs': 25,
    'learning_rate': 0.0001,
    'verbose_fit': False,
    'rnn_layers': 2,
    'rnn_units': 20,
    'dense_layers': 1,
    'dense_units': 10,
    'activation': ['relu', 'relu'],
    'features_list': rnn_dat.features_list,
})
# Batch schedule of type 'exp' with bounds 20 .. rnn_dat.hours.
params.update({
    'batch_schedule_type': 'exp',
    'bmin': 20,
    'bmax': rnn_dat.hours,
})

# Fixed seed so repeated runs of this cell are comparable.
reproducibility.set_seed(123)
rnn = RNN(params)
m, errs = rnn.run_model(rnn_dat)

In [None]:
# Average of the errors returned by run_model in the previous cell.
errs.mean()

In [None]:
# Re-execute data_funcs so edits made to the module on disk are picked up
# without restarting the kernel.
import importlib
import data_funcs
importlib.reload(data_funcs)
# reload() does not refresh names bound via `from ... import`, so
# process_train_dict is bound from the reloaded module here.
from data_funcs import process_train_dict

In [None]:
from data_funcs import process_train_dict

# Load the data-processing settings and override 'hours' with 3648 before
# building the training dictionary from the raw pickle file.
data_params = read_yml("params_data.yaml")
data_params.update({'hours': 3648})
train2 = process_train_dict(
    "data/fmda_nw_202401-05_f05.pkl",
    data_params=data_params,
    verbose=True,
)

In [None]:
# Re-execute moisture_rnn so edits made to the module on disk are picked up
# without restarting the kernel.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
# reload() does not refresh names bound via `from ... import`, so RNNData is
# re-bound from the reloaded module before rnn_dat is rebuilt below.
from moisture_rnn import RNNData

In [None]:
# Rebuild the spatial dataset from the first 100 entries of train2 and run the
# same preparation pipeline as the earlier spatial test:
# construct -> split -> scale -> reshape.
dat = {k: train2[k] for k in islice(train2, 100)}
dd = combine_nested(dat)
dd = Dict(dd)
rnn_dat = RNNData(
    dd,
    scaler="standard",
    features_list=['Ed', 'Ew', 'rain', 'elev', 'lon', 'lat'],
)
rnn_dat.train_test_split(
    time_fracs=[.7, .15, .15],
    space_fracs=[.8, .1, .1],
)
# Fix: this cell previously skipped scale_data(), so the "standard" scaler
# requested above was never applied, unlike the other pipelines in this
# notebook, which scale between the split and the reshape.
rnn_dat.scale_data()

# Use a batch size of 32 for this run, both for reshaping and for training.
params.update({'batch_size': 32})
rnn_dat.batch_reshape(
    timesteps=params['timesteps'],
    batch_size=params['batch_size'],
    # One integer start time of 0 per entry in rnn_dat.case.
    start_times=np.zeros(len(rnn_dat.case)).astype(int),
    verbose=False,
)

In [None]:
# Inspect the shape of the training inputs produced by batch_reshape.
rnn_dat.X_train.shape

In [None]:
# Inspect the shape of the validation inputs.
rnn_dat.X_val.shape

In [None]:
# Inspect the shape of the first element of the test inputs; X_test is indexed
# here, unlike X_train/X_val -- presumably it holds one array per test
# location (TODO confirm against RNNData).
rnn_dat.X_test[0].shape

In [None]:
# Same hyperparameter overrides as the earlier spatial run, now applied to the
# dataset rebuilt from train2.
params.update({
    'epochs': 25,
    'learning_rate': 0.0001,
    'verbose_fit': False,
    'rnn_layers': 2,
    'rnn_units': 20,
    'dense_layers': 1,
    'dense_units': 10,
    'activation': ['relu', 'relu'],
    'features_list': rnn_dat.features_list,
})
# Batch schedule of type 'exp' with bounds 20 .. rnn_dat.hours.
params.update({
    'batch_schedule_type': 'exp',
    'bmin': 20,
    'bmax': rnn_dat.hours,
})

# Fixed seed so repeated runs of this cell are comparable.
reproducibility.set_seed(123)
rnn = RNN(params)
m, errs = rnn.run_model(rnn_dat)

In [None]:
# Average of the errors returned by run_model in the previous cell.
errs.mean()

## LSTM

TODO: Fix the LSTM cells below.

In [None]:
# Re-execute moisture_rnn so that edits made to the module on disk are picked
# up without restarting the kernel.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
# reload() does not refresh names that were bound earlier with
# `from moisture_rnn import ...`. Re-bind RNNParams together with RNN_LSTM so
# the params object built in the next cell comes from the same (reloaded)
# module as the model class that consumes it.
from moisture_rnn import RNN_LSTM, RNNParams

In [None]:
# Replace params with the "lstm" section of params.yaml, wrapped in RNNParams.
params = RNNParams(read_yml("params.yaml", subkey="lstm"))

In [None]:
from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback

# Hyperparameter overrides for the LSTM run; the feature list is taken from
# the rnn_dat object prepared in the earlier cells.
params.update({
    'epochs': 50,
    'learning_rate': 0.00005,
    'verbose_fit': True,
    'rnn_layers': 2,
    'rnn_units': 20,
    'dense_layers': 1,
    'dense_units': 10,
    'activation': ['relu', 'relu'],
    'features_list': rnn_dat.features_list,
    'batch_schedule_type': 'exp',
    'bmin': 10,
    'bmax': rnn_dat.hours,
})
reproducibility.set_seed(123)
lstm = RNN_LSTM(params)

# Fit the training model directly instead of going through run_model.
# NOTE(review): rnn_dat was reshaped with the batch size of the previous
# params object, while params['batch_size'] now comes from the "lstm"
# section -- confirm the two agree.
history = lstm.model_train.fit(
    rnn_dat.X_train,
    rnn_dat.y_train,
    batch_size=params['batch_size'],
    epochs=params['epochs'],
    callbacks=[
        ResetStatesCallback(params),
        EarlyStoppingCallback(patience=15),
    ],
    validation_data=(rnn_dat.X_val, rnn_dat.y_val),
)
              