# v2.1 exploration: trying to make it work better

In [None]:
# Environment
import os
import os.path as osp
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
# Local modules
sys.path.append('..')
import reproducibility
import pandas as pd
from utils import print_dict_summary
from data_funcs import rmse
from moisture_rnn import RNNParams, RNNData, RNN, RNN_LSTM, create_rnn_data2
from moisture_rnn_pkl import pkl2train
from tensorflow.keras.callbacks import Callback
from utils import hash2
import copy
import logging
import pickle
from utils import logging_setup, read_yml, read_pkl, hash_ndarray, hash_weights
import yaml
import copy

In [None]:
# Call the project's logging_setup helper (imported from utils) before any
# of the cells below run.
logging_setup()

## Test Batch Size

In [None]:
# Load the training data from train.pkl; it is indexed by case keys below
# (e.g. 'PLFI1_202401').
train = read_pkl('train.pkl')
# Display the available keys.
train.keys()

In [None]:
# Reload moisture_rnn so that edits to the module take effect in this
# session, then re-import the classes so the names point at the reloaded
# definitions.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNN, RNNData

In [None]:
# Read the "rnn" section of params.yaml, wrap it in RNNParams, and override
# the batch size for this experiment.
params = RNNParams(read_yml("params.yaml", subkey="rnn"))
params.update({'batch_size': 7})

# Build the data object for a single case, then split, scale and reshape it
# using the settings held in `params`.
rnn_dat = RNNData(
    train['PLFI1_202401'],
    scaler=params['scaler'],
    features_list=params['features_list'],
)
rnn_dat.train_test_split(train_frac=.9, val_frac=.05)
rnn_dat.scale_data()
rnn_dat.batch_reshape(
    timesteps=params['timesteps'],
    batch_size=params['batch_size'],
)

In [None]:
# Batch count derived from the training length, the window length and the
# batch size (presumably the cap on batches per epoch -- TODO confirm against
# batch_reshape).
samples_past_window = len(rnn_dat.y_train) - params['timesteps']
max_batches = np.floor(samples_past_window / params['batch_size'] + 1)
max_batches

In [None]:
# Quadratic ramp: ease a value from `low` toward `high` across the epochs.
epochs = 20
xgrid = np.arange(epochs)
low = 10
high = 200
ramp_factor = (xgrid / epochs) ** 2
# Only the (high - low) span is scaled by the ramp, so the curve starts at
# `low`. The previous grouping, (low + (high - low)) * ramp_factor, reduced
# to high * ramp_factor and therefore started at 0.
# xgrid stops at epochs - 1, so the last point is just short of `high`.
fx = low + (high - low) * ramp_factor
# Earlier candidate curves, kept for reference:
# plt.plot(xgrid, 655/(1+np.exp(-1*(xgrid-5))))
# plt.plot(xgrid, 655*(1-np.exp(-k*xgrid))+0)
# plt.plot(xgrid, 655/100*xgrid**2)
plt.scatter(xgrid, fx)
plt.plot(xgrid, fx)
plt.xlim(0, epochs)

In [None]:
# Exponential (geometric) ramp: grow from `low` toward `high` by a constant
# ratio per epoch.
epochs = 20
xgrid = np.arange(epochs)
low = 10
high = 200
ramp_factor = (xgrid / epochs)
# Geometric interpolation low * (high / low) ** t. The exponent must apply
# to the ratio only: the previous grouping, (low * (high / low)) ** t,
# reduced to high ** t and started at 1 instead of `low`.
fx = low * (high / low) ** ramp_factor
# Earlier candidate curves, kept for reference:
# plt.plot(xgrid, 655/(1+np.exp(-1*(xgrid-5))))
# plt.plot(xgrid, 655*(1-np.exp(-k*xgrid))+0)
# plt.plot(xgrid, 655/100*xgrid**2)
plt.scatter(xgrid, fx)
plt.plot(xgrid, fx)
plt.xlim(0, epochs)

In [None]:
# Logarithmic ramp: rises quickly at first, then flattens as it nears `high`.
epochs = 20
low, high = 10, 200
xgrid = np.arange(epochs)

# Ramp factor is 0 at epoch 0 and stays below 1 on this grid.
ramp_factor = np.log(1 + xgrid) / np.log(1 + epochs)
fx = low + (high - low) * ramp_factor

# Earlier candidate curves, kept for reference:
# fx = (low*(high/low)) ** ramp_factor
# plt.plot(xgrid, 655/(1+np.exp(-1*(xgrid-5))))
# plt.plot(xgrid, 655*(1-np.exp(-k*xgrid))+0)
# plt.plot(xgrid, 655/100*xgrid**2)
plt.scatter(xgrid, fx)
plt.plot(xgrid, fx)
plt.xlim(0, 20)

In [None]:
# Override the epoch count and turn on verbose fitting for the run below.
params.update({'epochs': 20, 'verbose_fit': True})

In [None]:
# Set the seed, build an RNN from `params`, and run it on the single-case
# data; run_model returns two values, kept here as `m` and `errs`.
reproducibility.set_seed()
rnn = RNN(params)
m, errs = rnn.run_model(rnn_dat)

## Test Spatial Data

In [None]:
# Reload the training data from train.pkl for the spatial-data test.
train = read_pkl('train.pkl')

In [None]:
# Start again from the "rnn" section of params.yaml, wrapped in RNNParams.
params = RNNParams(read_yml("params.yaml", subkey="rnn"))

In [None]:
# Display how many keys the training data contains.
len(train.keys())

In [None]:
from itertools import islice

# Keep only the first 100 entries of `train`, in iteration order.
dat = dict(islice(train.items(), 100))

In [None]:
# Display the keys selected for the subset.
dat.keys()

In [None]:
# Pass the subset through the project's combine_nested helper; the result is
# used as the input to RNNData below.
from data_funcs import combine_nested
dd = combine_nested(dat)

In [None]:
# Reload utils so that edits to the module take effect in this session,
# then import Dict from the reloaded module.
import importlib
import utils
importlib.reload(utils)
from utils import Dict

In [None]:
# Wrap the combined data in the project's Dict class.
dd = Dict(dd)

In [None]:
# Reload moisture_rnn and re-import RNNData so the name points at the
# reloaded class.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNNData

In [None]:
# Build the data object from the combined subset with a fixed scaler and
# feature list, then split it into train/validation/test portions.
rnn_dat = RNNData(
    dd,
    scaler="standard",
    features_list=['Ed', 'Ew', 'rain'],
)
rnn_dat.train_test_split(train_frac=.9, val_frac=.05)

In [None]:
# Scale the data using the scaler chosen when rnn_dat was constructed.
rnn_dat.scale_data()

In [None]:
# Reshape the data using the timesteps and batch size held in `params`.
rnn_dat.batch_reshape(timesteps = params['timesteps'], batch_size = params['batch_size'])

In [None]:
# Display the shape of the training inputs after reshaping.
rnn_dat.X_train.shape

In [None]:
# Display the shape of the training targets after reshaping.
rnn_dat.y_train.shape

In [None]:
# Display n_seqs; it is passed as loc_batch_reset to ResetStatesCallback below.
rnn_dat.n_seqs

In [None]:
# Reload moisture_rnn and re-import RNN so the name points at the reloaded
# class.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNN

In [None]:
from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback

# Architecture and training overrides for the spatial run.
params.update({
    'epochs': 10,
    'verbose_fit': True,
    'rnn_layers': 2,
    'rnn_units': 20,
    'dense_layers': 1,
    'dense_units': 10,
    'activation': ['tanh', 'tanh'],
    'features_list': ['Ed', 'Ew', 'rain'],
})
reproducibility.set_seed(123)
rnn = RNN(params)

# Fit the training model directly (instead of through rnn.run_model) so the
# callbacks can be supplied explicitly.
fit_callbacks = [
    ResetStatesCallback(
        batch_reset=params['batch_reset'],
        loc_batch_reset=rnn_dat.n_seqs,
    ),
    EarlyStoppingCallback(patience=params['early_stopping_patience']),
]
history = rnn.model_train.fit(
    rnn_dat.X_train,
    rnn_dat.y_train,
    batch_size=params['batch_size'],
    epochs=params['epochs'],
    callbacks=fit_callbacks,
    validation_data=(rnn_dat.X_val, rnn_dat.y_val),
)
              

In [None]:
# Plot the per-epoch losses recorded by fit() on a logarithmic y-axis.
plt.figure()
loss_curves = history.history
plt.semilogy(loss_curves['loss'], label='Training loss')
# The validation curve is drawn only when fit() recorded one.
if 'val_loss' in loss_curves:
    plt.semilogy(loss_curves['val_loss'], label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Predict from the first element of the test inputs.
preds = rnn.predict(rnn_dat.X_test[0])

In [None]:
# Compare observations with predictions for the same test element. `preds`
# is computed from rnn_dat.X_test[0], so the observed series must also use
# index 0; the previous y_test[2] paired the predictions with a different
# element.
plt.plot(rnn_dat.y_test[0], label='Observed')
plt.plot(preds, label='Predicted')
plt.legend()

In [None]:
# Larger architecture and a smaller batch size for a second spatial run.
params.update({
    'epochs': 10,
    'batch_size': 5,
    'verbose_fit': True,
    'rnn_layers': 2,
    'rnn_units': 30,
    'dense_layers': 2,
    'dense_units': 20,
    'activation': ['tanh', 'tanh'],
})
reproducibility.set_seed(123)
rnn = RNN(params)

# run_model returns two values, kept here as `m` and `errs`.
m, errs = rnn.run_model(rnn_dat)

## LSTM

TODO: FIX BELOW

In [None]:
# Reload moisture_rnn and re-import RNN_LSTM so the name points at the
# reloaded class.
import importlib 
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNN_LSTM

In [None]:
# Read the "lstm" section of params.yaml as a plain mapping (not wrapped in
# RNNParams).
with open("params.yaml", encoding="utf-8") as file:
    params = yaml.safe_load(file)["lstm"]

# `case` was never assigned anywhere in this notebook, so the lookup below
# raised NameError. Use the case key already used in the batch-size test.
# NOTE(review): confirm this is the case intended for the LSTM experiments.
case = 'PLFI1_202401'
rnn_dat2 = create_rnn_data2(train[case], params)

In [None]:
# Override the epoch count for the run below.
params.update({'epochs': 10})

In [None]:
# Set the seed, build a model from the "lstm" parameters, and run it.
# NOTE(review): this constructs RNN although the parameters come from the
# "lstm" section and RNN_LSTM was just imported -- confirm whether RNN_LSTM
# was intended here.
reproducibility.set_seed()
rnn = RNN(params)
m, errs = rnn.run_model(rnn_dat2)

In [None]:
# Reload moisture_rnn (bound by an earlier cell's import) and re-import
# RNN_LSTM so the name points at the reloaded class.
import importlib
importlib.reload(moisture_rnn)
from moisture_rnn import RNN_LSTM

In [None]:
# Re-read the "lstm" section of params.yaml to discard earlier overrides.
with open("params.yaml", encoding="utf-8") as file:
    params = yaml.safe_load(file)["lstm"]

# `case` was never assigned anywhere in this notebook, so the lookup below
# raised NameError. Use the case key already used in the batch-size test.
# NOTE(review): confirm this is the case intended for the LSTM experiments.
case = 'PLFI1_202401'
rnn_dat2 = create_rnn_data2(train[case], params)
# Display the loaded parameters.
params

In [None]:
# Overrides for the LSTM run: learning rate, epoch count and clip value.
params.update({'learning_rate': 1e-6, 'epochs': 10, 'clipvalue': 1.0})

In [None]:
# Set the seed, build an RNN_LSTM from `params`, and run it on rnn_dat2;
# run_model returns two values, kept here as `m` and `errs`.
reproducibility.set_seed()
lstm = RNN_LSTM(params)
m, errs = lstm.run_model(rnn_dat2)