# v2.1 exploration: trying to make it work better

In [None]:
# Environment
import os
import os.path as osp
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
# Local modules
sys.path.append('..')
import reproducibility
import pandas as pd
from utils import print_dict_summary
from data_funcs import rmse
from moisture_rnn import RNNParams, RNNData, RNN, RNN_LSTM
from moisture_rnn_pkl import pkl2train
from tensorflow.keras.callbacks import Callback
from utils import hash2
import copy
import logging
import pickle
from utils import logging_setup, read_yml, read_pkl, hash_ndarray, hash_weights
import yaml
import copy

In [None]:
# Configure logging for this session via the project's `logging_setup`
# helper (imported from utils above).
logging_setup()

## Test Batch Reset

In [None]:
# Read 'train.pkl' with the project's `read_pkl` helper and display the
# top-level keys of the loaded object.
train = read_pkl('train.pkl')
train.keys()

In [None]:
# Re-execute the moisture_rnn module so that edits made on disk are picked up
# without restarting the kernel, then re-import RNN and RNNData so these
# names refer to the reloaded definitions.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNN, RNNData

In [None]:
# Read params.yaml (subkey "rnn") and wrap the result in an RNNParams object.
params = RNNParams(read_yml("params.yaml", subkey="rnn"))

In [None]:
# Build an RNNData object from the 'PLFI1_202401' entry of `train`, taking
# the scaler and the feature list from `params`.
rnn_dat = RNNData(
    train['PLFI1_202401'],
    scaler=params['scaler'],
    features_list=params['features_list'],
)
# 90% train, 5% validation (the remainder is presumably the test set --
# confirm in RNNData.train_test_split).
rnn_dat.train_test_split(train_frac=.9, val_frac=.05)
rnn_dat.scale_data()
# Reshape using the timesteps and batch size configured in `params`.
rnn_dat.batch_reshape(
    timesteps=params['timesteps'],
    batch_size=params['batch_size'],
)

In [None]:
# Run the RNN with batch_schedule_type 'exp', with bmin 20 and bmax set to
# rnn_dat.hours. The seed is set first, before the model is constructed.
reproducibility.set_seed()
params.update(
    {
        'batch_schedule_type': 'exp',
        'bmin': 20,
        'bmax': rnn_dat.hours,
    }
)
rnn = RNN(params)
# run_model returns a pair; the second item is inspected as `errs` later on.
m, errs = rnn.run_model(rnn_dat, plot_period="predict")

In [None]:
# Display the `spatial` attribute of the RNNData object.
# NOTE(review): presumably a flag describing whether the data covers multiple
# locations -- confirm in moisture_rnn.RNNData.
rnn_dat.spatial

In [None]:
# Same run as the 'exp' experiment, but with batch_schedule_type 'log'.
# The seed is reset first, before the model is constructed.
reproducibility.set_seed()
params.update(
    {
        'batch_schedule_type': 'log',
        'bmin': 20,
        'bmax': rnn_dat.hours,
    }
)
rnn = RNN(params)
m, errs = rnn.run_model(rnn_dat, plot_period="predict")

## Test Spatial Data

In [None]:
# Read 'train.pkl' again via `read_pkl`, rebinding `train` for this section.
train = read_pkl('train.pkl')

In [None]:
# Rebuild `params` from params.yaml (subkey "rnn"), discarding the updates
# applied to the previous `params` object.
params = RNNParams(read_yml("params.yaml", subkey="rnn"))

In [None]:
# Number of top-level keys in `train`.
len(train.keys())

In [None]:
from itertools import islice

# Keep only the first 100 entries of `train` (in its iteration order).
dat = {
    case_key: train[case_key]
    for case_key in islice(train, 100)
}

In [None]:
# Display the keys selected into `dat`.
dat.keys()

In [None]:
# Pass the subset through the project's `combine_nested` helper.
# NOTE(review): presumably merges the per-key nested dictionaries into a
# single structure -- confirm in data_funcs.
from data_funcs import combine_nested
dd = combine_nested(dat)

In [None]:
# Re-execute the utils module so that edits made on disk are picked up
# without restarting the kernel, then import `Dict` from the reloaded module.
import importlib
import utils
importlib.reload(utils)
from utils import Dict

In [None]:
# Wrap the combined data in the project's `Dict` class (from utils),
# rebinding `dd` to the wrapped object.
dd = Dict(dd)

In [None]:
# Re-execute moisture_rnn to pick up on-disk edits, then re-import RNNData so
# the name refers to the reloaded class.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNNData

In [None]:
# Build an RNNData object from the combined data `dd`, with the "standard"
# scaler and the features Ed, Ew and rain.
rnn_dat = RNNData(
    dd,
    scaler="standard",
    features_list=['Ed', 'Ew', 'rain'],
)
# 90% train, 5% validation (the remainder is presumably the test set --
# confirm in RNNData.train_test_split).
rnn_dat.train_test_split(train_frac=.9, val_frac=.05)

In [None]:
# Apply scaling to the data held by `rnn_dat` (constructed above with the
# "standard" scaler).
rnn_dat.scale_data()

In [None]:
# Call batch_reshape with the timesteps and batch size configured in `params`.
rnn_dat.batch_reshape(
    timesteps=params['timesteps'],
    batch_size=params['batch_size'],
)

In [None]:
# Re-execute moisture_rnn to pick up on-disk edits, then re-import RNN so the
# name refers to the reloaded class.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNN

In [None]:
from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback

# Training and architecture settings for a short (5-epoch) run on the
# Ed / Ew / rain features.
params.update(
    {
        'epochs': 5,
        'learning_rate': 0.0001,
        'verbose_fit': False,
        'rnn_layers': 2,
        'rnn_units': 20,
        'dense_layers': 1,
        'dense_units': 10,
        'activation': ['relu', 'relu'],
        'features_list': ['Ed', 'Ew', 'rain'],
    }
)
# Batch schedule settings, applied as a separate update.
params.update(
    {
        'batch_schedule_type': 'exp',
        'bmin': 20,
        'bmax': rnn_dat.hours,
    }
)
# The seed is set before the model is constructed.
reproducibility.set_seed(123)
rnn = RNN(params)
m, errs = rnn.run_model(rnn_dat)

In [None]:
# Shape of the errors returned by run_model.
errs.shape

In [None]:
# Mean of the errors returned by run_model.
errs.mean()

In [None]:
# Median of the errors returned by run_model (compare with the mean above).
np.median(errs)

In [None]:
from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback

# Same settings as the preceding run_model experiment, but here the training
# model is fitted directly so the Keras history can be inspected.
params.update(
    {
        'epochs': 5,
        'learning_rate': 0.0001,
        'verbose_fit': False,
        'rnn_layers': 2,
        'rnn_units': 20,
        'dense_layers': 1,
        'dense_units': 10,
        'activation': ['relu', 'relu'],
        'features_list': ['Ed', 'Ew', 'rain'],
    }
)
reproducibility.set_seed(123)
rnn = RNN(params)

# Callbacks provided by moisture_rnn: state resetting and early stopping.
fit_callbacks = [
    ResetStatesCallback(params),
    EarlyStoppingCallback(patience=params['early_stopping_patience']),
]
history = rnn.model_train.fit(
    rnn_dat.X_train,
    rnn_dat.y_train,
    batch_size=params['batch_size'],
    epochs=params['epochs'],
    callbacks=fit_callbacks,
    validation_data=(rnn_dat.X_val, rnn_dat.y_val),
)
              

In [None]:
# Plot the training loss per epoch on a log-scaled y axis, and the validation
# loss as well when the history recorded one.
plt.figure()
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.semilogy(history.history['loss'], label='Training loss')
if 'val_loss' in history.history:
    plt.semilogy(history.history['val_loss'], label='Validation loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Predict on the validation inputs using the training model.
vpreds = rnn.model_train.predict(rnn_dat.X_val)

In [None]:
# Shape of the validation predictions (to compare with y_val's shape).
vpreds.shape

In [None]:
# Shape of the validation targets (to compare with the predictions' shape).
rnn_dat.y_val.shape

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Validation MSE via scikit-learn. Arguments are given in the documented
# (y_true, y_pred) order, matching the (targets, predictions) order used in
# the TensorFlow computation of the same quantity.
mean_squared_error(rnn_dat.y_val, vpreds)

In [None]:
# Validation MSE computed with TensorFlow: the Keras mse of targets vs.
# predictions, averaged down to a single NumPy scalar.
loss = tf.reduce_mean(tf.keras.losses.mse(rnn_dat.y_val, vpreds)).numpy()
loss

In [None]:
# Hash of the training model's weights (project helper from utils); used to
# compare against the prediction model's weights.
hash_weights(rnn.model_train)

In [None]:
# Copy the training model's weights into the prediction model.
rnn.model_predict.set_weights(rnn.model_train.get_weights())

In [None]:
# Hash of the prediction model's weights; expected to match the training
# model's hash once the weights have been copied over.
hash_weights(rnn.model_predict)

In [None]:
# Predict for the first element of the test inputs.
tpreds = rnn.predict(rnn_dat.X_test[0])

In [None]:
# Overlay the first test target series and the model's prediction for it.
# The labels and legend make the two curves distinguishable.
plt.plot(rnn_dat.y_test[0], label='Observed (y_test[0])')
plt.plot(tpreds, label='Predicted')
plt.legend()

In [None]:
# RMSE (project helper from data_funcs) between the first test target series
# and its prediction.
rmse(rnn_dat.y_test[0], tpreds)

In [None]:
from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback

# Longer run: 20 epochs with verbose fitting enabled; the architecture
# settings are unchanged (2 RNN layers of 20 units, 1 dense layer of 10).
params.update(
    {
        'epochs': 20,
        'learning_rate': 0.0001,
        'verbose_fit': True,
        'rnn_layers': 2,
        'rnn_units': 20,
        'dense_layers': 1,
        'dense_units': 10,
        'activation': ['relu', 'relu'],
        'features_list': ['Ed', 'Ew', 'rain'],
    }
)
# The seed is set before the model is constructed.
reproducibility.set_seed(123)
rnn = RNN(params)

In [None]:
# Run the model on `rnn_dat` with plot_period="predict"; run_model returns a
# pair, unpacked here into `m` and `errs`.
m, errs = rnn.run_model(rnn_dat, plot_period="predict")

In [None]:
# Length of the full input collection held by `rnn_dat`.
len(rnn_dat.X)

In [None]:
# Length of the test input collection held by `rnn_dat`.
len(rnn_dat.X_test)

In [None]:
# Predict for the first element of the test inputs.
preds0 = rnn.predict(rnn_dat.X_test[0])

In [None]:
# RMSE (project helper from data_funcs) between the prediction and the first
# test target series.
# NOTE(review): argument order here is (prediction, target), the reverse of
# the earlier rmse call -- confirm rmse is symmetric in data_funcs.
rmse(preds0, rnn_dat.y_test[0])

In [None]:
# Overlay the first test target series and the model's prediction for it.
# The labels and legend make the two curves distinguishable.
plt.plot(rnn_dat.y_test[0], label='Observed (y_test[0])')
plt.plot(preds0, label='Predicted')
plt.legend()

## LSTM

TODO: Fix the LSTM cells below.

In [None]:
# Re-execute moisture_rnn to pick up on-disk edits, then re-import RNN_LSTM
# so the name refers to the reloaded class.
import importlib 
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNN_LSTM

In [None]:
# Read params.yaml (subkey "lstm") and wrap the result in an RNNParams object.
params = RNNParams(read_yml("params.yaml", subkey="lstm"))

In [None]:
from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback

# Override the LSTM parameters with the same settings used for the 20-epoch
# RNN run, then build the LSTM model with a fixed seed.
params.update(
    {
        'epochs': 20,
        'learning_rate': 0.0001,
        'verbose_fit': True,
        'rnn_layers': 2,
        'rnn_units': 20,
        'dense_layers': 1,
        'dense_units': 10,
        'activation': ['relu', 'relu'],
        'features_list': ['Ed', 'Ew', 'rain'],
    }
)
reproducibility.set_seed(123)
lstm = RNN_LSTM(params)

# Fit the training model directly on the existing `rnn_dat` splits, with the
# state-reset and early-stopping callbacks from moisture_rnn.
lstm_callbacks = [
    ResetStatesCallback(params),
    EarlyStoppingCallback(patience=params['early_stopping_patience']),
]
history = lstm.model_train.fit(
    rnn_dat.X_train,
    rnn_dat.y_train,
    batch_size=params['batch_size'],
    epochs=params['epochs'],
    callbacks=lstm_callbacks,
    validation_data=(rnn_dat.X_val, rnn_dat.y_val),
)
              