In [None]:
# Environment
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
# Local modules
sys.path.append('..')
import reproducibility
from utils import print_dict_summary
from data_funcs import load_and_fix_data, rmse
from moisture_rnn import RNN, create_rnn_data2
from moisture_rnn_pkl import pkl2train
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import mean_squared_error
from utils import hash2
import copy
import logging
from utils import logging_setup

In [None]:
# Configure logging for this notebook using the project's utils.logging_setup
# helper, called with its default arguments.
logging_setup()

## Test Data Creation

In [None]:
import yaml

# Read the YAML configuration (path is relative to the current working
# directory) and keep only its "rnn" section as the parameter set.
with open("params.yaml") as file:
    # safe_load parses plain YAML data only; it does not construct arbitrary objects.
    params = yaml.safe_load(file)["rnn"]
# params.update({'scale': 1})

In [None]:
# Build the collection of training cases from two pickle files. `train` is
# indexed by case name in later cells ('reproducibility', 'CNFC1_202401').
# NOTE(review): pkl2train presumably unpickles these files — only point it at trusted data.
train = pkl2train(['data/reproducibility_dict2.pickle', "data/test_CA_202401.pkl"])

In [None]:
# Work on an independent deep copy so the later params2.update(...) calls in
# this notebook leave the freshly loaded `params` untouched.
params2 = copy.deepcopy(params)

In [None]:
# Prepare model input data for the 'reproducibility' case using the working parameters.
# NOTE(review): create_rnn_data2 may read and/or modify params2 — confirm in moisture_rnn.
rnn_dat = create_rnn_data2(train['reproducibility'], params2)

In [None]:
# Reseed first so that model construction and training start from a known RNG state.
# NOTE(review): assumes reproducibility.set_seed() seeds every RNG the model uses — confirm.
reproducibility.set_seed()
# Build the RNN from the working parameter copy.
rnn = RNN(params2)
# NOTE(review): run_model presumably trains on and evaluates rnn_dat — confirm in moisture_rnn.
rnn.run_model(rnn_dat)

In [None]:
# Show the case names available in `train` as the cell output.
train.keys()

In [None]:
# Set 'val_frac' to 0.2 on the working parameters. This mutates params2 in
# place, so the setting also applies to every later cell that uses params2.
# NOTE(review): 'val_frac' is presumably the fraction of data held out for validation — confirm.
params2.update({'val_frac': .2})
# Rebuild the model input data, this time for the 'CNFC1_202401' case.
rnn_dat = create_rnn_data2(train['CNFC1_202401'], params2)

In [None]:
# Reseed before building the model so this run does not depend on RNG state
# left behind by the previous training cell.
reproducibility.set_seed()
# Build a fresh RNN with the updated parameters (now including 'val_frac').
rnn = RNN(params2)
# NOTE(review): run_model presumably trains on and evaluates rnn_dat — confirm in moisture_rnn.
rnn.run_model(rnn_dat)

## LSTM

In [None]:
from moisture_rnn import RNN_LSTM

# from tensorflow.keras.layers import LSTM, Input, Dropout, Dense, SimpleRNN
# from moisture_rnn import staircase_2
# from abc import ABC, abstractmethod
# from data_funcs import compare_dicts
# class RNNModel(ABC):
#     def __init__(self, params: dict):
#         self.params = params
#         if type(self) is RNNModel:
#             raise TypeError("MLModel is an abstract class and cannot be instantiated directly")
#         super().__init__()

#     @abstractmethod
#     def fit(self, X_train, y_train, weights=None):
#         pass

#     @abstractmethod
#     def predict(self, X):
#         pass

#     def run_model(self, dict0):
#         # Make copy to prevent changing in place
#         dict1 = copy.deepcopy(dict0)
#         # Extract Fields
#         X_train, y_train, X_test, y_test = dict1['X_train'].copy(), dict1['y_train'].copy(), dict1["X_test"].copy(), dict1['y_test'].copy()
#         if 'X_val' in dict1:
#             X_val, y_val = dict1['X_val'].copy(), dict1['y_val'].copy()
#         else:
#             X_val = None
#         case_id = dict1['case']

#         # Fit model
#         if X_val is None:
#             self.fit(X_train, y_train)
#         else:
#             self.fit(X_train, y_train, validation_data = (X_val, y_val))
#         # Generate Predictions, 
#         # run through training to get hidden state set properly for forecast period
#         if X_val is None:
#             X = np.concatenate((X_train, X_test))
#             y = np.concatenate((y_train, y_test)).flatten()
#         else:
#             X = np.concatenate((X_train, X_val, X_test))
#             y = np.concatenate((y_train, y_val, y_test)).flatten()
#         # Predict
#         print(f"Predicting Training through Test \n features hash: {hash2(X)} \n response hash: {hash2(y)} ")
#         m = self.predict(X).flatten()
#         dict1['m']=m
#         dict0['m']=m # add to outside env dictionary, should be only place this happens
#         if self.params['scale']:
#             print(f"Rescaling data using {self.params['scaler']}")
#             if self.params['scaler'] == "reproducibility":
#                 m  *= self.params['scale_fm']
#                 y  *= self.params['scale_fm']
#                 y_train *= self.params['scale_fm']
#                 y_test *= self.params['scale_fm']
#         # Check Reproducibility, TODO: old dict calls it hidden_units not rnn_units, so this doesn't check that
#         if (case_id == "reproducibility") and compare_dicts(self.params, repro_hashes['params'], ['epochs', 'batch_size', 'scale', 'activation', 'learning_rate']):
#             print("Checking Reproducibility")
#             checkm = m[350]
#             hv = hash2(self.model_predict.get_weights())
#             if self.params['phys_initialize']:
#                 hv5 = repro_hashes['phys_initialize']['fitted_weight_hash']
#                 mv = repro_hashes['phys_initialize']['predictions_hash']
#             else:
#                 hv5 = repro_hashes['rand_initialize']['fitted_weight_hash']
#                 mv = repro_hashes['rand_initialize']['predictions_hash']           
            
#             print(f"Fitted weights hash (check 5): {hv}, Reproducibility weights hash: {hv5}, Error: {hv5-hv}")
#             print(f"Model predictions hash: {checkm}, Reproducibility preds hash: {mv}, Error: {mv-checkm}")

#         # print(dict1.keys())
#         # Plot final fit and data
#         # TODO: make plot_data specific to this context
#         dict1['y'] = y
#         plot_data(dict1, title="RNN", title2=dict1['case'])
        
#         # Calculate Errors
#         err = rmse(m, y)
#         train_ind = dict1["train_ind"] # index of final training set value
#         test_ind = dict1["test_ind"] # index of first test set value
#         err_train = rmse(m[:train_ind], y_train.flatten())
#         err_pred = rmse(m[test_ind:], y_test.flatten())
#         rmse_dict = {
#             'all': err, 
#             'training': err_train, 
#             'prediction': err_pred
#         }
#         return rmse_dict
        
# class ResetStatesCallback(Callback):
#     def on_epoch_end(self, epoch, logs=None):
#         self.model.reset_states()


# class RNN_LSTM(RNNModel):
#     def __init__(self, params, loss='mean_squared_error'):
#         super().__init__(params)
#         self.model_train = self._build_model_train()
#         self.model_predict = self._build_model_predict()

#     def _build_model_train(self, return_sequences=False):
#         inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
#         x = inputs
#         for i in range(self.params['rnn_layers']):
#             x = LSTM(
#                 units=self.params['rnn_units'],
#                 activation=self.params['activation'][0],
#                 dropout=self.params["dropout"][0],
#                 stateful=self.params['stateful'],
#                 return_sequences=return_sequences)(x)
#         if self.params["dropout"][1] > 0:
#             x = Dropout(self.params["dropout"][1])(x)            
#         for i in range(self.params['dense_layers']):
#             x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
#         model = tf.keras.Model(inputs=inputs, outputs=x)
#         optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
#         model.compile(loss='mean_squared_error', optimizer=optimizer)
        
#         if self.params["verbose_weights"]:
#             print(f"Initial Weights Hash: {hash2(model.get_weights())}")
#         return model
#     def _build_model_predict(self, return_sequences=True):
        
#         inputs = tf.keras.Input(shape=self.params['pred_input_shape'])
#         x = inputs
#         for i in range(self.params['rnn_layers']):
#             x = LSTM(
#                 units=self.params['rnn_units'],
#                 activation=self.params['activation'][0],
#                 stateful=False,return_sequences=return_sequences)(x)
#         for i in range(self.params['dense_layers']):
#             x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
#         model = tf.keras.Model(inputs=inputs, outputs=x)
#         optimizer=tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
#         model.compile(loss='mean_squared_error', optimizer=optimizer)  

#         # Set Weights to model_train
#         w_fitted = self.model_train.get_weights()
#         model.set_weights(w_fitted)
        
#         return model
#     def format_train_data(self, X, y, verbose=False):
#         X, y = staircase_2(X, y, timesteps = self.params["timesteps"], batch_size=self.params["batch_size"], verbose=verbose)
#         return X, y
#     def format_pred_data(self, X):
#         return np.reshape(X,(1, X.shape[0], self.params['features']))
#     def fit(self, X_train, y_train, plot=True, plot_title = '', 
#             weights=None, callbacks=[], verbose_fit=None, validation_data=None, *args, **kwargs):
#         # verbose_fit argument is for printing out update after each epoch, which gets very long
#         # These print statements at the top could be turned off with a verbose argument, but then
#         # there would be a bunch of different verbose params
#         print(f"Training simple RNN with params: {self.params}")
#         X_train, y_train = self.format_train_data(X_train, y_train)
#         print(f"X_train hash: {hash2(X_train)}")
#         print(f"y_train hash: {hash2(y_train)}")
#         if validation_data is not None:
#             X_val, y_val = self.format_train_data(validation_data[0], validation_data[1])
#             print(f"X_val hash: {hash2(X_val)}")
#             print(f"y_val hash: {hash2(y_val)}")
#         print(f"Initial weights before training hash: {hash2(self.model_train.get_weights())}")
#         # Setup callbacks
#         if self.params["reset_states"]:
#             callbacks=callbacks+[ResetStatesCallback()]
        
#         # Note: we overload the params here so that verbose_fit can be easily turned on/off at the .fit call 
#         if verbose_fit is None:
#             verbose_fit = self.params['verbose_fit']
#         # Evaluate Model once to set nonzero initial state
#         if self.params["batch_size"]>= X_train.shape[0]:
#             self.model_train(X_train)
#         if validation_data is not None:
#             history = self.model_train.fit(
#                 X_train, y_train+self.params['centering'][1], 
#                 epochs=self.params['epochs'], 
#                 batch_size=self.params['batch_size'],
#                 callbacks = callbacks,
#                 verbose=verbose_fit,
#                 validation_data = (X_val, y_val),
#                 *args, **kwargs
#             )
#         else:
#             history = self.model_train.fit(
#                 X_train, y_train+self.params['centering'][1], 
#                 epochs=self.params['epochs'], 
#                 batch_size=self.params['batch_size'],
#                 callbacks = callbacks,
#                 verbose=verbose_fit,
#                 *args, **kwargs
#             )
#         if plot:
#             self.plot_history(history,plot_title)
#         if self.params["verbose_weights"]:
#             print(f"Fitted Weights Hash: {hash2(self.model_train.get_weights())}")

#         # Update Weights for Prediction Model
#         w_fitted = self.model_train.get_weights()
#         self.model_predict.set_weights(w_fitted)
#     def predict(self, X_test):
#         print("Predicting with simple RNN")
#         X_test = self.format_pred_data(X_test)
#         preds = self.model_predict.predict(X_test).flatten()
#         return preds


#     def plot_history(self, history, plot_title):
#         plt.semilogy(history.history['loss'], label='Training loss')
#         if 'val_loss' in history.history:
#             plt.semilogy(history.history['val_loss'], label='Validation loss')
#         plt.title(f'{plot_title} Model loss')
#         plt.ylabel('Loss')
#         plt.xlabel('Epoch')
#         plt.legend(loc='upper left')
#         plt.show()

In [None]:
# Switch back to the 'reproducibility' case for the LSTM experiments. params2
# still carries the 'val_frac' value set earlier in the notebook; a later cell
# reads rnn_dat['X_val'] and rnn_dat['y_val'] from this result.
rnn_dat = create_rnn_data2(train['reproducibility'],params2)

In [None]:
# from tensorflow.keras.layers import LSTM, Input, Dropout, Dense
# Reseed so the LSTM is built and trained from a known RNG state.
reproducibility.set_seed()
# Override the epoch count in place; this persists for later uses of params2.
params2.update({'epochs': 50})
# Build the LSTM model and fit it on the training split only (no validation data passed).
lstm = RNN_LSTM(params2)
lstm.fit(rnn_dat["X_train"], rnn_dat["y_train"])

In [None]:
# Reseed before building the model, as every other training cell in this
# notebook does. Without this, the initial weights depend on whatever RNG state
# the previous cell left behind, so re-running this cell alone gives a
# different fit each time.
reproducibility.set_seed()
lstm = RNN_LSTM(params2)
# Same fit as the previous cell, but with the validation split passed explicitly
# via `validation_data`.
lstm.fit(rnn_dat["X_train"], rnn_dat["y_train"],
         validation_data=(rnn_dat['X_val'], rnn_dat['y_val']))

In [None]:
# from moisture_rnn import repro_hashes

In [None]:
# Reseed before building the model, matching the other model-construction cells,
# so this run does not depend on RNG state consumed by earlier cells and gives
# the same result when the cell is re-executed on its own.
reproducibility.set_seed()
lstm = RNN_LSTM(params2)
# NOTE(review): run_model presumably trains on and evaluates rnn_dat — confirm in moisture_rnn.
lstm.run_model(rnn_dat)

In [None]:
import importlib 
import moisture_rnn
# Development aid: re-execute the moisture_rnn module so edits made to it on
# disk are picked up without restarting the kernel. Names imported from the
# module earlier still point at the old objects until they are re-imported.
importlib.reload(moisture_rnn)
from moisture_rnn import RNN_LSTM

In [None]:
# Reload the configuration, this time keeping the "lstm" section. This rebinds
# `params`, which previously held the "rnn" section; params2 is unaffected.
with open("params.yaml") as file:
    # safe_load parses plain YAML data only; it does not construct arbitrary objects.
    params = yaml.safe_load(file)["lstm"]
    
# Rebuild the 'reproducibility' input data using the LSTM parameters. `params`
# is passed directly here, not a deep copy as in the RNN cells earlier.
# NOTE(review): if create_rnn_data2 modifies its params argument, `params` changes too — confirm.
rnn_dat = create_rnn_data2(train['reproducibility'],params)

In [None]:
# Display the current LSTM parameter set as the cell output.
params

In [None]:
# Reseed so the model is built and trained from a known RNG state.
reproducibility.set_seed()
# Build the LSTM from the "lstm" configuration section and fit it on the
# training split only (no validation data passed).
lstm = RNN_LSTM(params)
lstm.fit(rnn_dat["X_train"], rnn_dat["y_train"])

In [None]:
# Override the epoch count in place on the LSTM parameters.
params.update({'epochs': 75})
# Reseed after changing parameters and before building the model so the run
# starts from a known RNG state.
reproducibility.set_seed()
lstm = RNN_LSTM(params)
# NOTE(review): run_model presumably trains on and evaluates rnn_dat — confirm in moisture_rnn.
lstm.run_model(rnn_dat)