In [None]:
# Environment
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
# Local modules
sys.path.append('..')
import reproducibility
from utils import print_dict_summary
from data_funcs import load_and_fix_data, rmse
from abc import ABC, abstractmethod
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import mean_squared_error
from utils import hash2

In [None]:
# Pickle holding the reproducibility cases used by the single-batch experiment.
reproducibility_file = 'version_control/reproducibility_dict0.pickle'

# Load every entry into a fresh dict and summarise what was loaded.
repro = dict(load_and_fix_data(reproducibility_file))
print_dict_summary(repro)

# Select the case to train on. `case_data` is the same object as `repro[case]`,
# so writing "h2" here is what later cells read back via repro[case]['h2'].
case = 'case11'
case_data = repro[case]
case_data["h2"] = 300

## Single Batch Train

In [None]:
class ResetStatesCallback(Callback):
    """Keras callback that calls ``reset_states()`` on its model after every epoch."""
    def on_epoch_end(self, epoch, logs=None):
        # `epoch` and `logs` are part of the Keras hook signature; neither is used here.
        self.model.reset_states()
        
def rmse(a, b):
    """Root-mean-square error between two array-likes.

    Both inputs are flattened before comparison, so only the total number of
    elements has to agree (e.g. an ``(n, 1)`` array may be compared with an
    ``(n,)`` array). Lists, tuples and other array-likes are accepted, not
    only NumPy arrays.

    Args:
        a: First set of values.
        b: Second set of values, with the same number of elements as ``a``.

    Returns:
        The RMSE as a NumPy floating-point scalar.

    Raises:
        ValueError: If the inputs are empty or differ in element count.
    """
    a_flat = np.asarray(a, dtype=float).ravel()
    b_flat = np.asarray(b, dtype=float).ravel()
    if a_flat.size != b_flat.size:
        raise ValueError(
            f"rmse: inputs have different numbers of elements ({a_flat.size} vs {b_flat.size})"
        )
    if a_flat.size == 0:
        raise ValueError("rmse: inputs are empty")
    # Computed directly with NumPy; NaN values propagate into the result.
    return np.sqrt(np.mean((a_flat - b_flat) ** 2))


class RNNModel(ABC):
    """Abstract base class for RNN model wrappers configured by a params dict.

    Concrete subclasses must implement ``fit`` and ``predict``.
    """

    def __init__(self, params: dict):
        """Store the hyperparameter dictionary on the instance.

        Args:
            params: Model configuration, kept by reference as ``self.params``.

        Raises:
            TypeError: If ``RNNModel`` itself (not a subclass) is instantiated.
        """
        self.params = params
        # ABC already refuses to instantiate a class with abstract methods;
        # this explicit guard stays as a second line of defence.
        if type(self) is RNNModel:
            raise TypeError("RNNModel is an abstract class and cannot be instantiated directly")
        super().__init__()

    @abstractmethod
    def fit(self, X_train, y_train, weights=None):
        """Train the model on ``X_train`` / ``y_train``; implemented by subclasses."""
        pass

    @abstractmethod
    def predict(self, X):
        """Return model predictions for ``X``; implemented by subclasses."""
        pass

class RNN(RNNModel):
    """Simple-RNN wrapper holding two Keras models that share one set of weights.

    ``model_fit`` is built from ``params['batch_shape']`` and uses
    ``params['stateful']``. ``model_predict`` is built from
    ``params['pred_input_shape']``, is never stateful, and by default returns
    the full output sequence. The weights of ``model_fit`` are copied into
    ``model_predict`` at construction and again after every ``fit``.

    Keys read from ``params``: ``batch_shape``, ``pred_input_shape``,
    ``rnn_layers``, ``rnn_units``, ``dense_layers``, ``dense_units``,
    ``activation`` (index 0 for recurrent layers, index 1 for dense layers),
    ``stateful``, ``verbose_weights``, ``verbose_fit``, ``batch_size``,
    ``epochs`` and ``centering`` (index 1 is added to the training targets).
    """

    def __init__(self, params, loss='mean_squared_error'):
        """Build the training and prediction models.

        Args:
            params: Hyperparameter dictionary (see the class docstring).
            loss: Keras loss used when compiling both models.
        """
        super().__init__(params)
        # `loss` used to be accepted but ignored; it is now passed to compile().
        self.loss = loss
        self.model_fit = self._build_model_fit()
        self.model_predict = self._build_model_predict()

    def _stack_layers(self, x, stateful, return_sequences):
        """Apply the recurrent layers followed by the dense layers to tensor ``x``.

        Layers are created in the same order as before (all SimpleRNN layers,
        then all Dense layers).
        """
        n_rnn = self.params['rnn_layers']
        for i in range(n_rnn):
            # A recurrent layer feeding another recurrent layer must emit the
            # whole sequence; only the last one honours `return_sequences`.
            is_last = i == n_rnn - 1
            x = tf.keras.layers.SimpleRNN(
                self.params['rnn_units'],
                activation=self.params['activation'][0],
                stateful=stateful,
                return_sequences=return_sequences if is_last else True,
            )(x)
        for _ in range(self.params['dense_layers']):
            x = tf.keras.layers.Dense(
                self.params['dense_units'],
                activation=self.params['activation'][1],
            )(x)
        return x

    def _build_model_fit(self, return_sequences=False):
        """Build and compile the training model with a fixed ``batch_shape``."""
        inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
        outputs = self._stack_layers(inputs, self.params['stateful'], return_sequences)
        model = tf.keras.Model(inputs=inputs, outputs=outputs)
        # NOTE: the optimizer is the string 'adam'; no learning rate from
        # params is applied here.
        model.compile(loss=self.loss, optimizer='adam')
        if self.params["verbose_weights"]:
            print(f"Initial Weights Hash: {hash2(model.get_weights())}")

        return model

    def _build_model_predict(self, return_sequences=True):
        """Build the non-stateful prediction model and copy in ``model_fit``'s weights."""
        inputs = tf.keras.Input(shape=self.params['pred_input_shape'])
        outputs = self._stack_layers(inputs, False, return_sequences)
        model = tf.keras.Model(inputs=inputs, outputs=outputs)
        model.compile(loss=self.loss, optimizer='adam')

        # Start from the same weights as the training model.
        model.set_weights(self.model_fit.get_weights())

        return model

    def fit(self, X_train, y_train, plot=True, weights=None, callbacks=None, verbose_fit=None):
        """Train ``model_fit`` and copy the fitted weights into ``model_predict``.

        Args:
            X_train: Training inputs passed to Keras ``fit``.
            y_train: Training targets; ``params['centering'][1]`` is added to
                them before they are passed to Keras.
            plot: If True, plot the loss history after training.
            weights: Unused; kept for compatibility with ``RNNModel.fit``.
            callbacks: Optional list of Keras callbacks (``None`` means none).
            verbose_fit: Keras ``verbose`` setting; when ``None`` the value of
                ``params['verbose_fit']`` is used.
        """
        # verbose_fit controls the per-epoch Keras output, which gets very long.
        # The summary prints below are always shown.
        print(f"Training simple RNN with params: {self.params}")
        print(f"X_train hash: {hash2(X_train)}")
        print(f"y_train hash: {hash2(y_train)}")
        print(f"Initial weights before training hash: {hash2(self.model_fit.get_weights())}")
        # The argument overrides params so verbosity can be toggled per call.
        if verbose_fit is None:
            verbose_fit = self.params['verbose_fit']
        # Avoid sharing one mutable default list between calls.
        if callbacks is None:
            callbacks = []
        # Evaluate the model once to set a nonzero initial state (single-batch case only).
        if self.params["batch_size"] >= X_train.shape[0]:
            self.model_fit(X_train)
        # Fit Model
        history = self.model_fit.fit(
            X_train, y_train + self.params['centering'][1],
            epochs=self.params['epochs'],
            batch_size=self.params['batch_size'],
            callbacks=callbacks,
            verbose=verbose_fit)
        if plot:
            self.plot_history(history)
        if self.params["verbose_weights"]:
            print(f"Fitted Weights Hash: {hash2(self.model_fit.get_weights())}")

        # Update Weights for Prediction Model
        self.model_predict.set_weights(self.model_fit.get_weights())

    def predict(self, X_test):
        """Return ``model_predict``'s raw output for ``X_test``.

        No centering offset is removed from the output here.
        """
        print("Predicting with simple RNN")
        preds = self.model_predict.predict(X_test)
        return preds

    def plot_history(self, history):
        """Plot training (and, if present, validation) loss on a log scale.

        NOTE: the title uses the notebook-level global ``case``, which must be
        defined before this method is called.
        """
        plt.semilogy(history.history['loss'], label='Training loss')
        if 'val_loss' in history.history:
            plt.semilogy(history.history['val_loss'], label='Validation loss')
        plt.title(case + ' Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(loc='upper left')
        plt.show()


In [None]:
import yaml

# Base hyperparameters: the "rnn" section of params.yaml.
with open("params.yaml") as file:
    params = yaml.safe_load(file)["rnn"]

# Turn dropout off for this experiment.
# NOTE(review): the RNN class defined in this notebook does not read
# 'dropout' or 'recurrent_dropout'; confirm which consumer these are for.
params.update({
    'dropout': [0, 0], # NOTE: length must match total number of layers, default is 1 hidden recurrent layer and 1 dense output layer
    'recurrent_dropout': 0, # scalar here; NOTE(review): earlier comment said length must match number of recurrent layers - confirm expected type
})

In [None]:
# Split the selected case into train/test at index repro[case]['h2'].
N = len(repro[case]["fm"]) # total observations
# train_ind = int(N*.8) # index of train/val split
train_ind = repro[case]['h2']
# test_ind = int(N*.9) # index of train/test split
# test_ind = 500

# Features: "Ed" and "Ew" stacked as two columns; target: "fm".
X = np.vstack((repro[case]["Ed"], repro[case]["Ew"])).T
y = repro[case]["fm"]

# Everything before train_ind is training data, the rest is test data
# (the validation split is currently disabled).
X_train = X[:train_ind]
# X_val = X[train_ind:test_ind]
X_test = X[train_ind:]
y_train = y[:train_ind].reshape(-1,1)
# y_val = y[train_ind:test_ind].reshape(-1,1)
y_test = y[train_ind:].reshape(-1,1)

print(f"Total Observations: {N}")
print(f"Num Training: {X_train.shape[0]}")
# print(f"Num Validation: {X_val.shape[0]}")
print(f"Num Test: {X_test.shape[0]}")

In [None]:
from moisture_rnn import staircase
# Rebuild the training arrays with staircase(); the next cell unpacks the
# result as (samples, timesteps, features).
# NOTE: this overwrites X_train / y_train, so the cell is not idempotent -
# re-run the split cell before running it again.
X_train, y_train = staircase(X_train, y_train, timesteps = params["timesteps"], datapoints = len(y_train), verbose=True)
print("~"*50)
# X_val, y_val = staircase(X_val, y_val, timesteps = params["timesteps"], datapoints = len(y_val))
# print("~"*50)
# X_test, y_test = staircase(X_test, y_test, timesteps = params["timesteps"], datapoints = len(y_test), verbose=True)
# The test set is kept as one long sequence: shape (1, phours, features).
phours, features = X_test.shape
X_test = np.reshape(X_test,(1, phours, features))
print(f"X_test shape: {X_test.shape}")

In [None]:
# Use the whole training set as one batch.
samples, timesteps, features = X_train.shape
batch_size = samples # Single batch for testing

params.update({
    'batch_shape': (batch_size,timesteps,features),
    'batch_size': batch_size, # Single Batch for testing
    # The prediction model's input is sized to the full series X (train + test).
    'pred_input_shape': (X.shape[0], X.shape[1]),
    'epochs': 5000,
    'stateful': True
})

In [None]:
# Seed immediately before construction: the model's initial weights are
# created inside RNN(params).
reproducibility.set_seed()
rnn = RNN(params)

In [None]:
# Predict on the full series before any training, as one sequence of shape (1, len(X), features).
m = rnn.predict(np.reshape(X,(1, X.shape[0], features)))

In [None]:
# Hash of the pre-training predictions, shown as the cell output.
hash2(m)

In [None]:
# Train with per-epoch Keras output switched off.
rnn.fit(X_train, y_train, verbose_fit=False)

In [None]:
# Predict on the full series (train + test) with the fitted weights.
preds = rnn.predict(np.reshape(X,(1, X.shape[0], features)))

In [None]:
# RMSE of the fitted model over the full series. Uses the notebook's rmse
# helper (flatten both arrays, then sqrt of the mean squared error) rather
# than re-importing sklearn and repeating the formula inline.
rmse(preds, y)

## Stateful Batch Training

In [None]:
# NOTE: this import rebinds the name `RNN`. From this cell on, `RNN` refers to
# moisture_rnn.RNN, not the class defined earlier in this notebook.
from moisture_rnn import create_rnn_data, RNN
import logging
from utils import logging_setup
# Configure logging via the project helper.
logging_setup()

In [None]:
# Pickle holding the reproducibility data for the stateful-batch experiment.
reproducibility_file = 'data/reproducibility_dict2.pickle'

# Replace the earlier `repro` with a fresh dict built from this file and summarise it.
repro = dict(load_and_fix_data(reproducibility_file))
print_dict_summary(repro)

In [None]:
import yaml

# Reload the "rnn" section from params.yaml, discarding the overrides made
# in the single-batch section above.
with open("params.yaml") as file:
    params = yaml.safe_load(file)["rnn"]
params.update({'scale': 1})
# Display the resulting parameter dict.
params

In [None]:
# Build the data dict from the "reproducibility" case; later cells read its
# "X_train", "y_train", "X" and "y" entries.
rnn_dat = create_rnn_data(repro["reproducibility"], params)

In [None]:
# Update Params for Reproducibility

params.update({
    'epochs':200,
    'dropout': [0, 0], # NOTE: length must match total number of layers, default is 1 hidden recurrent layer and 1 dense output layer
    'recurrent_dropout': 0, # Length must match number of recurrent layers    
    'rnn_units': 20
})

In [None]:
# Seed, then build the model. `RNN` here is the class imported from moisture_rnn.
reproducibility.set_seed()
rnn = RNN(params)

In [None]:
# Train on the training split produced by create_rnn_data.
rnn.fit(rnn_dat["X_train"], rnn_dat["y_train"])
# rnn.fit(X_train, y_train)

In [None]:
# preds = rnn.predict(np.reshape(X,(1, X.shape[0], features)))
print(hash2(rnn_dat['X']))
print(hash2(rnn_dat['y']))
preds = rnn.predict(rnn_dat['X'])
rmse(preds, rnn_dat['y'])

In [None]:
# Re-seed and rebuild the model, then run it on rnn_dat in one call.
# NOTE(review): run_model is not defined on the RNN class in this notebook; it
# must come from moisture_rnn.RNN - confirm what it does there.
reproducibility.set_seed()
rnn = RNN(params)
rnn.run_model(rnn_dat)

---

## New Developments

## Other Hyperparams

In [None]:
# Try different activations and non-zero dropout.
params.update({
    'activation': ['sigmoid', 'relu'], # Length must match total number of layers
    'dropout': [0.2, 0.2], # NOTE: length must match total number of layers, default is 1 hidden recurrent layer and 1 dense output layer
    'recurrent_dropout': 0.2, # scalar here; NOTE(review): earlier comment said length must match number of recurrent layers - confirm expected type
})

In [None]:
# Re-seed and rebuild the model with the updated hyperparameters.
reproducibility.set_seed()
rnn = RNN(params)

In [None]:
# Train with per-epoch Keras output switched off.
rnn.fit(rnn_dat["X_train"], rnn_dat["y_train"], verbose_fit=False)

In [None]:
# Predict on the full series and show the RMSE as the cell output.
preds = rnn.predict(rnn_dat["X"])
# np.sqrt(mean_squared_error(preds.flatten(), rnn_dat["y"].flatten())
rmse(preds, rnn_dat["y"])

## Validation Error

In [None]:
# Set the split fractions and rebuild rnn_dat; the next cell reads its
# "X_val" / "y_val" entries.
# NOTE(review): presumably create_rnn_data uses train_frac / val_frac to size the splits - confirm.
params.update({
    'train_frac': 0.5,
    'val_frac': 0.1
})
rnn_dat = create_rnn_data(repro["reproducibility"], params)

In [None]:
# Re-seed, rebuild and train while passing the validation split to fit().
# `validation_data` is accepted by the imported moisture_rnn.RNN; the RNN class
# defined earlier in this notebook has no such parameter.
reproducibility.set_seed()
rnn = RNN(params)
rnn.fit(rnn_dat["X_train"], rnn_dat["y_train"], validation_data = (rnn_dat["X_val"], rnn_dat["y_val"]))

In [None]:
# Predict on the full series and show the RMSE as the cell output, using the
# notebook's rmse helper (flatten both arrays, then sqrt of the mean squared
# error) instead of repeating the formula inline.
preds = rnn.predict(rnn_dat["X"])
rmse(preds, rnn_dat["y"])