In [None]:
# Environment
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
# Local modules
sys.path.append('..')
from moisture_rnn import RNN
import reproducibility
from utils import print_dict_summary
from data_funcs import load_and_fix_data, rmse
from moisture_rnn0 import run_case
from moisture_rnn_pkl import pkl2train
from utils import hash2

### Reproducibility Datasets

In [None]:
# Original File
# Load the original reproducibility dataset from its pickle file.
# NOTE(review): unlike the restructured file loaded in the next cell, this path has no
# '../data/' prefix — presumably load_and_fix_data resolves the location itself; confirm.
reproducibility_file='reproducibility_dict0.pickle'

# Copy the loaded entries into a fresh dict, then print a summary of what it holds.
# `repro` is indexed by case name (e.g. repro['case11']) in the legacy-code cells below.
repro={}
repro.update(load_and_fix_data(reproducibility_file))
print_dict_summary(repro)

In [None]:
# Restructured original file
# Loaded through pkl2train (which takes a list of file paths). The "reproducibility"
# entry of the result is what the class-based RNN cells below consume.
# Note: this rebinds `reproducibility_file`, which previously pointed at the original pickle.
reproducibility_file='../data/reproducibility_dict2.pickle'
repro2 = pkl2train([reproducibility_file])
print_dict_summary(repro2)

## RNN with Stateful Batch Training


### Custom Class

In [None]:
# Helpers for the class-based workflow. RNN is already imported in the first cell;
# importing it again here simply rebinds the same name.
from moisture_rnn import create_rnn_data2, RNN
import logging
from utils import logging_setup
# Configure logging through the project helper (see utils.logging_setup for the details)
logging_setup()

In [None]:
import yaml

# Read the "rnn_repro" parameter set from the project-level params file
with open("../params.yaml") as file:
    params = yaml.safe_load(file)["rnn_repro"]
# params.update({'scale': 1})
# Bare expression: displays the loaded parameter dict as the cell output
params

In [None]:
# Build the model input data from the restructured reproducibility case and the loaded params
rnn_dat = create_rnn_data2(repro2["reproducibility"], params)

In [None]:
# Seed via the project helper immediately before building the model
# (presumably so that weight initialisation is repeatable — see reproducibility.set_seed)
reproducibility.set_seed()
rnn = RNN(params)
# run_model returns two values, kept here as `m` and `errs`
m, errs = rnn.run_model(rnn_dat)

### Physics Initialized

In [None]:
# Re-execute the moisture_rnn module so edits made to it on disk are picked up without
# restarting the kernel, then rebind RNN to the class from the freshly reloaded module.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNN

In [None]:
# Same steps as the previous class-based run, with the 'phys_initialize' flag switched on.
# Note: this mutates `params` in place, so the flag stays set for any later use of this dict.
params.update({'phys_initialize': True})
# Re-seed so this run starts from the same random state as the previous one
reproducibility.set_seed()
rnn = RNN(params)
m, errs = rnn.run_model(rnn_dat)

### Using Old Code with `run_case`

In [None]:
# Switch to the legacy parameter sets used by run_case.
# Note: this rebinds `params`, which until now held the YAML-derived "rnn_repro" dict.
from module_param_sets0 import param_sets
params = param_sets['0']

In [None]:
reproducibility.set_seed()
# Legacy run with the 'initialize' flag switched off.
# `params` is the same object as param_sets['0'], so this also changes the module's dict.
params['initialize']=False 
case = 'case11'
# case_data is the same dict object as repro[case]; setting 'h2' here writes through to `repro`
case_data=repro[case]
case_data['h2']=427
run_case(case_data,params)

### Physics Initialized

In [None]:
# Legacy run repeated with the 'initialize' flag switched on and weight printing enabled.
# Re-fetching param_sets['0'] returns the same dict object that the previous legacy cell
# modified, so settings made there persist unless overwritten here.
from module_param_sets0 import param_sets
params = param_sets['0']
params['verbose_weights']=True
reproducibility.set_seed()
params['initialize']=True 
case = 'case11'
# As before, case_data aliases repro[case], so the 'h2' assignment also updates `repro`
case_data=repro[case]
case_data['h2']=427
run_case(case_data,params)

## Original Case - Single Batch

In [None]:
# NOTE: the original parameter sets live in module_param_sets0 but are commented out there,
# so the single-batch reproducibility set is reproduced by hand below.
param_sets_ORIG = {
    # --- bookkeeping ---
    'id': 0,
    'purpose': 'reproducibility',
    # --- data / batching ---
    'batch_size': np.inf,
    'training': None,
    'cases': ['case11'],
    'scale': 0,
    'rain_do': False,
    'verbose': 1,
    'timesteps': 5,
    # --- network architecture ---
    'activation': ['linear', 'linear'],
    'centering': [0.0, 0.0],
    'hidden_units': 6,
    'dense_units': 1,
    'dense_layers': 1,
    # --- physics-related settings ---
    'DeltaE': [0, -1],        # -1.0 is to correct E bias but put at the end
    'synthetic': False,       # run also synthetic cases
    'T1': 0.1,                # 1/fuel class (10)
    'fm_raise_vs_rain': 2.0,  # fm increase per mm rain
    # --- training / reporting ---
    'epochs': 5000,
    'verbose_fit': 0,
    'verbose_weights': True,
    'note': 'check 5 should give zero error',
}

### Using Old RNN Code

Code is deployed through the `run_case` function.

In [None]:
reproducibility.set_seed()
print('Running reproducibility')
# Guard against accidentally running this cell with a different parameter set
assert param_sets_ORIG['purpose'] == 'reproducibility'
param_sets_ORIG['initialize']=False 
case = 'case11'
# case_data aliases repro[case]; the 'h2' value written here is read back later from
# repro[case]['h2'] as the train/test split index for the class-based reproduction.
case_data=repro[case]
case_data["h2"]=300
run_case(case_data,param_sets_ORIG)

### Reproduce with Class Code

The code is deployed through a custom class defined below, and the parameters are loaded from the YAML file `../params.yaml`.

In [None]:
from tensorflow.keras.callbacks import Callback
from abc import ABC, abstractmethod
class ResetStatesCallback(Callback):
    """Keras callback that calls ``reset_states()`` on the attached model after each epoch."""
    def on_epoch_end(self, epoch, logs=None):
        # Hook invoked by Keras at the end of every epoch; `epoch` and `logs` are not used here
        self.model.reset_states()
        
from sklearn.metrics import mean_squared_error
def rmse(a, b):
    """Root-mean-square error between two collections holding the same number of values.

    Both inputs are converted to arrays and flattened (row-major) before the
    comparison, so their shapes may differ as long as the element counts match;
    e.g. predictions of shape (1, n, 1) can be compared against targets of shape (n,).

    Args:
        a: array-like of numbers (ndarray, list, nested list, ...).
        b: array-like of numbers with the same total number of elements as ``a``.

    Returns:
        The square root of ``mean_squared_error`` of the flattened inputs.

    Note:
        Defining this function rebinds the name ``rmse`` that the first cell
        imports from ``data_funcs``.
    """
    # np.asarray lets plain lists and other array-likes through; the previous
    # `.flatten()` calls required both arguments to already be ndarrays.
    a_flat = np.asarray(a).ravel()
    b_flat = np.asarray(b).ravel()
    return np.sqrt(mean_squared_error(a_flat, b_flat))


class RNNModel(ABC):
    """Abstract base class for the RNN wrappers in this notebook.

    Stores the parameter dictionary and defines the interface concrete models
    must implement: :meth:`fit` and :meth:`predict`.
    """

    def __init__(self, params: dict):
        """Store ``params`` on the instance.

        Args:
            params: dictionary of model/training parameters; kept by reference
                as ``self.params`` (not copied).

        Raises:
            TypeError: if ``RNNModel`` itself, rather than a subclass, is being
                initialised.
        """
        self.params = params
        # Explicit guard in addition to the ABC machinery; the message now names
        # this class (it previously said "MLModel").
        if type(self) is RNNModel:
            raise TypeError("RNNModel is an abstract class and cannot be instantiated directly")
        super().__init__()

    @abstractmethod
    def fit(self, X_train, y_train, weights=None):
        """Train the model on ``X_train`` / ``y_train``; must be overridden."""
        pass

    @abstractmethod
    def predict(self, X):
        """Return model predictions for ``X``; must be overridden."""
        pass

class RNN(RNNModel):
    """Simple RNN wrapper that keeps a training model and a prediction model in sync.

    Two Keras models with the same layer stack are built from ``params``:

    * ``model_train`` uses the fixed ``params['batch_shape']`` and the
      ``params['stateful']`` flag; this is the model that gets fitted.
    * ``model_predict`` is never stateful, returns the full output sequence, and
      receives a copy of the training weights at construction and after every
      call to :meth:`fit`.

    Keys read from ``params``: ``batch_shape``, ``pred_input_shape``,
    ``rnn_layers``, ``rnn_units``, ``dense_layers``, ``dense_units``,
    ``activation`` (index 0 for recurrent layers, index 1 for dense layers),
    ``stateful``, ``centering`` (only index 1 is used), ``epochs``,
    ``batch_size``, ``verbose_fit`` and ``verbose_weights``.
    """

    def __init__(self, params, loss='mean_squared_error'):
        """Build and compile both models.

        Args:
            params: parameter dictionary (see the class docstring for the keys used).
            loss: loss passed to ``compile`` for both models. It was previously
                accepted but ignored; the default matches the loss that used to
                be hard-coded.
        """
        super().__init__(params)
        self.loss = loss
        # Order matters: the prediction model copies its weights from model_train
        self.model_train = self._build_model_train()
        self.model_predict = self._build_model_predict()

    def _stack_layers(self, inputs, stateful, return_sequences):
        """Apply the recurrent layers followed by the dense layers to ``inputs``."""
        n_rnn = self.params['rnn_layers']
        x = inputs
        for i in range(n_rnn):
            # Only the last recurrent layer honours `return_sequences`. Earlier layers
            # must emit the whole sequence, otherwise the next SimpleRNN would be fed
            # 2-D input. With a single recurrent layer nothing changes.
            is_last = i == n_rnn - 1
            x = tf.keras.layers.SimpleRNN(
                self.params['rnn_units'],
                activation=self.params['activation'][0],
                stateful=stateful,
                return_sequences=return_sequences if is_last else True,
            )(x)
        for _ in range(self.params['dense_layers']):
            x = tf.keras.layers.Dense(
                self.params['dense_units'],
                activation=self.params['activation'][1],
            )(x)
        return x

    def _build_model_train(self, return_sequences=False):
        """Create and compile the model used for fitting.

        Args:
            return_sequences: whether the last recurrent layer returns the full
                sequence instead of only the final step.

        Returns:
            The compiled ``tf.keras.Model``.
        """
        inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
        outputs = self._stack_layers(inputs, self.params['stateful'], return_sequences)
        model = tf.keras.Model(inputs=inputs, outputs=outputs)
        model.compile(loss=self.loss, optimizer='adam')
        if self.params["verbose_weights"]:
            print(f"Initial Weights Hash: {hash2(model.get_weights())}")

        return model

    def _build_model_predict(self, return_sequences=True):
        """Create the non-stateful prediction model and copy the training weights into it.

        Requires ``self.model_train`` to exist already.

        Args:
            return_sequences: whether the last recurrent layer returns the full
                sequence (the default, giving one output per input step).

        Returns:
            The compiled ``tf.keras.Model`` carrying the training model's weights.
        """
        inputs = tf.keras.Input(shape=self.params['pred_input_shape'])
        outputs = self._stack_layers(inputs, False, return_sequences)
        model = tf.keras.Model(inputs=inputs, outputs=outputs)
        model.compile(loss=self.loss, optimizer='adam')

        # Start from the same weights as the training model
        model.set_weights(self.model_train.get_weights())

        return model

    def fit(self, X_train, y_train, plot=True, weights=None, callbacks=None, verbose_fit=None):
        """Fit the training model and copy the fitted weights to the prediction model.

        Args:
            X_train: training inputs; ``X_train.shape[0]`` is compared against
                ``params['batch_size']`` to detect single-batch training.
            y_train: training targets; ``params['centering'][1]`` is added to
                them before fitting.
            plot: if true, plot the loss history once training finishes.
            weights: currently unused; kept for compatibility with the abstract
                ``fit`` signature.
            callbacks: optional list of Keras callbacks (``None`` means none).
            verbose_fit: per-epoch verbosity passed to Keras; when ``None`` the
                value of ``params['verbose_fit']`` is used.

        Returns:
            None.
        """
        # A fresh list per call avoids sharing one mutable default between calls
        if callbacks is None:
            callbacks = []
        # verbose_fit argument is for printing out update after each epoch, which gets very long
        # These print statements at the top could be turned off with a verbose argument, but then
        # there would be a bunch of different verbose params
        print(f"Training simple RNN with params: {self.params}")
        print(f"X_train hash: {hash2(X_train)}")
        print(f"y_train hash: {hash2(y_train)}")
        print(f"Initial weights before training hash: {hash2(self.model_train.get_weights())}")
        # Note: we overload the params here so that verbose_fit can be easily turned on/off at the .fit call
        if verbose_fit is None:
            verbose_fit = self.params['verbose_fit']
        # Evaluate Model once to set nonzero initial state (only when all data fits in one batch)
        if self.params["batch_size"] >= X_train.shape[0]:
            self.model_train(X_train)
        # Fit Model
        history = self.model_train.fit(
            X_train, y_train + self.params['centering'][1],
            epochs=self.params['epochs'],
            batch_size=self.params['batch_size'],
            callbacks=callbacks,
            verbose=verbose_fit)
        if plot:
            self.plot_history(history)
        if self.params["verbose_weights"]:
            print(f"Fitted Weights Hash: {hash2(self.model_train.get_weights())}")

        # Update Weights for Prediction Model
        self.model_predict.set_weights(self.model_train.get_weights())

    def predict(self, X_test):
        """Return the prediction model's raw output for ``X_test``.

        NOTE(review): ``fit`` trains against ``y_train + centering[1]`` but the
        offset is not removed here — confirm this is intended whenever
        ``centering[1]`` is non-zero.
        """
        print("Predicting with simple RNN")
        preds = self.model_predict.predict(X_test)
        return preds

    def plot_history(self, history, title=None):
        """Plot training (and, if present, validation) loss on a log scale.

        Args:
            history: object returned by ``Model.fit``; its ``history`` dict must
                contain ``'loss'`` and may contain ``'val_loss'``.
            title: plot title. When omitted, the notebook-level ``case`` variable
                is used as a prefix if it exists (the previous behaviour);
                otherwise the title is just ``'Model loss'`` instead of raising
                ``NameError``.
        """
        if title is None:
            case_name = globals().get('case')
            title = 'Model loss' if case_name is None else f"{case_name} Model loss"
        plt.semilogy(history.history['loss'], label='Training loss')
        if 'val_loss' in history.history:
            plt.semilogy(history.history['val_loss'], label='Validation loss')
        plt.title(title)
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(loc='upper left')
        plt.show()


In [None]:
import yaml

# Load the "rnn" parameter set (earlier cells used "rnn_repro"); this rebinds `params`
with open("../params.yaml") as file:
    params = yaml.safe_load(file)["rnn"]

# Zero out dropout for this run.
# Note: the RNN class defined in this notebook does not read either of these two keys.
params.update({
    'dropout': [0, 0], # NOTE: length must match total number of layers, default is 1 hidden recurrent layer and 1 dense output layer
    'recurrent_dropout': 0, # Length must match number of recurrent layers
})

In [None]:
# Relies on `case` and on repro[case]['h2'] as left by the most recently run legacy cell
N = len(repro[case]["fm"]) # total observations
train_ind = repro[case]['h2']

# Two input columns taken from the "Ed" and "Ew" entries (one row per observation);
# the target is the "fm" entry.
X = np.vstack((repro[case]["Ed"], repro[case]["Ew"])).T
y = repro[case]["fm"]

# Split by position: the first `train_ind` observations train, the rest test
X_train = X[:train_ind]
X_test = X[train_ind:]
y_train = y[:train_ind].reshape(-1,1)
y_test = y[train_ind:].reshape(-1,1)

print(f"Total Observations: {N}")
print(f"Num Training: {X_train.shape[0]}")
print(f"Num Test: {X_test.shape[0]}")

# Restructure the training data with the project's staircase helper.
# NOTE(review): the result is unpacked as 3-D (samples, timesteps, features) in the next
# cell; see moisture_rnn.staircase for the exact layout.
from moisture_rnn import staircase
X_train, y_train = staircase(X_train, y_train, timesteps = params["timesteps"], datapoints = len(y_train), verbose=True)
print("~"*50)
# The test inputs become one sample containing the whole test sequence
phours, features = X_test.shape
X_test = np.reshape(X_test,(1, phours, features))
print(f"X_test shape: {X_test.shape}")

In [None]:
# Derive the shape-dependent parameters from the prepared training data
samples, timesteps, features = X_train.shape
batch_size = samples # Single batch for testing

params.update({
    # Fixed batch shape for the (stateful) training model
    'batch_shape': (batch_size,timesteps,features),
    'batch_size': batch_size, # Single Batch for testing
    # The prediction model takes the full series (all rows of X) as one sample
    'pred_input_shape': (X.shape[0], X.shape[1]),
    'epochs': 5000,
    'stateful': True,
    'features': features
})

In [None]:
# Seed, then build the RNN defined in this notebook (when the cells are run in order,
# that definition replaces the RNN imported from moisture_rnn).
reproducibility.set_seed()
rnn = RNN(params)
# Hash of the predictions made with the initial (untrained) weights, for comparison between runs
m = rnn.predict(np.reshape(X,(1, X.shape[0], features)))
print(hash2(m))
# Train; with the parameters set above this runs 5000 epochs in a single batch
rnn.fit(X_train, y_train)

In [None]:
# Predict over the full series (training and test periods together) and report the RMSE against y.
# NOTE(review): fit() adds params['centering'][1] to the targets and predict() does not
# remove it — confirm the comparison is intended as-is when that offset is non-zero.
preds = rnn.predict(np.reshape(X,(1, X.shape[0], features)))
rmse(preds, y)