In [None]:
# Environment
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
# Local modules
sys.path.append('..')
import reproducibility
from utils import print_dict_summary
from data_funcs import load_and_fix_data, rmse
from moisture_rnn import run_case

## Original Case - Single Batch

In [None]:
# Location of the reproducibility dictionary, relative to this notebook.
reproducibility_file = '../data/reproducibility_dict.pickle'

# Copy the loaded cases into a plain dict and print an overview of it.
repro = dict(load_and_fix_data(reproducibility_file))
print_dict_summary(repro)

In [None]:
# Settings passed to run_case for the reference ("reproducibility") run.
param_sets_ORIG = dict(
    id=0,
    purpose='reproducibility',
    batch_size=np.inf,
    training=None,
    cases=['case11'],
    scale=0,
    rain_do=False,
    verbose=1,                        # alternative setting: False
    timesteps=5,
    activation=['linear', 'linear'],
    centering=[0.0, 0.0],
    hidden_units=6,
    dense_units=1,
    dense_layers=1,
    DeltaE=[0, -1],                   # -1.0 is to correct E bias but put at the end
    synthetic=False,                  # run also synthetic cases
    T1=0.1,                           # 1/fuel class (10)
    fm_raise_vs_rain=2.0,             # fm increase per mm rain
    epochs=5000,
    verbose_fit=0,
    verbose_weights=False,
    note='check 5 should give zero error',
)

In [None]:
# Reference run: call reproducibility.set_seed(), then hand case11 and the
# original parameter set to run_case.
reproducibility.set_seed()
print('Running reproducibility')
assert param_sets_ORIG['purpose'] == 'reproducibility'
param_sets_ORIG.update(initialize=True)

# `case` is reused by later cells to index into `repro`.
case = 'case11'
case_data = repro[case]
run_case(case_data, param_sets_ORIG)

## Stateless RNN

In [None]:
N = len(repro[case]["fm"])   # total number of observations
train_ind = int(N * .8)      # first index of the validation slice
test_ind = int(N * .9)       # first index of the test slice

# Two input columns (Ed, Ew) and the fm target.
X = np.vstack((repro[case]["Ed"], repro[case]["Ew"])).T
y = repro[case]["fm"]

# Contiguous split along the first axis: [0, train_ind) / [train_ind, test_ind) / [test_ind, N).
X_train, X_val, X_test = np.split(X, [train_ind, test_ind])
y_train, y_val, y_test = (
    part.reshape(-1, 1) for part in np.split(y, [train_ind, test_ind])
)

print(f"Total Observations: {N}")
for _label, _split in (("Training", X_train), ("Validation", X_val), ("Test", X_test)):
    print(f"Num {_label}: {_split.shape[0]}")

In [None]:
# Hyperparameters read by the hand-built Keras cells below.
params = dict(
    timesteps=5,                       # length of sequence used in gradient calculation
    batch_size=32,                     # number of sequences (of length timesteps) processed in single training pass
    scale=0,
    activation=['linear', 'linear'],   # [hidden layer, output layer]
    hidden_units=6,
    # dense_units=1,                   # for single timeseries
    # dense_layers=1,
    epochs=100,
)

In [None]:
from moisture_rnn import staircase

# Run every split through staircase with the shared `timesteps` setting, in the
# order train, validation, test. The returned X arrays are 3-D (the model cell
# reads X_train.shape[2]).
# NOTE(review): this overwrites the 2-D splits, so re-running the cell without
# re-running the split cell feeds already-transformed arrays back in.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    staircase(inputs, targets, timesteps=params["timesteps"], datapoints=len(targets))
    for inputs, targets in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

In [None]:
# Report the shape of each target/input array, split by split.
for _name, _arr in (
    ("y_train", y_train),
    ("X_train", X_train),
    ("y_val", y_val),
    ("X_val", X_val),
    ("y_test", y_test),
    ("X_test", X_test),
):
    print(f"{_name} shape: {_arr.shape}")

In [None]:
# Baseline stateless network: one 6-unit SimpleRNN layer followed by a single
# linear output unit, trained with MSE loss and the Adam optimizer.
reproducibility.set_seed()
model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(
        6,
        input_shape=(params["timesteps"], X_train.shape[2]),  # (sequence length, input columns)
        activation="linear",
    ),
    tf.keras.layers.Dense(1, activation="linear"),
])
model.compile(loss='mean_squared_error', optimizer='adam')

# Keep the History object; the loss-curve cell reads history.history.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_val, y_val),
)

In [None]:
# Predictions on the training and test sequences, then RMSE for each.
fitted = model.predict(X_train)
preds = model.predict(X_test)

print("~" * 50)
for _split, _estimate, _target in (("Train", fitted, y_train), ("Test", preds, y_test)):
    print(f"RMSE {_split}: {rmse(_estimate, _target)}")

In [None]:
# Loss curves recorded by the most recent `history = model.fit(...)` call.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Show which fields are stored for the selected case.
repro[case].keys()

In [None]:
# Overlay the full observed fm series and the model output on the training
# sequences. The original cell set a label but never drew a legend, and the
# figure had no title or axis labels.
plt.figure(figsize=(10, 6))
plt.plot(repro[case]["fm"], label="observed fm")
# NOTE(review): `fitted` comes from model.predict(X_train), so it only spans the
# training sequences and is plotted against its own index — confirm how it
# lines up with the observation index before reading the overlay closely.
plt.plot(fitted, label="fitted")
plt.title("Observed fm vs. fitted values (training)")
plt.xlabel("Index")
plt.ylabel("fm")
plt.legend()
plt.show()

## Changing Hyperparams

In [None]:
# Variant of the stateless model with input/recurrent dropout, an explicit Adam
# learning rate, and a smaller batch size.
reproducibility.set_seed()
model = tf.keras.Sequential()
model.add(tf.keras.layers.SimpleRNN(
    units=6, 
    input_shape=(params["timesteps"], X_train.shape[2]), 
    activation="linear",
    dropout= 0.2,
    recurrent_dropout = 0.2,
))
model.add(tf.keras.layers.Dense(1, activation="linear")) 
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='mean_squared_error', optimizer=optimizer)

# Capture the History object: the loss-curve cell that follows reads
# `history`, and without this assignment it would silently re-plot the curves
# of the previously trained model.
history = model.fit(X_train, y_train, 
                    epochs=20, 
                    batch_size = 16, 
                    validation_data=(X_val, y_val))

fitted = model.predict(X_train)
preds = model.predict(X_test)

print(f"RMSE Train: {rmse(fitted, y_train)}")
print(f"RMSE Test: {rmse(preds, y_test)}")

In [None]:
# Training/validation loss per epoch.
# NOTE(review): this plots whatever `history` currently holds — make sure the
# fit call for the model in this section assigns its result to `history`.
plt.figure(figsize=(10, 6))
for _key, _label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[_key], label=_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

## Stateful RNN - Batch Size in Inputs

In [None]:
# Rebuild the raw 2-D splits (the earlier ones were overwritten by staircase).
N = len(repro[case]["fm"])                          # total number of observations
train_ind, test_ind = int(N * .8), int(N * .9)      # validation starts / test starts

X = np.vstack((repro[case]["Ed"], repro[case]["Ew"])).T
y = repro[case]["fm"]

# One slice object per split, applied identically to inputs and targets.
_train_rows = slice(None, train_ind)
_val_rows = slice(train_ind, test_ind)
_test_rows = slice(test_ind, None)

X_train, X_val, X_test = X[_train_rows], X[_val_rows], X[_test_rows]
y_train, y_val, y_test = (
    y[_rows].reshape(-1, 1) for _rows in (_train_rows, _val_rows, _test_rows)
)

print(
    f"Total Observations: {N}",
    f"Num Training: {X_train.shape[0]}",
    f"Num Validation: {X_val.shape[0]}",
    f"Num Test: {X_test.shape[0]}",
    sep="\n",
)

In [None]:
# Check the shape of the freshly rebuilt training inputs (rows x 2 input columns).
X_train.shape

In [None]:
# Dimensions of the fixed batch input shape required by a stateful layer.
batch_size, timesteps = params["batch_size"], params["timesteps"]
features = X_train.shape[1]   # X_train is still 2-D here: (rows, input columns)

# Same architecture as the stateless baseline, but stateful with a fixed batch size.
reproducibility.set_seed()
model = tf.keras.Sequential([
    tf.keras.layers.SimpleRNN(
        6,
        batch_input_shape=(batch_size, timesteps, features),
        activation="linear",
        stateful=True,
    ),
    tf.keras.layers.Dense(1, activation="linear"),
])
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
## Naive approach DOESN'T WORK
# X_train is 2-D (ntrain, features) at this point, so reshaping it to
# (ntrain, timesteps, features) would need `timesteps` times as many elements
# as the array holds; numpy's reshape raises an error on the size mismatch.
# ntrain = X_train.shape[0]
# X_train.reshape((ntrain, timesteps, features))

In [None]:
from moisture_rnn import staircase_2


def _apply_staircase_2(inputs, targets):
    """Forward one (inputs, targets) pair to staircase_2 with the shared settings."""
    return staircase_2(
        inputs,
        targets,
        timesteps=params["timesteps"],
        batch_size=params["batch_size"],
        verbose=False,
    )


# NOTE(review): this overwrites the 2-D splits; re-run the split cell before
# re-running this one.
X_train, y_train = _apply_staircase_2(X_train, y_train)
X_val, y_val = _apply_staircase_2(X_val, y_val)
X_test, y_test = _apply_staircase_2(X_test, y_test)

In [None]:
# Train the stateful model.
# - shuffle=False: a stateful layer carries the state of sample i in one batch
#   over to sample i in the next batch, so shuffling the samples (the fit
#   default) breaks the continuity the state is meant to capture.
# - batch_size is passed explicitly because the model was built with a fixed
#   batch dimension; relying on the Keras default only works while
#   params["batch_size"] happens to be 32.
# NOTE(review): assumes staircase_2 orders the samples so that consecutive
# batches are continuations of each other — confirm against moisture_rnn.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=params["batch_size"],
    shuffle=False,
    validation_data=(X_val, y_val),
)

In [None]:
# Predict with the stateful model. The batch size is passed explicitly because
# the model has a fixed batch dimension; the Keras default only matches while
# params["batch_size"] is 32.
# NOTE(review): the layer state left over from training / the previous predict
# call is not reset here — confirm whether a reset is wanted between runs.
fitted = model.predict(X_train, batch_size=params["batch_size"])
preds = model.predict(X_test, batch_size=params["batch_size"])

print(f"RMSE Train: {rmse(fitted, y_train)}")
print(f"RMSE Test: {rmse(preds, y_test)}")

In [None]:
# Training/validation loss per epoch for the stateful model's fit.
plt.figure(figsize=(10, 6))
_losses = history.history
plt.plot(_losses['loss'], label='Training Loss')
plt.plot(_losses['val_loss'], label='Validation Loss')
plt.gca().set(title='Training and Validation Loss', xlabel='Epochs', ylabel='Loss')
plt.legend()
plt.show()