# Basic Neural Networks 

(Baseline models trained without transfer learning, for later comparison.)

In [1]:
# Standard imports
import os
import datetime
from pathlib import Path
from collections import defaultdict
import scipy
import random
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import joblib
import pickle

# Machine learning libraries
import sklearn            # machine-learning library with many algorithms implemented
#import xgboost as xgb     # extreme gradient boosting (XGB)
#from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import *
from tensorflow.keras import Sequential

# Python file with supporting functions
import model_utils

2023-01-11 11:44:31.354911: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Directory that trained reconstruction models are saved to and reloaded from.
recon_model_path = (
    '/home/julias/MLEE-final-project'
    '/models/saved_models/recon_models'
)

# Load Split Datasets and Create Versions for Experimentation

## Load Split Datasets

In [71]:
# Open the six pre-split NetCDF files. Every dataset is sorted along time, xlon
# and ylat, and its dimensions are put in (time, ylat, xlon) order.
X_ds, y_ds, X_train_ds, y_train_ds, X_test_ds, y_test_ds = (
    xr.open_dataset(nc_path)
      .sortby(['time','xlon','ylat'])
      .transpose('time','ylat','xlon')
    for nc_path in (
        '/home/julias/MLEE-final-project/proc_data/split_datasets/X.nc',
        '/home/julias/MLEE-final-project/proc_data/split_datasets/y.nc',
        '/home/julias/MLEE-final-project/proc_data/split_datasets/X_train.nc',
        '/home/julias/MLEE-final-project/proc_data/split_datasets/y_train.nc',
        '/home/julias/MLEE-final-project/proc_data/split_datasets/X_test.nc',
        '/home/julias/MLEE-final-project/proc_data/split_datasets/y_test.nc',
    )
)

In [72]:
# Flatten each dataset to a table and drop every row that contains a NaN.
X_df = X_ds.to_dataframe().dropna()
y_df = y_ds.to_dataframe().dropna()
X_train_df = X_train_ds.to_dataframe().dropna()
y_train_df = y_train_ds.to_dataframe().dropna()
X_test_df = X_test_ds.to_dataframe().dropna()
y_test_df = y_test_ds.to_dataframe().dropna()

# X and y are cleaned independently above, so their NaN patterns may differ.
# Every consumer in this notebook (Keras fit/predict, mean_squared_error, the
# comparison plots) pairs X and y rows purely by position, so keep only the
# index entries present in both frames. Boolean masking preserves row order,
# and each right-hand side is evaluated in full before assignment, so both
# masks are computed from the unfiltered frames. When the frames already
# share an index this is a no-op.
X_df, y_df = X_df[X_df.index.isin(y_df.index)], y_df[y_df.index.isin(X_df.index)]
X_train_df, y_train_df = (
    X_train_df[X_train_df.index.isin(y_train_df.index)],
    y_train_df[y_train_df.index.isin(X_train_df.index)],
)
X_test_df, y_test_df = (
    X_test_df[X_test_df.index.isin(y_test_df.index)],
    y_test_df[y_test_df.index.isin(X_test_df.index)],
)

In [77]:
# X_train_ds.SSS[0,:,:].plot()

In [None]:
# for future for loop to open datasets
#ds_name = {'X', 'y', 'X_train', 'y_train', 'X_test', 'y_test'}
#ds_list = {'X_ds', 'y_ds', 'X_train_ds', 'y_train_ds', 'X_test_ds', 'y_test_ds'}

#ddict = {'X': 'X_ds', 
#         'y': 'y_ds', 
#         'X_train': 'X_train_ds', 
#         'y_train': 'y_train_ds',
#         'X_test': 'X_test_ds',
#         'y_test': 'y_test_ds',
#        }
#data_path = '/home/julias/MLEE-final-project/proc_data/split_datasets/{}.nc'

#for key in ddict:
#    #print(key, 'corresponds to', ddict[key])
#    ddict[key] = xr.open_dataset(data_path.format(key))

In [69]:
#X_ds_list = {'X_ds', 'X_train_ds', 'X_test_ds'}
#y_ds_list = {'y_ds', 'y_train_ds', 'y_test_ds'}

# The normalisation helpers are never defined or imported by name in this
# notebook (only the `model_utils` module is imported), so bare calls raise
# NameError after Restart & Run All. Call them through the module instead.
# NOTE(review): assumes model_utils defines normalize_X_dataset and
# normalize_y_dataset - confirm against model_utils.py.
# Each dataset (full, train, test) is passed to the helper separately.
X_ds_norm = model_utils.normalize_X_dataset(X_ds)
X_train_ds_norm = model_utils.normalize_X_dataset(X_train_ds)
X_test_ds_norm = model_utils.normalize_X_dataset(X_test_ds)

y_ds_norm = model_utils.normalize_y_dataset(y_ds)
y_train_ds_norm = model_utils.normalize_y_dataset(y_train_ds)
y_test_ds_norm = model_utils.normalize_y_dataset(y_test_ds)

  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  var = 

In [79]:
# Flatten each normalised dataset to a table and drop every row that contains a NaN.
X_df_norm = X_ds_norm.to_dataframe().dropna()
X_train_df_norm = X_train_ds_norm.to_dataframe().dropna()
X_test_df_norm = X_test_ds_norm.to_dataframe().dropna()
y_df_norm = y_ds_norm.to_dataframe().dropna()
y_train_df_norm = y_train_ds_norm.to_dataframe().dropna()
y_test_df_norm = y_test_ds_norm.to_dataframe().dropna()

# X and y are cleaned independently above, and normalisation can introduce
# NaNs in one but not the other. Downstream cells pair X and y rows by
# position, so keep only the index entries present in both frames. Boolean
# masking preserves row order; each right-hand side is evaluated in full
# before assignment, so both masks come from the unfiltered frames.
X_df_norm, y_df_norm = (
    X_df_norm[X_df_norm.index.isin(y_df_norm.index)],
    y_df_norm[y_df_norm.index.isin(X_df_norm.index)],
)
X_train_df_norm, y_train_df_norm = (
    X_train_df_norm[X_train_df_norm.index.isin(y_train_df_norm.index)],
    y_train_df_norm[y_train_df_norm.index.isin(X_train_df_norm.index)],
)
X_test_df_norm, y_test_df_norm = (
    X_test_df_norm[X_test_df_norm.index.isin(y_test_df_norm.index)],
    y_test_df_norm[y_test_df_norm.index.isin(X_test_df_norm.index)],
)

### Create Numpy Arrays for Original Data

In [None]:
# Feature tables become 2-D arrays (rows x columns of the source dataframe).
X_original, X_train_original, X_test_original = (
    frame.to_numpy() for frame in (X_df, X_train_df, X_test_df)
)
# Target tables are flattened to 1-D vectors.
y_original, y_train_original, y_test_original = (
    frame.to_numpy().ravel() for frame in (y_df, y_train_df, y_test_df)
)

# Create Normalized Dataframes

## First Method of Normalization

*Note: Done here, as opposed to in test/train split, so that I can save the original train/test datasets and later determine whether normalization led to improvement. (My group has not historically normalized data before training.)*

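Also note: this is done after the test on the original dataframes above because the method overwrites the original dataframes. This still needs debugging; a likely cause is that plain assignment (e.g. `X_df_norm = X_df`) binds a second name to the same DataFrame instead of copying it, so in-place column updates also modify the original. Using `X_df.copy()` would avoid this.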

In [None]:
#X_df_norm = (X_df - X_df.mean())/X_df.std() 
#y_df_norm = (y_df - y_df.mean())/y_df.std()
#X_train_df_norm = (X_train_df - X_train_df.mean())/X_train_df.std()
#y_train_df_norm = (y_train_df - y_train_df.mean())/y_train_df.std()
#X_test_df_norm = (X_test_df - X_test_df.mean())/X_test_df.std()
#y_test_df_norm = (y_test_df - y_test_df.mean())/y_test_df.std()

## Second Method of Normalization (Previously Implemented and Saved) 

- This way, time and lat/lon conversions are not normalized
- Note that coordinates don't seem to be input into ML algorithms, so T0,T1,A,B,C are the inputs of time and space (as would want, don't want two forms of time and space input)

In [None]:
#X_df_norm = X_df 
#y_df_norm = y_df 
#X_train_df_norm = X_train_df 
#y_train_df_norm = y_train_df  
#X_test_df_norm = X_test_df  
#y_test_df_norm = y_test_df  

In [None]:
#for df in X_df_norm, X_train_df_norm, X_test_df_norm:
#    df.SSS = (df.SSS - df.SSS.mean())/df.SSS.std() 
#    df.SST = (df.SST - df.SST.mean())/df.SST.std() 
#    df.MLD = (df.MLD - df.MLD.mean())/df.MLD.std() 
#    df.Chl = (df.Chl - df.Chl.mean())/df.Chl.std() 
#    df.XCO2 = (df.XCO2 - df.XCO2.mean())/df.XCO2.std() 

#for df in y_df_norm, y_train_df_norm, y_test_df_norm:
#    df.pCO2 = (df.pCO2 - df.pCO2.mean())/df.pCO2.std() 

In [None]:
#X_train_df_norm

In [None]:
#X_train_df

Recreate original dataframes, as they are getting overwritten by the above process 

In [None]:
#X_df = xr.open_dataset('/home/julias/MLEE-final-project/proc_data/split_datasets/X.nc').to_dataframe().dropna()
#y_df = xr.open_dataset('/home/julias/MLEE-final-project/proc_data/split_datasets/y.nc').to_dataframe().dropna() 
#X_train_df = xr.open_dataset('/home/julias/MLEE-final-project/proc_data/split_datasets/X_train.nc').to_dataframe().dropna() 
#y_train_df = xr.open_dataset('/home/julias/MLEE-final-project/proc_data/split_datasets/y_train.nc').to_dataframe().dropna()
#X_test_df = xr.open_dataset('/home/julias/MLEE-final-project/proc_data/split_datasets/X_test.nc').to_dataframe().dropna() 
#y_test_df = xr.open_dataset('/home/julias/MLEE-final-project/proc_data/split_datasets/y_test.nc').to_dataframe().dropna() 

## Third (Updated) Method of Normalization

- Surprised at lack of improvement from normalization (as done below using the second method above)  
- Problem may be coming from the use of a global mean/std, as opposed to one specific to each latitude and longitude point. 
- Replaced normalized numpy arrays and dataframes previously run and saved (names normdf, normdf2, normnumpy, and normnumpy2) with updated ones for each coordinate point (names normdfupdated and normnumpyupdated)

## Create Numpy Arrays 

In [None]:
# Normalised feature tables become 2-D arrays.
X_n, X_train_n, X_test_n = (
    frame.to_numpy() for frame in (X_df_norm, X_train_df_norm, X_test_df_norm)
)
# Normalised target tables are flattened to 1-D vectors.
y_n, y_train_n, y_test_n = (
    frame.to_numpy().ravel() for frame in (y_df_norm, y_train_df_norm, y_test_df_norm)
)

# Preliminary NN Model using Original (Not Normalized) Input

## Build NN Model

In [None]:
# NOTE that number of input layer neurons must correspond to number of predictor variables

In [None]:
# Hyperparameters read by the build, compile and fit cells of this section.
# -- architecture
n_neuron = 64              # units in each hidden Dense layer
activation = 'LeakyReLU'   # hidden-layer activation
# -- optimisation
learning_rate = 1e-3       # passed to the Adam optimizer
minibatch_size = 64        # batch_size for fit()
num_epochs = 50            # epochs requested from fit()
# -- bookkeeping
model_num = 1

In [None]:
# Number of predictor columns. This is the input width handed to the first
# Dense layer, so inspect the feature frame rather than the target frame.
X_train_df.shape[1]

In [None]:
# Fully connected regression network: three equally sized hidden layers
# followed by a single linear output unit.
NN_model = Sequential([
    Dense(n_neuron, activation=activation, name='hidden_layer_1',
          input_shape=(X_train_df.shape[1],)),   # one input per predictor column
    Dense(n_neuron, activation=activation, name='hidden_layer_2'),
    Dense(n_neuron, activation=activation, name='hidden_layer_3'),
    Dense(1, activation='linear', name='output_layer'),
])

# Mean-squared-error loss, minimised with Adam at the configured learning rate.
NN_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss='mse',
)

In [None]:
# Show the Keras summary of the model built above as a quick sanity check.
NN_model.summary()

In [None]:
# Early-stopping callback watching the validation loss with a patience of 20 epochs.
early_stop = keras.callbacks.EarlyStopping(patience=20, monitor='val_loss')

# Train on the original dataframes; a fifth of the training rows is reserved
# for validation via validation_split.
history = NN_model.fit(
    X_train_df,
    y_train_df,
    epochs=num_epochs,
    batch_size=minibatch_size,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)

## Initial Checks

The validation loss reaches several minima and then jumps around; plot the training history to see whether the trend indicates overfitting.

In [None]:
# Draw the training history with the project helper, then title the current figure.
# NOTE(review): presumably plots training vs. validation loss - confirm in model_utils.
model_utils.plot_history(history)
plt.title('Preliminary NN with Original Dataframe Input')

- Indicates that the model isn't generalizing well
- While the training loss goes down substantially, the validation loss does not show the same trend. Instead, it remains (relatively) high and jumps around.
    - Completely fine, just signals that hyperparameter tuning will be needed (as expected)

In [None]:
#Normalize so more comparable to other plot

In [None]:
#history.history.keys()

In [None]:
#norm_loss = (history.history['loss'] - np.mean(history.history['loss'])) / np.std(history.history['loss'])

In [None]:
#norm_val_loss = (history.history['val_loss'] - np.mean(history.history['val_loss'])) / np.std(history.history['val_loss'])

In [None]:
#x_ax = history.epoch
#plt.plot(x_ax, norm_loss, label="training")
#plt.plot(x_ax, norm_val_loss, label="validation")
#plt.title("Normalized Training and Validation Loss over Epochs")
#plt.legend()
#plt.show()

## Save NN Model

In [None]:
# After training, write the model to the shared model directory so the test
# cells below can reload it from disk.
NN_model.save(os.path.join(recon_model_path,'NN_model_prelim_originaldf2.h5'))

## Test NN Model

In [None]:
# Reload the saved model from disk before working with the test data.
NN_prelim_model = load_model(os.path.join(recon_model_path,'NN_model_prelim_originaldf2.h5'))

In [None]:
# Predict on the held-out test features and report the mean squared error
# against the test targets.
y_pred = NN_prelim_model.predict(X_test_df)
mse = mean_squared_error(y_true=y_test_df, y_pred=y_pred)
print("MSE: %.2f" % (mse,))

In [None]:
# Overlay test targets and model predictions, sample by sample in test-set row order.
x_ax = range(len(y_test_df))
plt.plot(x_ax, y_test_df, label="original")
plt.plot(x_ax, y_pred, label="predicted")
# Title typo fixed: the variable is pCO2 (letter O), not "pC02" (digit zero).
plt.title("pCO2 Test and Predicted Data for Preliminary NN with Original Dataframe Input")
# Axis labels so the figure can be read on its own.
plt.xlabel("Test sample index")
plt.ylabel("pCO2")
plt.legend()
plt.show()

# Preliminary NN Model using Original Numpy Input

## Build NN Model

In [None]:
# Hyperparameters for the network trained on the original numpy arrays.
# -- architecture
n_neuron = 64              # units in each hidden Dense layer
activation = 'LeakyReLU'   # hidden-layer activation
# -- optimisation
learning_rate = 1e-3       # passed to the Adam optimizer
minibatch_size = 64        # batch_size for fit()
num_epochs = 50            # epochs requested from fit()
# -- bookkeeping
model_num = 1

In [None]:
# Number of feature columns; used as the input width of the first Dense layer below.
X_train_original.shape[1]

In [None]:
# Same architecture as the dataframe model (three hidden layers and a linear
# output), sized from the numpy feature matrix. Layers keep their default names.
NN_model_on = Sequential([
    Dense(n_neuron, activation=activation,
          input_shape=(X_train_original.shape[1],)),   # one input per feature column
    Dense(n_neuron, activation=activation),
    Dense(n_neuron, activation=activation),
    Dense(1, activation='linear'),
])

# Mean-squared-error loss, minimised with Adam at the configured learning rate.
NN_model_on.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss='mse',
)

In [None]:
# Show the Keras summary of the numpy-input model built above.
NN_model_on.summary()

## Train NN Model

In [None]:
# Early-stopping callback watching the validation loss with a patience of 20 epochs.
early_stop = keras.callbacks.EarlyStopping(patience=20, monitor='val_loss')

# Train on the original numpy arrays; a fifth of the training rows is reserved
# for validation via validation_split.
history_on = NN_model_on.fit(
    X_train_original,
    y_train_original,
    epochs=num_epochs,
    batch_size=minibatch_size,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)

## Initial Checks

In [None]:
# Draw the training history with the project helper, then title the current figure.
# NOTE(review): presumably plots training vs. validation loss - confirm in model_utils.
model_utils.plot_history(history_on)
plt.title('Preliminary NN with Original Numpy Input')

## Save NN Model

In [None]:
# After training, write the model to the shared model directory so the test
# cells below can reload it from disk.
NN_model_on.save(os.path.join(recon_model_path,'NN_model_prelim_originalnumpy2.h5'))

## Test NN Model

In [None]:
# Reload the saved model from disk; this rebinds NN_model_on to the reloaded copy.
NN_model_on = load_model(os.path.join(recon_model_path,'NN_model_prelim_originalnumpy2.h5'))

In [None]:
# Predict on the held-out test features and report the mean squared error
# against the test targets.
y_pred_on = NN_model_on.predict(X_test_original)
mse = mean_squared_error(y_true=y_test_original, y_pred=y_pred_on)
print("MSE: %.2f" % (mse,))

In [None]:
# Overlay test targets and model predictions, sample by sample in test-set row order.
x_ax = range(len(y_test_original))
plt.plot(x_ax, y_test_original, label="original")
plt.plot(x_ax, y_pred_on, label="predicted")
# Title typo fixed: the variable is pCO2 (letter O), not "pC02" (digit zero).
plt.title("pCO2 Test and Predicted Data for Preliminary NN with Original Numpy Input")
# Axis labels so the figure can be read on its own.
plt.xlabel("Test sample index")
plt.ylabel("pCO2")
plt.legend()
plt.show()

# Preliminary NN Model using Normalized Input

## Build NN Model

In [None]:
# Hyperparameters for the network trained on the normalised dataframes.
# -- architecture
n_neuron = 64              # units in each hidden Dense layer
activation = 'LeakyReLU'   # hidden-layer activation
# -- optimisation
learning_rate = 1e-3       # passed to the Adam optimizer
minibatch_size = 64        # batch_size for fit()
num_epochs = 50            # epochs requested from fit()
# -- bookkeeping
model_num = 1

In [None]:
# Number of feature columns; used as the input width of the first Dense layer below.
X_train_df_norm.shape[1]

In [None]:
# Three hidden layers and a linear output, sized from the normalised feature
# dataframe. Layers keep their default names.
NN_model_nprelim = Sequential([
    Dense(n_neuron, activation=activation,
          input_shape=(X_train_df_norm.shape[1],)),   # one input per feature column
    Dense(n_neuron, activation=activation),
    Dense(n_neuron, activation=activation),
    Dense(1, activation='linear'),
])

# Mean-squared-error loss, minimised with Adam at the configured learning rate.
NN_model_nprelim.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss='mse',
)

In [None]:
# Show the Keras summary of the normalised-dataframe model built above.
NN_model_nprelim.summary()

## Train NN Model

In [None]:
# Early-stopping callback watching the validation loss with a patience of 20 epochs.
early_stop = keras.callbacks.EarlyStopping(patience=20, monitor='val_loss')

# Train on the normalised dataframes; a fifth of the training rows is reserved
# for validation via validation_split.
history_nprelim = NN_model_nprelim.fit(
    X_train_df_norm,
    y_train_df_norm,
    epochs=num_epochs,
    batch_size=minibatch_size,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)

## Initial Checks

In [None]:
# Draw the training history with the project helper, then title the current figure.
# NOTE(review): presumably plots training vs. validation loss - confirm in model_utils.
model_utils.plot_history(history_nprelim)
plt.title('Preliminary NN with Normalized Dataframe Input')

## Save NN Model

In [None]:
# After training, write the model to the shared model directory so the test
# cells below can reload it from disk.
NN_model_nprelim.save(os.path.join(recon_model_path,'NN_model_prelim_normdfupdated.h5'))

## Test NN Model

In [None]:
# Reload the saved model from disk; this rebinds NN_model_nprelim to the reloaded copy.
NN_model_nprelim = load_model(os.path.join(recon_model_path,'NN_model_prelim_normdfupdated.h5'))

In [None]:
# Predict on the normalised test features and report the mean squared error
# against the normalised test targets (so the value is in normalised units).
y_pred_nprelim = NN_model_nprelim.predict(X_test_df_norm)
mse = mean_squared_error(y_true=y_test_df_norm, y_pred=y_pred_nprelim)
print("MSE: %.2f" % (mse,))

In [None]:
# Overlay normalised test targets and model predictions, sample by sample in
# test-set row order.
x_ax = range(len(y_test_df_norm))
plt.plot(x_ax, y_test_df_norm, label="original")
plt.plot(x_ax, y_pred_nprelim, label="predicted")
# Title typo fixed: the variable is pCO2 (letter O), not "pC02" (digit zero).
plt.title("pCO2 Test and Predicted Data for Preliminary NN with Normalized Dataframe Input")
# Axis labels so the figure can be read on its own.
plt.xlabel("Test sample index")
plt.ylabel("Normalized pCO2")
plt.legend()
plt.show()

# Preliminary NN Model using Normalized Numpy Input

## Build NN Model

In [None]:
# Hyperparameters for the network trained on the normalised numpy arrays.
# -- architecture
n_neuron = 64              # units in each hidden Dense layer
activation = 'LeakyReLU'   # hidden-layer activation
# -- optimisation
learning_rate = 1e-3       # passed to the Adam optimizer
minibatch_size = 64        # batch_size for fit()
num_epochs = 50            # epochs requested from fit()
# -- bookkeeping
model_num = 1

In [None]:
# Number of feature columns; used as the input width of the first Dense layer below.
X_train_n.shape[1]

In [None]:
# Three hidden layers and a linear output, sized from the normalised numpy
# feature matrix. Layers keep their default names.
NN_model_nn = Sequential([
    Dense(n_neuron, activation=activation,
          input_shape=(X_train_n.shape[1],)),   # one input per feature column
    Dense(n_neuron, activation=activation),
    Dense(n_neuron, activation=activation),
    Dense(1, activation='linear'),
])

# Mean-squared-error loss, minimised with Adam at the configured learning rate.
NN_model_nn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss='mse',
)

In [None]:
# Show the Keras summary of the normalised-numpy model built above.
NN_model_nn.summary()

## Train NN Model

In [None]:
# Early-stopping callback watching the validation loss with a patience of 20 epochs.
early_stop = keras.callbacks.EarlyStopping(patience=20, monitor='val_loss')

# Train on the normalised numpy arrays; a fifth of the training rows is
# reserved for validation via validation_split.
history_nn = NN_model_nn.fit(
    X_train_n,
    y_train_n,
    epochs=num_epochs,
    batch_size=minibatch_size,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)

## Initial Checks

In [None]:
# Draw the training history with the project helper, then title the current figure.
# NOTE(review): presumably plots training vs. validation loss - confirm in model_utils.
model_utils.plot_history(history_nn)
plt.title('Preliminary NN with Normalized Numpy Input')

## Save NN Model

In [None]:
# After training, write the model to the shared model directory so the test
# cells below can reload it from disk.
NN_model_nn.save(os.path.join(recon_model_path,'NN_model_prelim_normnumpyupdated.h5'))

## Test NN Model

In [None]:
# Reload the saved model from disk; this rebinds NN_model_nn to the reloaded copy.
NN_model_nn = load_model(os.path.join(recon_model_path,'NN_model_prelim_normnumpyupdated.h5'))

In [None]:
# Predict on the normalised test features and report the mean squared error
# against the normalised test targets (so the value is in normalised units).
y_pred_nn = NN_model_nn.predict(X_test_n)
mse = mean_squared_error(y_true=y_test_n, y_pred=y_pred_nn)
print("MSE: %.2f" % (mse,))

In [None]:
# Overlay normalised test targets and model predictions, sample by sample in
# test-set row order.
x_ax = range(len(y_test_n))
plt.plot(x_ax, y_test_n, label="original")
plt.plot(x_ax, y_pred_nn, label="predicted")
# Title typo fixed: the variable is pCO2 (letter O), not "pC02" (digit zero).
plt.title("pCO2 Test and Predicted Data for Preliminary NN with Normalized Numpy Input")
# Axis labels so the figure can be read on its own.
plt.xlabel("Test sample index")
plt.ylabel("Normalized pCO2")
plt.legend()
plt.show()

# Select Approach & Parameters

## Approach

- Based on preliminary experimentation, will train the NN model on original dataframes
- Comparison completed above for group reference, as data has not historically been normalized prior to training
- Regarding normalization
    - Likely need to debug or investigate normalizing specific input parameters
    - For both the dataframe and numpy normalized inputs, validation losses did not decrease with training losses. This could mean overfitting to the training set; maybe normalization makes it "too easy" for the algorithm to learn the training set.
- Regarding dataframe vs numpy array
    - The difference between dataframe and numpy input is likely not significant, as runs for original and normalized data were similar. 
    - NN based on original dataframe input had a slightly higher MSE than that based on the numpy input, but the difference likely was not significant/could be due to random variation in runs. 
    - Visually, use of the original dataframe resulted in more closely aligned predicted pCO2, particularly at the extremes. 

## Parameters

Experimentation with hyperparameters completed below

### Experiment with NN Model

In [None]:
# First Run
#n_neuron       = 100
#activation     = 'LeakyReLU'
#num_epochs     = 100
#learning_rate  = 0.002
#minibatch_size = 64
#model_num      = 1
#early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# loss: 124.3420 - val_loss: 289.1165
# MSE: 181.18
# stopped after 91 epochs 

In [None]:
# Second Run
#n_neuron       = 64
#activation     = 'LeakyReLU'
#num_epochs     = 100
#learning_rate  = 0.0015
#minibatch_size = 64
#model_num      = 1
#early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)
#NN_model_exp.add(Dense(n_neuron, activation=activation, name='hidden_layer_4')) # added a 4th layer

# loss: mid 100s - val_loss: mid 300s
# MSE: 228.09
# stopped after 49 epochs 

In [None]:
# Third Run
#n_neuron       = 64
#activation     = 'LeakyReLU'
#num_epochs     = 100
#learning_rate  = 0.0005
#minibatch_size = 64
#model_num      = 1
#early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
#NN_model_exp.add(Dense(n_neuron, activation=activation, name='hidden_layer_4')) # added a 4th layer

#loss: 193.3637 - val_loss: 375.5946
# MSE:
# stopped after 40 epochs, but at start was much more aligned

In [None]:
# 4th and 5th runs
#n_neuron       = 64
#activation     = 'ReLU'
#num_epochs     = 100
#learning_rate  = 0.0005
#minibatch_size = 64
#model_num      = 1
# and made only two hidden layers 
#early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# MSE: 289.26, loss: 257.6627 - val_loss: 366.7357 but trend MUCH more aligned, up patience from 10 to 20, and try Relu

# MSE: 267.70, loss: 225.0135 - val_loss: 318.7133, trend aligned even more, through almost all epochs

In [None]:
# 6th run
#n_neuron       = 100
#activation     = 'ReLU'
#num_epochs     = 100
#learning_rate  = 0.0008
#minibatch_size = 64
#model_num      = 1
# and made only two hidden layers 
#early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# ran for 74 epochs
# MSE: 255.24, loss: 180.7052 - val_loss: 294.3801 
# train/val trend not quite as aligned as previous run
# go back to first run and start again from there
# common between first run and this one: higher number of neurons, up again

In [None]:
# 7th Run and 8th Run
#n_neuron       = 150
#activation     = 'ReLU'
#num_epochs     = 100
#learning_rate  = 0.002
#minibatch_size = 64
#model_num      = 1
#early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# loss: 178.2955 - val_loss: 316.7465
# definitely not, trend not great again --> try with Relu instead of Leaky 
# MSE: 219.80, 162.3405 - val_loss: 343.4025

In [None]:
# 9th Run
#n_neuron       = 100
#activation     = 'ReLU'
#num_epochs     = 100
#learning_rate  = 0.002
#minibatch_size = 64
#model_num      = 1
#early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# MSE: 208.60, loss: 170.7158 - val_loss: 282.2546
# good train/val loss alignment but stopped at 64 epochs

In [None]:
# 10th Run
#n_neuron       = 100
#activation     = 'ReLU'
#num_epochs     = 100
#learning_rate  = 0.0005
#minibatch_size = 100
#model_num      = 1
#early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

#loss: 186.7061 - val_loss: 267.3068, MSE: 238.85
# but, best alignment between train and val loss so far
# take best run so far and increase batch size

Take best trends of all runs
- Relu over LeakyRelu
- Increase mini batch size to 100
- Don't alter neurons (worse when increase and decrease)
- Maintain learning rate, but maybe could go down, 0.0005 did well

In [None]:
# Runs 11-13. Run 12 (this configuration) is the one that was saved; run 13
# only swapped the activation back to Leaky ReLU, was worse, and was reverted.
# -- architecture
n_neuron = 100
activation = 'ReLU'
# -- optimisation
# A learning rate of 0.002 definitely overtrained: validation loss stopped
# decreasing after approx. 20 epochs (loss: 144.7867 - val_loss: 389.3859, MSE 208).
learning_rate = 5e-4
minibatch_size = 100
num_epochs = 100
# -- bookkeeping
model_num = 1
# Early-stopping callback watching the validation loss with a patience of 20 epochs.
early_stop = keras.callbacks.EarlyStopping(patience=20, monitor='val_loss')

# loss: 143.5514 - val_loss: 239.9906
# MSE: 192.47
# ran all 100 epochs, and really consistent train val losses

In [None]:
# Experimental network: three hidden layers of n_neuron units and a single
# linear output, with the input width taken from the original feature dataframe.
NN_model_exp = Sequential([
    Dense(n_neuron, name='hidden_layer_1', activation=activation,
          input_shape=(X_train_df.shape[1],)),   # one input per predictor column
    Dense(n_neuron, name='hidden_layer_2', activation=activation),
    Dense(n_neuron, name='hidden_layer_3', activation=activation),
    Dense(1, name='output_layer', activation='linear'),
])

# Mean-squared-error loss, minimised with Adam at the configured learning rate.
NN_model_exp.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
    loss='mse',
)

In [None]:
# Show the Keras summary of the experimental model built above.
NN_model_exp.summary()

### Train Experimental NN Model

In [None]:
# Train the experimental model on the original dataframes. `early_stop` comes
# from the hyperparameter cell above; a fifth of the training rows is reserved
# for validation via validation_split.
history_exp = NN_model_exp.fit(
    X_train_df,
    y_train_df,
    epochs=num_epochs,
    batch_size=minibatch_size,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)

### Initial Checks on Experimental NN Model

In [None]:
# Draw the training history of the experimental model with the project helper.
# NOTE(review): presumably plots training vs. validation loss - confirm in model_utils.
model_utils.plot_history(history_exp)

In [None]:
# Predict on the held-out test features with the in-memory experimental model
# and report the mean squared error against the test targets.
y_pred_exp = NN_model_exp.predict(X_test_df)
mse = mean_squared_error(y_true=y_test_df, y_pred=y_pred_exp)
print("MSE: %.2f" % (mse,))

In [None]:
# Overlay test targets and experimental-model predictions, sample by sample in
# test-set row order.
x_ax = range(len(y_test_df))
plt.plot(x_ax, y_test_df, label="original")
plt.plot(x_ax, y_pred_exp, label="predicted")
# Title typo fixed: the variable is pCO2 (letter O), not "pC02" (digit zero).
plt.title("pCO2 test and predicted data")
# Axis labels so the figure can be read on its own.
plt.xlabel("Test sample index")
plt.ylabel("pCO2")
plt.legend()
plt.show()

### Save Best Experimental NN Model as First of NN Below

In [None]:
# After training, save the best experimental model under the name used for the
# first of the five identical NN models below.
NN_model_exp.save(os.path.join(recon_model_path,'NN_model1.h5'))

# Five Identical NN Models 

## NN Model 1

In [None]:
# Load the model file written by the experimental section above.
NN_model1 = load_model(os.path.join(recon_model_path,'NN_model1.h5'))

## NN Model 2

## NN Model 3

## NN Model 4

## NN Model 5