# Basic Neural Networks 

(Baseline models, for comparison before applying transfer learning)

In [None]:
# Standard imports
import os
import datetime
from pathlib import Path
from collections import defaultdict
import scipy
import random
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import joblib
import pickle

# Machine learning libraries
import sklearn            # machine-learning library with many algorithms implemented
#import xgboost as xgb     # extreme gradient boosting (XGB)
#from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import *
from tensorflow.keras import Sequential

# Python file with supporting functions
import model_utils

In [None]:
# Directory where the trained models in this notebook are saved to and reloaded from.
recon_model_path = '/home/julias/MLEE-final-project/models/saved_models/recon_models'

# Load Split Datasets and Create Versions for Experimentation

## Load Split Datasets

In [None]:
# Directory holding the pre-split NetCDF datasets.
split_dir = Path('/home/julias/MLEE-final-project/proc_data/split_datasets')


def _load_split(name):
    """Read `<split_dir>/<name>.nc` into a DataFrame and drop rows containing NaN.

    The dataset is opened in a context manager so the underlying file handle
    is released as soon as the data has been converted to a DataFrame.
    """
    with xr.open_dataset(split_dir / f'{name}.nc') as ds:
        return ds.to_dataframe().dropna()


X_df = _load_split('X')
y_df = _load_split('y')
X_train_df = _load_split('X_train')
y_train_df = _load_split('y_train')
X_test_df = _load_split('X_test')
y_test_df = _load_split('y_test')

# Predictors and targets are NaN-filtered independently, so make sure the
# pairs that are fed to fit()/predict() still have matching row counts.
assert len(X_train_df) == len(y_train_df), "X_train / y_train row counts differ after dropna()"
assert len(X_test_df) == len(y_test_df), "X_test / y_test row counts differ after dropna()"

Check that data was saved and loaded properly:

In [None]:
# Display the loaded training predictors as a sanity check.
X_train_df

In [None]:
# Display the loaded test targets as a sanity check.
y_test_df

### Create Numpy Arrays for Original Data

In [None]:
# Snapshot the un-normalized data as NumPy arrays.
# copy=True guarantees the arrays do not share memory with the DataFrames,
# so later in-place changes to the frames cannot alter these "original" arrays.
# Targets are flattened to 1-D.
X_original = X_df.to_numpy(copy=True)
y_original = y_df.to_numpy(copy=True).ravel()
X_train_original = X_train_df.to_numpy(copy=True)
y_train_original = y_train_df.to_numpy(copy=True).ravel()
X_test_original = X_test_df.to_numpy(copy=True)
y_test_original = y_test_df.to_numpy(copy=True).ravel()

## Create Normalized Dataframes

*Note: Done here, as opposed to in test/train split, so that I can save the original train/test datasets and later determine whether normalization led to improvement. (My group has not historically normalized data before training.)*

In [None]:
#X_df_norm = (X_df - X_df.mean())/X_df.std() 
#y_df_norm = (y_df - y_df.mean())/y_df.std()
#X_train_df_norm = (X_train_df - X_train_df.mean())/X_train_df.std()
#y_train_df_norm = (y_train_df - y_train_df.mean())/y_train_df.std()
#X_test_df_norm = (X_test_df - X_test_df.mean())/X_test_df.std()
#y_test_df_norm = (y_test_df - y_test_df.mean())/y_test_df.std()

**Method changed to the approach below** (only the physical predictor and target columns are normalized):
- This way, time and lat/lon conversions are not normalized
- Note that the coordinates do not appear to be passed to the ML algorithms, so T0, T1, A, B, and C are the only time and space inputs (as desired; we don't want two forms of time and space input)

In [None]:
# Columns that are standardized; every other column (e.g. the time / lat-lon
# transforms) is carried over unchanged.
norm_X_cols = ['SSS', 'SST', 'MLD', 'Chl', 'XCO2']
norm_y_cols = ['pCO2']


def _standardize_columns(df, columns):
    """Return a copy of `df` with `columns` rescaled to zero mean and unit std.

    Working on a copy keeps the input frame untouched, so the original
    (un-normalized) DataFrames stay available for the comparison model.
    """
    out = df.copy()
    out[columns] = (df[columns] - df[columns].mean()) / df[columns].std()
    return out


# NOTE(review): each split is standardized with its own mean/std (same as before).
# Standard practice is to reuse the training-set statistics for the test split;
# confirm which behaviour is intended.
X_df_norm = _standardize_columns(X_df, norm_X_cols)
X_train_df_norm = _standardize_columns(X_train_df, norm_X_cols)
X_test_df_norm = _standardize_columns(X_test_df, norm_X_cols)

y_df_norm = _standardize_columns(y_df, norm_y_cols)
y_train_df_norm = _standardize_columns(y_train_df, norm_y_cols)
y_test_df_norm = _standardize_columns(y_test_df, norm_y_cols)

In [None]:
# Display the training predictors after normalization.
X_train_df_norm

## Create Numpy Arrays 

In [None]:
# Convert the normalized frames to NumPy arrays; targets are flattened to 1-D.
X_n, X_train_n, X_test_n = (
    frame.to_numpy() for frame in (X_df_norm, X_train_df_norm, X_test_df_norm)
)
y_n, y_train_n, y_test_n = (
    frame.to_numpy().ravel() for frame in (y_df_norm, y_train_df_norm, y_test_df_norm)
)

# Preliminary NN Model using Original (Not Normalized) Input

## Build NN Model

In [None]:
# NOTE that number of input layer neurons must correspond to number of predictor variables

In [None]:
# Hyperparameters for the preliminary network trained on the original inputs.
# -- architecture
n_neuron = 64               # units in each hidden Dense layer
activation = 'LeakyReLU'    # activation used by every hidden layer
# -- optimisation
learning_rate = 1e-3        # passed to the Adam optimizer
minibatch_size = 64         # batch_size passed to fit()
num_epochs = 50             # upper bound on training epochs
# -- bookkeeping
model_num = 1               # model index; not read by the cells shown here

In [None]:
# Number of columns in the training target frame.
# NOTE(review): the preceding note is about the predictor count, which would be
# X_train_df.shape[1]; confirm which of the two was meant to be checked here.
y_train_df.shape[1]

In [None]:
# Fully connected regression network: three hidden layers of `n_neuron` units
# followed by a single linear output unit.
NN_model = Sequential()

# The input width is taken from the un-normalized training array this model is
# fitted on (one input per predictor column).
NN_model.add(Dense(n_neuron, name='hidden_layer_1', activation=activation,
                   input_shape=(X_train_original.shape[1],)))  # the 1st hidden layer
NN_model.add(Dense(n_neuron, name='hidden_layer_2', activation=activation))  # the 2nd hidden layer
NN_model.add(Dense(n_neuron, name='hidden_layer_3', activation=activation))  # the 3rd hidden layer
NN_model.add(Dense(1, name='output_layer', activation='linear'))  # the output layer

# Mean-squared-error loss, optimised with Adam at the configured learning rate.
NN_model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))

In [None]:
# Print the layer-by-layer architecture and parameter counts.
NN_model.summary()

In [None]:
# Stop training once the validation loss has gone 20 epochs without improving.
early_stop = keras.callbacks.EarlyStopping(patience=20, monitor='val_loss')

# Fit on the un-normalized training arrays, holding out 20% of the rows for validation.
fit_options = dict(
    batch_size=minibatch_size,
    epochs=num_epochs,
    validation_split=0.2,
    verbose=1,
    callbacks=[early_stop],
)
history = NN_model.fit(X_train_original, y_train_original, **fit_options)

## Initial Checks

The validation loss reaches minima and then jumps around; plot the history to see whether the trend indicates overfitting.

In [None]:
# Plot the recorded training history with the project helper
# (presumably training vs. validation loss per epoch; see model_utils).
model_utils.plot_history(history)

Validation loss does jump around and is generally higher than the training loss, indicating that the model isn't generalizing well

### Normalize so more comparable to other plot

In [None]:
# List the metrics recorded during training (the cells below read 'loss' and 'val_loss').
history.history.keys()

In [None]:
# Standardize the per-epoch training loss (zero mean, unit std) so its shape
# can be compared with other loss curves on a common scale.
train_loss = np.asarray(history.history['loss'])
norm_loss = (train_loss - np.mean(train_loss)) / np.std(train_loss)

In [None]:
# Standardize the per-epoch validation loss in the same way as the training loss.
val_loss = np.asarray(history.history['val_loss'])
norm_val_loss = (val_loss - np.mean(val_loss)) / np.std(val_loss)

In [None]:
# Draw the standardized training and validation loss curves against epoch number.
x_ax = history.epoch
for curve, curve_label in ((norm_loss, "training"), (norm_val_loss, "validation")):
    plt.plot(x_ax, curve, label=curve_label)
plt.title("Normalized Training and Validation Loss over Epochs")
plt.legend()
plt.show()

## Save NN Model

In [None]:
# Persist the trained model (original-input variant) to the shared model directory.
oprelim_model_file = os.path.join(recon_model_path, 'NN_model_oprelim_1.h5')
NN_model.save(oprelim_model_file)

## Test NN Model

In [None]:
# Reload the saved model from disk before evaluating it on the test data.
oprelim_model_file = os.path.join(recon_model_path, 'NN_model_oprelim_1.h5')
NN_prelim_model = load_model(oprelim_model_file)

In [None]:
# Evaluate on the held-out test split.
# Use the NumPy arrays captured before normalization rather than the DataFrames,
# so the inputs match the un-normalized data this model was trained on.
y_pred = NN_prelim_model.predict(X_test_original)
mse = mean_squared_error(y_test_original, y_pred)
print("MSE: %.2f" % mse)

In [None]:
# Overlay observed and predicted test-set values, indexed by sample position.
# The observed values come from the un-normalized target array so they are on
# the same scale as the predictions of the model trained on original data.
x_ax = range(len(y_test_original))
plt.plot(x_ax, y_test_original, label="original")
plt.plot(x_ax, y_pred, label="predicted")
plt.title("pCO2 test and predicted data")
plt.legend()
plt.show()

# Preliminary NN Model using Normalized Input

## Build NN Model

In [None]:
# Hyperparameters for the preliminary network trained on the normalized inputs
# (same values as the original-input model so the two runs are comparable).
# -- architecture
n_neuron = 64               # units in each hidden Dense layer
activation = 'LeakyReLU'    # activation used by every hidden layer
# -- optimisation
learning_rate = 1e-3        # passed to the Adam optimizer
minibatch_size = 64         # batch_size passed to fit()
num_epochs = 50             # upper bound on training epochs
# -- bookkeeping
model_num = 1               # model index; not read by the cells shown here

In [None]:
# Same architecture as the original-input model: three hidden layers of
# `n_neuron` units and one linear output unit, here built for the normalized data.
NN_model_nprelim = Sequential()

# The input width is taken from the normalized training array this model is fitted on.
NN_model_nprelim.add(Dense(n_neuron, activation=activation,
                           input_shape=(X_train_n.shape[1],)))  # the 1st hidden layer
NN_model_nprelim.add(Dense(n_neuron, activation=activation))  # the 2nd hidden layer
NN_model_nprelim.add(Dense(n_neuron, activation=activation))  # the 3rd hidden layer
NN_model_nprelim.add(Dense(1, activation='linear'))  # the output layer

# Mean-squared-error loss, optimised with Adam at the configured learning rate.
NN_model_nprelim.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))

In [None]:
# Print the layer-by-layer architecture and parameter counts.
NN_model_nprelim.summary()

## Train NN Model

In [None]:
# Stop training once the validation loss has gone 20 epochs without improving.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

# Fit on the normalized training arrays, holding out 20% of the rows for validation.
history_nprelim = NN_model_nprelim.fit(X_train_n, y_train_n,
                            batch_size      = minibatch_size,
                            epochs          = num_epochs,
                            validation_split= 0.2,
                            verbose         = 1,
                            callbacks       = [early_stop])

## Initial Checks

The validation loss reaches minima and then jumps around; plot the history to see whether the trend indicates overfitting.

In [None]:
# Plot the recorded training history with the project helper
# (presumably training vs. validation loss per epoch; see model_utils).
model_utils.plot_history(history_nprelim)

While the training loss goes down substantially, the validation loss does not show the same trend. Instead, it remains (relatively) high and jumps around.
- Completely fine, just signals that hyperparameter tuning will be needed (as expected)

### Normalize so more comparable to other plot

In [None]:
# Standardize the per-epoch training loss (zero mean, unit std) so its shape
# can be compared with the other model's loss curve on a common scale.
train_nloss = np.asarray(history_nprelim.history['loss'])
norm_nloss = (train_nloss - np.mean(train_nloss)) / np.std(train_nloss)

In [None]:
# Standardize the per-epoch validation loss in the same way as the training loss.
val_nloss = np.asarray(history_nprelim.history['val_loss'])
norm_val_nloss = (val_nloss - np.mean(val_nloss)) / np.std(val_nloss)

In [None]:
# Draw the standardized training and validation loss curves against epoch number.
x_ax = history_nprelim.epoch
for curve, curve_label in ((norm_nloss, "training"), (norm_val_nloss, "validation")):
    plt.plot(x_ax, curve, label=curve_label)
plt.title("Normalized Training and Validation Loss over Epochs")
plt.legend()
plt.show()

## Save NN Model

In [None]:
# Persist the trained model (normalized-input variant) to the shared model directory.
nprelim_model_file = os.path.join(recon_model_path, 'NN_model_nprelim_1.h5')
NN_model_nprelim.save(nprelim_model_file)

## Test NN Model

In [None]:
# Reload the saved normalized-input model from disk before evaluating it.
nprelim_model_file = os.path.join(recon_model_path, 'NN_model_nprelim_1.h5')
NN_model_nprelim = load_model(nprelim_model_file)

In [None]:
# Evaluate on the normalized test arrays (the same representation used for training).
# The MSE is therefore in standardized pCO2 units and is not directly comparable
# with the MSE of the model trained on un-normalized data.
y_pred_nprelim = NN_model_nprelim.predict(X_test_n)
mse = mean_squared_error(y_test_n, y_pred_nprelim)
print("MSE: %.2f" % mse)

In [None]:
# Overlay observed and predicted (normalized) test-set values, indexed by sample position.
x_ax = range(len(y_test_n))
plt.plot(x_ax, y_test_n, label="original")
plt.plot(x_ax, y_pred_nprelim, label="predicted")
plt.title("pCO2 test and predicted data")
plt.legend()
plt.show()

# Select Approach & Parameters

## Approach

- Based on preliminary experimentation, will train the NN model on normalized data, as is standard machine learning practice
- Comparison completed above for group reference, as data has not historically been normalized prior to training

## Parameters

Experimentation with hyperparameters completed below

### Experiment with NN Model

In [None]:
# Hyperparameters for the experimental network (wider layers, more epochs,
# higher learning rate than the preliminary models).
# -- architecture
n_neuron = 100              # units in each hidden Dense layer
activation = 'LeakyReLU'    # activation used by every hidden layer
# -- optimisation
learning_rate = 2e-3        # passed to the Adam optimizer
minibatch_size = 64         # batch_size passed to fit()
num_epochs = 100            # upper bound on training epochs
# -- bookkeeping
model_num = 1               # model index; not read by the cells shown here

# Stop training once the validation loss has gone 20 epochs without improving.
early_stop = keras.callbacks.EarlyStopping(patience=20, monitor='val_loss')

In [None]:
# Experimental network: three hidden layers of `n_neuron` units and one linear
# output unit, trained on the normalized data.
NN_model_exp = Sequential()

# The input width is taken from the normalized training array this model is fitted on.
NN_model_exp.add(Dense(n_neuron, activation=activation, name='hidden_layer_1',
                       input_shape=(X_train_n.shape[1],)))  # the 1st hidden layer
NN_model_exp.add(Dense(n_neuron, activation=activation, name='hidden_layer_2'))  # the 2nd hidden layer
NN_model_exp.add(Dense(n_neuron, activation=activation, name='hidden_layer_3'))  # the 3rd hidden layer
NN_model_exp.add(Dense(1, activation='linear', name='output_layer'))  # the output layer

# Mean-squared-error loss, optimised with Adam at the configured learning rate.
NN_model_exp.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))

In [None]:
# Print the layer-by-layer architecture and parameter counts.
NN_model_exp.summary()

### Train Experimental NN Model

In [None]:
# Fit the experimental model on the normalized training arrays, holding out 20%
# of the rows for validation; `early_stop` comes from the hyperparameter cell.
history_exp = NN_model_exp.fit(X_train_n, y_train_n,
                            batch_size      = minibatch_size,
                            epochs          = num_epochs,
                            validation_split= 0.2,
                            verbose         = 1,
                            callbacks       = [early_stop])

### Initial Checks on Experimental NN Model

In [None]:
# Plot the recorded training history with the project helper
# (presumably training vs. validation loss per epoch; see model_utils).
model_utils.plot_history(history_exp)

In [None]:
# Evaluate the experimental model on the normalized test arrays
# (the MSE is in standardized pCO2 units).
y_pred_exp = NN_model_exp.predict(X_test_n)
mse = mean_squared_error(y_test_n, y_pred_exp)
print("MSE: %.2f" % mse)

In [None]:
# Overlay observed and predicted (normalized) test-set values, indexed by sample position.
x_ax = range(len(y_test_n))
plt.plot(x_ax, y_test_n, label="original")
plt.plot(x_ax, y_pred_exp, label="predicted")
plt.title("pCO2 test and predicted data")
plt.legend()
plt.show()

### Save Best Experimental NN Model as First of NN Below

In [None]:
# Persist the experimental model as the first of the NN models used below.
model1_file = os.path.join(recon_model_path, 'NN_model1.h5')
NN_model_exp.save(model1_file)

# Five Identical NN Models 

## NN Model 1

In [None]:
# Reload the saved experimental model from disk as NN model 1.
model1_file = os.path.join(recon_model_path, 'NN_model1.h5')
NN_model1 = load_model(model1_file)

## NN Model 2

## NN Model 3

## NN Model 4

## NN Model 5