# GR5074 - Projects in Advanced Machine Learning
# Spring 2022

---

</br>


### Predict World Happiness Rankings (cont'd)

Since you are now familiar with the World Happiness Rankings data, and have played a bit with predictions on traditional ML models, we're ready for the next step with Deep Learning. We're simplifying the AI Model Share code a bit so we can do some in-class illustrations of a fully-connected neural network, and some manipulations to its architecture, including regularization.

# 1. General Setup

In [None]:
# Install (or upgrade to the latest version of) the aimodelshare library.
# The leading "!" makes the notebook run the line as a shell command.
! pip install aimodelshare --upgrade

In [None]:
# [after class] add all the libraries/classes/functions you imported here
# to run Keras

import aimodelshare as ai
from aimodelshare import download_data 
from aimodelshare.aws import set_credentials
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer



In [None]:
# [after class] add all the definitions you are using throughout this notebook

# API endpoint of the competition; passed to set_credentials() and
# ai.Competition() further down.
apiurl = "https://c3maq947kb.execute-api.us-east-1.amazonaws.com/prod/m"


In [None]:
# get competition data
# NOTE(review): presumably this creates the world_happiness_competition_data/
# folder that the next cell reads its CSV files from -- confirm.

download_data('public.ecr.aws/y2e2a1d6/world_happiness_competition_data-repository:latest')

In [None]:
# Load data

# Training features, test features and training targets.
X_train = pd.read_csv('world_happiness_competition_data/X_train.csv')
X_test = pd.read_csv('world_happiness_competition_data/X_test.csv')
y_train = pd.read_csv('world_happiness_competition_data/y_train.csv')
# For each row, the name of the y_train column that holds the largest value
# (assumes y_train is one-hot encoded, one column per class -- TODO confirm).
y_train_labels = y_train.idxmax(axis=1)

# Preview the first rows of the training features.
X_train.head()

In [None]:
# Set credentials using modelshare.org username/password
# NOTE(review): only the API URL is passed here, so the username/password are
# presumably requested interactively -- confirm.

set_credentials(apiurl=apiurl)

In [None]:
# Instantiate Competition
# (mycompetition.submit_model() is used for every leaderboard submission below)

mycompetition = ai.Competition(apiurl)

# 2. Pre-process data and save pre-processor function


In [None]:
# We create the preprocessing pipelines for both numeric and categorical data.

# Every column except the two identifier columns and the two categorical
# columns is treated as a numeric feature.
numeric_features = X_train.drop(
    columns=['Country or region', 'name', 'region', 'sub-region']
).columns.tolist()

# Numeric columns: replace missing values with the median, then standardize.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

categorical_features = ['region', 'sub-region']

# Categorical columns: replace missing values with the modal value, then
# one-hot encode (categories unseen during fit are ignored at transform time).
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Final preprocessing object set up with ColumnTransformer.
# It is deliberately NOT called `preprocessor`: that name belongs to the
# preprocessor() function defined in the next cell, and sharing it means that
# re-running this cell would replace the function with a non-callable object.
column_transformer = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])

# Fit the preprocessing object; `preprocess` is what preprocessor() uses.
preprocess = column_transformer.fit(X_train)

In [None]:
# Write function to transform data with preprocessor

def preprocessor(data):
    """Transform raw feature data with the fitted ``preprocess`` object.

    Args:
        data: DataFrame holding the feature columns that ``preprocess`` was
            fitted on (e.g. X_train or X_test).

    Returns:
        Whatever ``preprocess.transform`` returns for ``data``.
    """
    # No explicit column dropping is needed: the ColumnTransformer only reads
    # the numeric and categorical columns it was configured with, and no
    # `remainder` was set, so any other column ('Country or region', 'name')
    # is left out of the result. The previous `data.drop(...)` call discarded
    # its result and therefore only cost an extra DataFrame copy.
    return preprocess.transform(data)

In [None]:
# check shape of X data after preprocessing it using our new function
# (the second entry of the shape is later stored as feature_count)

preprocessor(X_train).shape

In [None]:
# save preprocessor function
# NOTE(review): presumably this writes the "preprocessor.zip" file that the
# submit cells pass as preprocessor_filepath, with "" meaning the current
# directory -- confirm.

ai.export_preprocessor(preprocessor,"")

# 3. Fit models on preprocessed data and save preprocessor function and model

In [None]:
# import Keras functions and classes we will need 

from tensorflow.keras.optimizers import SGD
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout, BatchNormalization
from keras.regularizers import l1, l2, l1_l2
#from keras.callbacks import EarlyStopping, ModelCheckpoint


In [None]:
# some useful definitions in a single location
# (shared by every model cell below so the experiments stay comparable)

feature_count = preprocessor(X_train).shape[1] # count of features in input data
loss = 'categorical_crossentropy'  # passed to compile() as the loss
optimizer = 'sgd'                  # passed to compile() as the optimizer
epochs = 300                       # passed to fit() unless a cell overrides it


In [None]:
# Display the number of input features the first Dense layer will receive.
feature_count

## 3.1 Let's start with a simple fully-connected neural network (aka MLP)

In [None]:
# Baseline fully-connected network (MLP): three sigmoid hidden layers of
# 64 nodes each, followed by a 5-node softmax output layer.

keras_model = Sequential([
    Dense(64, input_dim=feature_count, activation='sigmoid'),
    Dense(64, activation='sigmoid'),
    Dense(64, activation='sigmoid'),
    Dense(5, activation='softmax'),   # why 5 nodes here? why softmax?
])

# TRY: other activation functions to try: 'relu', 'tanh'

# Compile the model with the shared loss/optimizer definitions
# (add other metrics?).
keras_model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

# Fit the network to the preprocessed training set.
keras_model.fit(preprocessor(X_train), y_train, epochs=epochs)

In [52]:
# Save model to ONNX file 

from aimodelshare.aimsonnx import model_to_onnx

# Convert the Keras model held in keras_model into an ONNX model object.
onnx_model = model_to_onnx(
    keras_model,
    framework='keras',
    transfer_learning=False,
    deep_learning=True        # note this change ;)
    )  

# Write the serialized model to model.onnx, the file the submit cell uploads.
with open("model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())

In [None]:
# Submit model:

# The softmax output layer yields one score per class for every test row;
# argmax(axis=1) picks the column index of the highest score in each row.
class_scores = keras_model.predict(preprocessor(X_test))
prediction_column_index = class_scores.argmax(axis=1)

# Translate each column index into the matching y_train column name.
label_names = y_train.columns
prediction_labels = [label_names[i] for i in prediction_column_index]

# Submit this model to the competition leaderboard.
mycompetition.submit_model(
    model_filepath="model.onnx",
    preprocessor_filepath="preprocessor.zip",
    prediction_submission=prediction_labels,
)

## 3.2 Does Dropout (regularization) during model training help? 

Dropout regularization can help ensure that we are not overfitting the model to training data.  

Dropout randomly sets a fraction (the `rate`) of a layer's input units to 0 at each update during training, which helps prevent overfitting.

In [None]:
# Model with Dropout regularization

# The network is built on `keras_model` (not on a separate variable) because
# the compile/fit calls below and the save/submit cells that follow all use
# `keras_model`; building it under another name would leave this architecture
# untrained and keep training the previous network instead.
keras_model = Sequential()
keras_model.add(Dense(64, input_dim=feature_count, activation='relu'))
keras_model.add(Dropout(.5))
keras_model.add(Dense(64, activation='relu'))
keras_model.add(Dropout(.5))
keras_model.add(Dense(64, activation='relu'))
keras_model.add(Dropout(.5))
keras_model.add(Dense(5, activation='softmax'))

# try changing proportion of dropout
# try selectively applying dropout in some layers

# Compile model
keras_model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=['accuracy']
    )

# Fitting the NN to the Training set
keras_model.fit(
    preprocessor(X_train),
    y_train,
    epochs=epochs
    )


In [None]:
# Save model to ONNX file 

# Convert the Keras model held in keras_model into an ONNX model object.
onnx_model = model_to_onnx(
    keras_model,
    framework='keras',
    transfer_learning=False,
    deep_learning=True        
    )  

# Write the serialized model to model.onnx (replacing any earlier file of
# that name); this is the file the submit cell uploads.
with open("model.onnx", "wb") as f:           
    f.write(onnx_model.SerializeToString())

In [None]:
# Submit model:

# The softmax output layer yields one score per class for every test row;
# argmax(axis=1) picks the column index of the highest score in each row.
class_scores = keras_model.predict(preprocessor(X_test))
prediction_column_index = class_scores.argmax(axis=1)

# Translate each column index into the matching y_train column name.
label_names = y_train.columns
prediction_labels = [label_names[i] for i in prediction_column_index]

# Submit this model to the competition leaderboard.
mycompetition.submit_model(
    model_filepath="model.onnx",
    preprocessor_filepath="preprocessor.zip",
    prediction_submission=prediction_labels,
)

## 3.2  Does L1 or L2 regularization during model training help?

Recall that L1 or L2 regularization (or both together) adds a penalty term to the loss function that pushes the model toward smaller parameters.  Shrinking parameters is a way to ensure that we do not overfit the model to training data.

Within Keras, a layer accepts three regularizer arguments (each defaults to `None`):
* `kernel_regularizer` (applied to the weights),
* `bias_regularizer` (applied to the bias unit), and
* `activity_regularizer` (applied to the layer activation).

In [None]:
## Add L2 regularization

# The network is built on `keras_model` (not on a separate variable) because
# the compile/fit calls below and the save/submit cells that follow all use
# `keras_model`; building it under another name would leave this architecture
# untrained and keep training the previous network instead.
keras_model = Sequential()
keras_model.add(Dense(64, input_dim=feature_count, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(5, activation='softmax'))

# TRY: turning regularizers off in some layers or keeping them only in one layer

# Compile model
keras_model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=['accuracy']
    )

# Fitting the NN to the Training set
keras_model.fit(
    preprocessor(X_train),
    y_train,
    epochs=epochs
    )


In [None]:
# Save model to ONNX file 

# Convert the Keras model held in keras_model into an ONNX model object.
onnx_model = model_to_onnx(
    keras_model,
    framework='keras',
    transfer_learning=False,
    deep_learning=True        
    )  

# Write the serialized model to model.onnx (replacing any earlier file of
# that name); this is the file the submit cell uploads.
with open("model.onnx", "wb") as f:           
    f.write(onnx_model.SerializeToString())

In [None]:
# Submit model:

# The softmax output layer yields one score per class for every test row;
# argmax(axis=1) picks the column index of the highest score in each row.
class_scores = keras_model.predict(preprocessor(X_test))
prediction_column_index = class_scores.argmax(axis=1)

# Translate each column index into the matching y_train column name.
label_names = y_train.columns
prediction_labels = [label_names[i] for i in prediction_column_index]

# Submit this model to the competition leaderboard.
mycompetition.submit_model(
    model_filepath="model.onnx",
    preprocessor_filepath="preprocessor.zip",
    prediction_submission=prediction_labels,
)

## 3.3 Let's use Batch Normalization to normalize the values flowing between layers

What happens if we use Batch Normalization to standardize the values inside the network to z-scores (slightly adjusted so that the results rarely equal zero)?

Batch normalization can be used to standardize each node's weighted sum of inputs plus bias (the resulting scalar values) before passing the result to the activation function.

It can also be used to standardize the output of a layer AFTER the activation function has been applied (e.g., sigmoid(weighted sum of inputs plus bias)).

In [None]:
# Model with Batch Normalization placed between each hidden Dense layer and
# its activation

# The network is built on `keras_model` (not on a separate variable) because
# the compile/fit calls below and the save/submit cells that follow all use
# `keras_model`; building it under another name would leave this architecture
# untrained and keep training the previous network instead.
keras_model = Sequential()
keras_model.add(Dense(64, input_dim=feature_count))
keras_model.add(BatchNormalization())
keras_model.add(Activation('relu'))
keras_model.add(Dense(64))
keras_model.add(BatchNormalization())
keras_model.add(Activation('relu'))
keras_model.add(Dense(64))
keras_model.add(BatchNormalization())
keras_model.add(Activation('relu'))
keras_model.add(Dense(5, activation='softmax'))

# TRY: turning on Batch normalization for some layer and off for others

# Compile model
keras_model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=['accuracy']
    )

# Fitting the NN to the Training set
keras_model.fit(
    preprocessor(X_train),
    y_train,
    epochs=epochs
    )
   

In [None]:
# Save model to ONNX file 

# Convert the Keras model held in keras_model into an ONNX model object.
onnx_model = model_to_onnx(
    keras_model,
    framework='keras',
    transfer_learning=False,
    deep_learning=True        
    )  

# Write the serialized model to model.onnx (replacing any earlier file of
# that name); this is the file the submit cell uploads.
with open("model.onnx", "wb") as f:           
    f.write(onnx_model.SerializeToString())

In [None]:
# Submit model:

# The softmax output layer yields one score per class for every test row;
# argmax(axis=1) picks the column index of the highest score in each row.
class_scores = keras_model.predict(preprocessor(X_test))
prediction_column_index = class_scores.argmax(axis=1)

# Translate each column index into the matching y_train column name.
label_names = y_train.columns
prediction_labels = [label_names[i] for i in prediction_column_index]

# Submit this model to the competition leaderboard.
mycompetition.submit_model(
    model_filepath="model.onnx",
    preprocessor_filepath="preprocessor.zip",
    prediction_submission=prediction_labels,
)

## 3.4 Experimenting with depth of model (number of hidden layers)

In [None]:
# Deeper model: six hidden layers, each with L2 regularization

# The network is built on `keras_model` (not on a separate variable) because
# the compile/fit calls below and the save/submit cells that follow all use
# `keras_model`; building it under another name would leave this architecture
# untrained and keep training the previous network instead.
keras_model = Sequential()
keras_model.add(Dense(64, input_dim=feature_count, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(5, activation='softmax'))

# TRY: reducing the number of nodes and/or turning regularizers off in some layers

# Compile model
keras_model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=['accuracy']
    )

# Fitting the NN to the Training set
keras_model.fit(
    preprocessor(X_train),
    y_train,
    epochs=epochs,
    )



In [None]:
# Save model to ONNX file 

# Convert the Keras model held in keras_model into an ONNX model object.
onnx_model = model_to_onnx(
    keras_model,
    framework='keras',
    transfer_learning=False,
    deep_learning=True        
    )  

# Write the serialized model to model.onnx (replacing any earlier file of
# that name); this is the file the submit cell uploads.
with open("model.onnx", "wb") as f:           
    f.write(onnx_model.SerializeToString())

In [None]:
# Submit model:

# The softmax output layer yields one score per class for every test row;
# argmax(axis=1) picks the column index of the highest score in each row.
class_scores = keras_model.predict(preprocessor(X_test))
prediction_column_index = class_scores.argmax(axis=1)

# Translate each column index into the matching y_train column name.
label_names = y_train.columns
prediction_labels = [label_names[i] for i in prediction_column_index]

# Submit this model to the competition leaderboard.
mycompetition.submit_model(
    model_filepath="model.onnx",
    preprocessor_filepath="preprocessor.zip",
    prediction_submission=prediction_labels,
)

## 3.5 Experimenting with an increase in epochs


In [None]:
# Model with best L2 regularization, trained for 1000 epochs
# (more than three times the shared `epochs` setting of 300)

# The network is built on `keras_model` (not on a separate variable) because
# the compile/fit calls below use `keras_model`; building it under another
# name would leave this architecture untrained and keep training the previous
# network instead.
keras_model = Sequential()
keras_model.add(Dense(64, input_dim=feature_count, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)))
keras_model.add(Dense(5, activation='softmax'))

# TRY: reduce epochs to 20, 50

# Compile model
keras_model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=['accuracy']
    )

# Fitting the NN to the Training set
keras_model.fit(
    preprocessor(X_train),
    y_train,
    epochs=1000 # note increase here
    )
