# Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import keras
import math
import datetime, os
from keras.callbacks import EarlyStopping
from sklearn import preprocessing
from sklearn.model_selection import KFold
from functions.common_function import *
from functions.initialize_model import initialize_model_expanded
from scipy.stats import pearsonr

'''Enabling plotting of graphs just below the plotting commands'''
%matplotlib inline
'''Enabling the disply of all rows and columns within the dataframe'''
pd.set_option("display.max_rows", None, "display.max_columns", None)


# Constants

In [None]:
# --- Dataset layout -------------------------------------------------------
# Number of feature columns and target columns used by the model.
num_feature = 8
num_target = 3
# Positions of the categorical columns in the raw dataset.
cat_col = [4, 5]
# Feature count with the categorical columns subtracted.
num_ori_feature = num_feature - len(cat_col)

# --- Training configuration ----------------------------------------------
bandwidth = 100
num_epochs = 10000
num_batch = 10
num_kfold = 10
# Sub-folder name used for the TensorBoard logs and the saved models.
directory_name = "Choudhury_Imitation_Expanded"

# --- Normalisation limits --------------------------------------------------
# One row per column of the converted dataset: the first `num_feature` rows
# belong to the features and the last `num_target` rows to the targets.
# 'lower' / 'higher' are the bounds used for normalisation; 'ref' is only
# filled in for the first four rows.
# NOTE(review): the exact meaning of 'ref' is not visible here - confirm.
limit = pd.DataFrame(
    dict(
        lower=[303, 20, 0, 2, 0, 0, 0, 0, 122, 1236, 14],
        higher=[840, 44, 17, 5, 1, 1, 1, 1, 408, 3240, 101],
        ref=[530, 40, 14, 3.2] + [np.nan] * 7,
    )
)

# Read the expanded Choudhury dataset from the CSV file (path is relative to
# the notebook's working directory).
dataset = pd.read_csv(
    "Dataset/Choudhury_Expanded_Dataset.csv"
)

# Implementation

In [None]:
# Convert the categorical columns into a binary representation. The category
# values passed to convert_cat are collected from the columns listed in
# `cat_col`, so the two can never drift apart (previously the indices 4 and 5
# were repeated here by hand).
converted_dataset = convert_cat(
    dataset,
    cat_col,
    num_ori_feature,
    num_target,
    [dataset.iloc[:, col].unique() for col in cat_col],
)

# Normalise the converted dataset using the lower / higher bounds in `limit`.
normalised_dataset = normalise(converted_dataset, limit)

# Split the normalised data into the model inputs and the expected outputs.
feature, target = x_y_split(normalised_dataset, num_feature, num_target)

# Shuffled K-fold splitter. No random_state is given, so the fold assignment
# changes from run to run.
kfold = KFold(n_splits=num_kfold, shuffle=True)
fold_no = 1
# Evaluation results of every fold, appended in fold order.
MAE = []
MSE = []
RMSE = []

# Cross validation: for every split, build a model, train it on the training
# rows and score it on the held-out rows.
for train, val in kfold.split(feature, target):
    model = initialize_model_expanded(num_feature, num_target, 'sigmoid')

    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    # Early stopping watches the *training* loss ('loss'), because fit() below
    # receives no validation data; training stops after 1800 epochs without
    # improvement. TensorBoard logs go to logs/<directory_name>/ANN_<fold>.
    early_stopping = EarlyStopping(monitor='loss', mode='min', verbose=1, patience=1800)
    logdir = os.path.join(f"logs/{directory_name}", "ANN_" + str(fold_no))
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        logdir,
        histogram_freq=1,
        write_graph=True,
        write_images=True,
    )

    history = model.fit(
        feature.iloc[train],
        target.iloc[train],
        epochs=num_epochs,
        batch_size=num_batch,
        callbacks=[tensorboard_callback, early_stopping],
    )

    # evaluate() returns the loss followed by the compiled metrics.
    # NOTE(review): the indexing below assumes initialize_model_expanded
    # compiles the model with MAE as loss and MSE, RMSE as metrics (in that
    # order) - confirm against that helper.
    result = model.evaluate(feature.iloc[val], target.iloc[val], batch_size=128)
    MAE.append(result[0])
    MSE.append(result[1])
    RMSE.append(result[2])

    # Backslashes are escaped explicitly: the previous "\{" and "\m" were
    # invalid escape sequences (SyntaxWarning on Python 3.12+). The resulting
    # path string is unchanged.
    # NOTE(review): the path uses Windows separators; on other platforms the
    # backslashes become part of a single directory name.
    model.save(f"Model\\{directory_name}\\model_{fold_no}")
    print("Saved model to disk")
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {result[0]}')
    fold_no += 1

# Print the score of every fold, followed by the averages over all folds.
print(
    '------------------------------------------------------------------------',
    'Score per fold',
    sep='\n',
)
for pos, fold_mae in enumerate(MAE):
    print('------------------------------------------------------------------------')
    print(f'> Fold {pos + 1} - MAE: {fold_mae} - MSE: {MSE[pos]}- RMSE: {RMSE[pos]}')
print(
    '------------------------------------------------------------------------',
    'Average scores for all folds:',
    sep='\n',
)
# Mean over the folds; the spread (standard deviation) is reported for MAE only.
print(f'> MAE: {np.mean(MAE)} - Standard Deviation: {np.std(MAE)}')
print(f'> MSE: {np.mean(MSE)}')
print(f'> RMSE: {np.mean(RMSE)}')
print('------------------------------------------------------------------------')

# Loading of Model

In [None]:
# Reload the model of the fold with the lowest recorded MAE value (the first
# such fold when several tie). Fold numbers are 1-based, hence the "+ 1".
best_fold = MAE.index(min(MAE)) + 1
# Backslashes are escaped explicitly: the previous "\{" and "\m" were invalid
# escape sequences (SyntaxWarning on Python 3.12+). The resulting path string
# is unchanged and matches the one used when the models were saved.
loaded_model = keras.models.load_model(f"Model\\{directory_name}\\model_{best_fold}")

print("Loaded model from disk")

# Recompile the loaded model with MAE as loss and MSE / RMSE as metrics.
# No evaluation is run in this cell; the model is only prepared for later use.
loaded_model.compile(
    loss='MeanAbsoluteError',
    optimizer='SGD',
    metrics=[
        tf.keras.metrics.MeanSquaredError(),
        tf.keras.metrics.RootMeanSquaredError(),
    ],
)

# Visualisation of Predictions using Best Model from Cross Validation

In [None]:
# Predict on the complete (normalised) feature set with the reloaded model and
# label the output columns with the target column names.
prediction = pd.DataFrame(loaded_model.predict(feature), columns=get_col_names(target))

# Lower (column 0) and upper (column 1) limits of the target variables, taken
# from the rows of `limit` that follow the feature rows; needed to rescale the
# normalised values.
min_y, max_y = (
    limit.iloc[num_feature:num_feature + num_target, bound_col].to_list()
    for bound_col in (0, 1)
)

# One Pearson correlation per row, computed between that row's expected and
# predicted target values.
corr_list = []
for row_no in range(len(feature)):
    # Rescale the normalised expected / predicted values of this row.
    # (assumes inverse_transform returns a 1-D sequence - TODO confirm)
    expected = pd.DataFrame(
        inverse_transform(target.iloc[row_no].to_list(), max_y, min_y)
    )
    predicted = pd.DataFrame(
        inverse_transform(prediction.iloc[row_no].to_list(), max_y, min_y)
    )

    # Side-by-side table of expected vs. predicted values, one line per target.
    comparison_df = pd.concat([expected, predicted], axis=1)
    comparison_df.columns = ['Expected', 'Predicted']
    comparison_df.index = get_col_names(target)
    display(comparison_df.style.set_caption(f"Element {row_no + 1}"))

    corr, _ = pearsonr(expected.iloc[:, 0].tolist(), predicted.iloc[:, 0].tolist())
    corr_list.append(corr)

# Print the Pearson correlation of every iteration (one entry of corr_list per
# dataset row), followed by the mean and standard deviation over all entries.
# The headers previously said "fold", copied from the cross-validation report;
# these values are per iteration, not per fold.
print('------------------------------------------------------------------------')
print('Score per iteration')
for position, corr_value in enumerate(corr_list, start=1):
    print('------------------------------------------------------------------------')
    print(f'> Iteration {position} - Pearson Correlation: {corr_value}')
print('------------------------------------------------------------------------')
print('Average scores for all iterations:')
# NOTE(review): a single NaN correlation makes both np.mean and np.std NaN.
print(f'> Average Pearsons Correlation: {np.mean(corr_list)} - Standard Deviation: {np.std(corr_list)}')
print('------------------------------------------------------------------------')