# Import

In [1]:
# Standard library
import math

# Third-party (these preceded the wildcard import in the original order)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold

# Project-local. The wildcard import stays ahead of the explicit imports below
# so that those explicit names keep taking precedence on any name clash.
from functions.common_function import *
from functions.initialize_model import initialize_model_expanded

# Third-party (these followed the wildcard import in the original order)
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split

'''Enabling plotting of graphs just below the plotting commands'''
%matplotlib inline
'''Enabling the disply of all rows and columns within the dataframe'''
pd.set_option("display.max_rows", None, "display.max_columns", None)

# Constant

In [2]:
# Column bookkeeping for the dataset.
num_feature = 8                                   # number of feature columns passed to x_y_split
cat_col = [4, 5]                                  # positional indices of the categorical columns
num_ori_feature = num_feature - len(cat_col)
num_target = 3                                    # number of target columns

# One row per column: the first `num_feature` rows are features, the last
# `num_target` rows are targets. 'lower'/'higher' are the bounds handed to
# normalise(); 'ref' is only filled in for the first four rows.
limit = pd.DataFrame(
    {
        "lower": [303, 20, 0, 2, 0, 0, 0, 0, 122, 1236, 14],
        "higher": [840, 44, 17, 5, 1, 1, 1, 1, 408, 3240, 101],
        "ref": [530, 40, 14, 3.2] + [np.nan] * 7,
    }
)


# Load the expanded Choudhury dataset from its project-relative CSV file.
dataset = pd.read_csv(filepath_or_buffer="Dataset/Choudhury_Expanded_Dataset.csv")

# Implementation

In [3]:
# Convert the categorical columns into a binary representation.
# The unique values of each categorical column are gathered from `cat_col`
# itself, so the column indices are declared in exactly one place.
cat_values = [dataset.iloc[:, col].unique() for col in cat_col]
converted_dataset = convert_cat(dataset, cat_col, num_ori_feature, num_target, cat_values)

# Normalise the dataset according to the lower and higher limit values.
normalised_dataset = normalise(converted_dataset, limit)

# Split the normalised data into feature and target parts.
feature, target = x_y_split(normalised_dataset, num_feature, num_target)

# Evaluate a linear regression model with 10-fold cross-validation.
# cross_validate scores all three metrics from one fit per fold, instead of
# refitting the model on every fold once per metric.
linear_reg_model = LinearRegression()
cv_results = cross_validate(
    linear_reg_model,
    feature,
    target,
    cv=10,
    scoring=(
        'neg_mean_absolute_error',
        'neg_mean_squared_error',
        'neg_root_mean_squared_error',
    ),
)

# The scorers return negated errors; take the absolute value to report the
# errors as positive numbers.
MAE = np.abs(cv_results['test_neg_mean_absolute_error'])
MSE = np.abs(cv_results['test_neg_mean_squared_error'])
RMSE = np.abs(cv_results['test_neg_root_mean_squared_error'])

# Report the per-fold scores followed by their averages.
separator = '------------------------------------------------------------------------'
print(separator)
print('Score per fold')
for fold_no, (fold_mae, fold_mse, fold_rmse) in enumerate(zip(MAE, MSE, RMSE), start=1):
    print(separator)
    print(f'> Fold {fold_no} - MAE: {fold_mae} - MSE: {fold_mse}- RMSE: {fold_rmse}')
print(separator)
print('Average scores for all folds:')
print(f'> MAE: {np.mean(MAE)} - Standard Deviation: {np.std(MAE)}')
print(f'> MSE: {np.mean(MSE)}')
print(f'> RMSE: {np.mean(RMSE)}')
print(separator)

------------------------------------------------------------------------
Score per fold
------------------------------------------------------------------------
> Fold 1 - MAE: 0.04486432406774848 - MSE: 0.0028517722274715046- RMSE: 0.050894602468578944
------------------------------------------------------------------------
> Fold 2 - MAE: 0.020671492940136982 - MSE: 0.0004934710769321526- RMSE: 0.02116473051317934
------------------------------------------------------------------------
> Fold 3 - MAE: 0.016441918389223906 - MSE: 0.0004374395915505121- RMSE: 0.020173816853077117
------------------------------------------------------------------------
> Fold 4 - MAE: 0.09321002303599273 - MSE: 0.011856846139160002- RMSE: 0.10267699907598456
------------------------------------------------------------------------
> Fold 5 - MAE: 0.052437406570096406 - MSE: 0.0034978681537334473- RMSE: 0.05341641995521726
------------------------------------------------------------------------
> Fold 6 -

In [4]:
# Hold out 20% of the samples (fixed seed), fit a fresh linear regression on
# the remainder and predict the held-out targets.
X_train, X_test, Y_train, Y_test = train_test_split(
    feature, target, test_size=0.20, random_state=0
)
linear_reg_model = LinearRegression()
linear_reg_model.fit(X_train, Y_train)
Y_pred = linear_reg_model.predict(X_test)
prediction = pd.DataFrame(data=Y_pred, columns=get_col_names(target))

# Lower / higher limits of the target rows of `limit` (the rows that follow
# the feature rows), needed to rescale the target values further down.
target_limits = limit.iloc[num_feature:num_feature + num_target]
min_y = target_limits.iloc[:, 0].to_list()
max_y = target_limits.iloc[:, 1].to_list()

corr_list = []
# Tabulate expected vs. predicted target values of the linear regression
# model, one table per held-out test element.
# NOTE(review): the Pearson correlation below is computed across the target
# values of a single element (one value per target column), not across
# elements for one target — confirm this is the intended metric.
for row_pos in range(len(X_test)):
    # Rescale the normalised values of this element.
    expected_norm = Y_test.iloc[row_pos].to_list()
    predicted_norm = prediction.iloc[row_pos].to_list()
    expected = pd.DataFrame(inverse_transform(expected_norm, max_y, min_y))
    predicted = pd.DataFrame(inverse_transform(predicted_norm, max_y, min_y))

    # Side-by-side table, labelled with the target column names.
    comparison_df = pd.concat([expected, predicted], axis=1)
    comparison_df.columns = ['Expected', 'Predicted']
    comparison_df.index = get_col_names(target)
    display(comparison_df.style.set_caption(f"Element {row_pos + 1}"))

    # Only the correlation coefficient is kept; the p-value is discarded.
    expected_values = expected.iloc[:, 0].tolist()
    predicted_values = predicted.iloc[:, 0].tolist()
    corr, _ = pearsonr(expected_values, predicted_values)
    corr_list.append(corr)
    
# Report the Pearson correlation of every held-out test element, followed by
# the mean and standard deviation over all of them. The headers refer to
# test elements because no cross-validation folds are involved here.
separator = '------------------------------------------------------------------------'
print(separator)
print('Score per test element')
for iteration_no, element_corr in enumerate(corr_list, start=1):
    print(separator)
    print(f'> Iteration {iteration_no} - Pearson Correlation: {element_corr}')
print(separator)
print('Average score over all test elements:')
print(f'> Average Pearsons Correlation: {np.mean(corr_list)} - Standard Deviation: {np.std(corr_list)}')
print(separator)

Unnamed: 0,Expected,Predicted
Average Particle Velocity,281.0,282.29629
Average Particle Temperature,2453.0,2454.139099
Average Particle Diameter,55.0,54.606765


Unnamed: 0,Expected,Predicted
Average Particle Velocity,258.0,246.051654
Average Particle Temperature,2342.0,2194.663672
Average Particle Diameter,44.0,41.863575


Unnamed: 0,Expected,Predicted
Average Particle Velocity,281.0,282.29629
Average Particle Temperature,2455.0,2454.139099
Average Particle Diameter,55.0,54.606765


Unnamed: 0,Expected,Predicted
Average Particle Velocity,255.0,255.22358
Average Particle Temperature,2363.0,2351.197307
Average Particle Diameter,49.0,47.134624


Unnamed: 0,Expected,Predicted
Average Particle Velocity,275.0,267.09983
Average Particle Temperature,2421.0,2397.352248
Average Particle Diameter,52.0,49.838972


Unnamed: 0,Expected,Predicted
Average Particle Velocity,275.0,274.400083
Average Particle Temperature,2434.0,2424.11441
Average Particle Diameter,54.0,52.42739


Unnamed: 0,Expected,Predicted
Average Particle Velocity,255.0,254.777006
Average Particle Temperature,2326.0,2347.410508
Average Particle Diameter,47.0,48.213573


Unnamed: 0,Expected,Predicted
Average Particle Velocity,264.0,257.605467
Average Particle Temperature,2360.0,2320.949307
Average Particle Diameter,48.0,46.906938


Unnamed: 0,Expected,Predicted
Average Particle Velocity,259.0,266.533414
Average Particle Temperature,2310.0,2418.533661
Average Particle Diameter,47.0,50.804083


Unnamed: 0,Expected,Predicted
Average Particle Velocity,263.0,263.119787
Average Particle Temperature,2391.0,2381.221996
Average Particle Diameter,51.0,49.313998


Unnamed: 0,Expected,Predicted
Average Particle Velocity,261.0,248.152347
Average Particle Temperature,2359.0,2217.624696
Average Particle Diameter,45.0,42.78055


Unnamed: 0,Expected,Predicted
Average Particle Velocity,278.0,272.074883
Average Particle Temperature,2435.0,2417.515062
Average Particle Diameter,52.0,50.495189


Unnamed: 0,Expected,Predicted
Average Particle Velocity,276.0,275.528113
Average Particle Temperature,2436.0,2428.403651
Average Particle Diameter,54.0,52.73873


Unnamed: 0,Expected,Predicted
Average Particle Velocity,265.0,255.504774
Average Particle Temperature,2368.0,2297.988282
Average Particle Diameter,48.0,45.989963


Unnamed: 0,Expected,Predicted
Average Particle Velocity,279.0,276.131465
Average Particle Temperature,2435.0,2433.95551
Average Particle Diameter,51.0,51.030258


Unnamed: 0,Expected,Predicted
Average Particle Velocity,262.0,249.202694
Average Particle Temperature,2364.0,2229.105209
Average Particle Diameter,46.0,43.239038


Unnamed: 0,Expected,Predicted
Average Particle Velocity,270.0,268.759935
Average Particle Temperature,2415.0,2402.668203
Average Particle Diameter,52.0,50.870694


Unnamed: 0,Expected,Predicted
Average Particle Velocity,250.0,241.062507
Average Particle Temperature,2269.0,2140.131239
Average Particle Diameter,41.0,39.685759


Unnamed: 0,Expected,Predicted
Average Particle Velocity,256.0,256.35161
Average Particle Temperature,2367.0,2355.486548
Average Particle Diameter,49.0,47.445963


Unnamed: 0,Expected,Predicted
Average Particle Velocity,261.0,260.863728
Average Particle Temperature,2382.0,2372.643514
Average Particle Diameter,50.0,48.69132


Unnamed: 0,Expected,Predicted
Average Particle Velocity,240.0,248.653863
Average Particle Temperature,2251.0,2322.594737
Average Particle Diameter,42.0,47.405921


Unnamed: 0,Expected,Predicted
Average Particle Velocity,268.0,267.631906
Average Particle Temperature,2408.0,2398.378962
Average Particle Diameter,52.0,50.559355


Unnamed: 0,Expected,Predicted
Average Particle Velocity,248.0,251.945052
Average Particle Temperature,2293.0,2335.933214
Average Particle Diameter,45.0,47.840034


Unnamed: 0,Expected,Predicted
Average Particle Velocity,276.0,283.555775
Average Particle Temperature,2414.0,2464.044632
Average Particle Diameter,48.0,52.009535


Unnamed: 0,Expected,Predicted
Average Particle Velocity,259.0,266.008241
Average Particle Temperature,2312.0,2412.793405
Average Particle Diameter,47.0,50.574839


Unnamed: 0,Expected,Predicted
Average Particle Velocity,258.0,256.46087
Average Particle Temperature,2343.0,2354.234845
Average Particle Diameter,48.0,48.435677


Unnamed: 0,Expected,Predicted
Average Particle Velocity,245.0,250.643884
Average Particle Temperature,2278.0,2330.659863
Average Particle Diameter,44.0,47.668408


Unnamed: 0,Expected,Predicted
Average Particle Velocity,267.0,266.503876
Average Particle Temperature,2405.0,2394.08972
Average Particle Diameter,52.0,50.248016


Unnamed: 0,Expected,Predicted
Average Particle Velocity,256.0,256.35161
Average Particle Temperature,2365.0,2355.486548
Average Particle Diameter,49.0,47.445963


Unnamed: 0,Expected,Predicted
Average Particle Velocity,276.0,282.713843
Average Particle Temperature,2418.0,2460.632464
Average Particle Diameter,49.0,51.898483


Unnamed: 0,Expected,Predicted
Average Particle Velocity,274.0,285.622336
Average Particle Temperature,2404.0,2472.419955
Average Particle Diameter,47.0,52.282118


Unnamed: 0,Expected,Predicted
Average Particle Velocity,229.0,233.184907
Average Particle Temperature,2024.0,2054.027396
Average Particle Diameter,35.0,36.247102


Unnamed: 0,Expected,Predicted
Average Particle Velocity,265.0,265.375846
Average Particle Temperature,2398.0,2389.800479
Average Particle Diameter,51.0,49.936677


Unnamed: 0,Expected,Predicted
Average Particle Velocity,278.0,272.45758
Average Particle Temperature,2435.0,2419.066047
Average Particle Diameter,52.0,50.545667


Unnamed: 0,Expected,Predicted
Average Particle Velocity,237.0,235.810774
Average Particle Temperature,2125.0,2082.728677
Average Particle Diameter,37.0,37.393321


Unnamed: 0,Expected,Predicted
Average Particle Velocity,242.0,237.64888
Average Particle Temperature,2182.0,2102.819574
Average Particle Diameter,38.0,38.195674


Unnamed: 0,Expected,Predicted
Average Particle Velocity,278.0,279.039958
Average Particle Temperature,2430.0,2445.743001
Average Particle Diameter,50.0,51.413892


Unnamed: 0,Expected,Predicted
Average Particle Velocity,259.0,265.745654
Average Particle Temperature,2315.0,2409.923277
Average Particle Diameter,47.0,50.460217


Unnamed: 0,Expected,Predicted
Average Particle Velocity,277.0,270.391019
Average Particle Temperature,2432.0,2410.690725
Average Particle Diameter,52.0,50.273084


Unnamed: 0,Expected,Predicted
Average Particle Velocity,280.0,280.040231
Average Particle Temperature,2449.0,2445.560616
Average Particle Diameter,55.0,53.984087


Unnamed: 0,Expected,Predicted
Average Particle Velocity,253.0,242.37544
Average Particle Temperature,2294.0,2154.481879
Average Particle Diameter,42.0,40.258869


Unnamed: 0,Expected,Predicted
Average Particle Velocity,217.0,229.508694
Average Particle Temperature,1856.0,2013.845603
Average Particle Diameter,32.0,34.642396


Unnamed: 0,Expected,Predicted
Average Particle Velocity,278.0,278.912201
Average Particle Temperature,2445.0,2441.271375
Average Particle Diameter,54.0,53.672747


Unnamed: 0,Expected,Predicted
Average Particle Velocity,278.0,272.840276
Average Particle Temperature,2436.0,2420.617033
Average Particle Diameter,52.0,50.596145


Unnamed: 0,Expected,Predicted
Average Particle Velocity,268.0,267.631906
Average Particle Temperature,2410.0,2398.378962
Average Particle Diameter,52.0,50.559355


Unnamed: 0,Expected,Predicted
Average Particle Velocity,250.0,252.710445
Average Particle Temperature,2303.0,2339.035186
Average Particle Diameter,46.0,47.94099


Unnamed: 0,Expected,Predicted
Average Particle Velocity,254.0,270.209627
Average Particle Temperature,2292.0,2458.715454
Average Particle Diameter,45.0,52.408789


Unnamed: 0,Expected,Predicted
Average Particle Velocity,241.0,249.036559
Average Particle Temperature,2257.0,2324.145723
Average Particle Diameter,43.0,47.4564


Unnamed: 0,Expected,Predicted
Average Particle Velocity,258.0,245.52648
Average Particle Temperature,2337.0,2188.923416
Average Particle Diameter,44.0,41.634331


Unnamed: 0,Expected,Predicted
Average Particle Velocity,276.0,268.324458
Average Particle Temperature,2426.0,2402.315402
Average Particle Diameter,52.0,50.000502


Unnamed: 0,Expected,Predicted
Average Particle Velocity,260.0,260.863728
Average Particle Temperature,2381.0,2372.643514
Average Particle Diameter,50.0,48.69132


Unnamed: 0,Expected,Predicted
Average Particle Velocity,259.0,246.576827
Average Particle Temperature,2346.0,2200.403928
Average Particle Diameter,44.0,42.092819


Unnamed: 0,Expected,Predicted
Average Particle Velocity,264.0,263.119787
Average Particle Temperature,2393.0,2381.221996
Average Particle Diameter,51.0,49.313998


Unnamed: 0,Expected,Predicted
Average Particle Velocity,263.0,261.281681
Average Particle Temperature,2342.0,2361.1311
Average Particle Diameter,48.0,48.511645


Unnamed: 0,Expected,Predicted
Average Particle Velocity,251.0,250.711462
Average Particle Temperature,2349.0,2334.040342
Average Particle Diameter,48.0,45.889267


Unnamed: 0,Expected,Predicted
Average Particle Velocity,238.0,247.811931
Average Particle Temperature,2240.0,2319.182569
Average Particle Diameter,42.0,47.294869


Unnamed: 0,Expected,Predicted
Average Particle Velocity,278.0,271.615648
Average Particle Temperature,2434.0,2415.653879
Average Particle Diameter,52.0,50.434615


Unnamed: 0,Expected,Predicted
Average Particle Velocity,254.0,270.472214
Average Particle Temperature,2290.0,2461.585582
Average Particle Diameter,45.0,52.523411


Unnamed: 0,Expected,Predicted
Average Particle Velocity,277.0,270.008323
Average Particle Temperature,2431.0,2409.139739
Average Particle Diameter,52.0,50.222606


Unnamed: 0,Expected,Predicted
Average Particle Velocity,265.0,259.752059
Average Particle Temperature,2374.0,2367.573322
Average Particle Diameter,50.0,48.86979


Unnamed: 0,Expected,Predicted
Average Particle Velocity,272.0,271.015994
Average Particle Temperature,2422.0,2411.246686
Average Particle Diameter,53.0,51.493373


Unnamed: 0,Expected,Predicted
Average Particle Velocity,264.0,259.292823
Average Particle Temperature,2371.0,2365.712139
Average Particle Diameter,50.0,48.809216


------------------------------------------------------------------------
Score per fold
------------------------------------------------------------------------
> Iteration 1 - Pearson Correlation: 0.9999998140280801
------------------------------------------------------------------------
> Iteration 2 - Pearson Correlation: 0.9999986695039575
------------------------------------------------------------------------
> Iteration 3 - Pearson Correlation: 0.9999997659126658
------------------------------------------------------------------------
> Iteration 4 - Pearson Correlation: 0.9999992594175392
------------------------------------------------------------------------
> Iteration 5 - Pearson Correlation: 0.9999988782445357
------------------------------------------------------------------------
> Iteration 6 - Pearson Correlation: 0.9999997578804085
------------------------------------------------------------------------
> Iteration 7 - Pearson Correlation: 0.9999990937434433
---------