# Table of Contents
1. Introduction
2. Import
3. Analysis & Preprocessing
4. Model
5. Training
6. Analysis & Conclusion

# 1. Introduction
References:
- https://machinelearningmastery.com/feature-selection-for-regression-data/
- https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor
- https://towardsdatascience.com/deep-neural-multilayer-perceptron-mlp-with-scikit-learn-2698e77155e
- https://www.studytonight.com/post/what-is-mean-squared-error-mean-absolute-error-root-mean-squared-error-and-r-squared#:~:text=MAE%3A%20It%20is%20not%20very,the%20weighted%20individual%20differences%20equally.

With MLPRegressor, we can split the training data to also be used as validation data for early stopping.

# 2. Import

In [1]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from tensorflow.keras.losses import MeanSquaredLogarithmicError
import matplotlib.pyplot as plt
import numpy as np 
import sklearn.metrics as metrics

# 3. Anaysis & Preprocessing

In [2]:
data = pd.read_excel('../../data_sets/Volumetric_features.xlsx')
data_feat = pd.DataFrame(data, columns = data.columns[:-1])
data_feat = data_feat.drop(['S.No','Age'], axis=1)

data.head(5)
data.describe()

Unnamed: 0,S.No,Left-Lateral-Ventricle,Left-Inf-Lat-Vent,Left-Cerebellum-White-Matter,Left-Cerebellum-Cortex,Left-Thalamus,Left-Caudate,Left-Putamen,Left-Pallidum,3rd-Ventricle,...,rh_supramarginal_thickness,rh_frontalpole_thickness,rh_temporalpole_thickness,rh_transversetemporal_thickness,rh_insula_thickness,rh_MeanThickness_thickness,BrainSegVolNotVent.2,eTIV.1,Age,dataset
count,4226.0,4226.0,4226.0,4226.0,4226.0,4226.0,4226.0,4226.0,4226.0,4226.0,...,4226.0,4226.0,4226.0,4226.0,4226.0,4226.0,4226.0,4226.0,4226.0,4226.0
mean,2113.5,13370.040795,574.849716,14646.696711,52002.811571,7164.947539,3337.653526,4505.158755,1958.214458,1418.947373,...,2.429779,2.684327,3.555803,2.288283,2.846123,2.372266,1085468.0,1514925.0,58.374586,4.533838
std,1220.085448,9194.928348,594.590387,2622.868798,6378.435917,1207.229615,502.352001,713.65858,287.139826,635.143286,...,0.185543,0.275245,0.332094,0.269851,0.195038,0.146944,124888.1,165179.8,20.064099,3.057928
min,1.0,2204.1,0.0,6920.1,29911.8,4145.4,1035.6,2294.0,851.9,39.7,...,1.345,1.655,1.94,1.176,1.533,1.48329,627960.0,832981.5,18.0,1.0
25%,1057.25,7031.625,243.2,12909.875,47359.675,6239.425,2984.5,4008.125,1764.7,941.825,...,2.309,2.51,3.36,2.105,2.72,2.274935,995758.5,1404471.0,43.0,1.0
50%,2113.5,10669.95,385.8,14277.0,51333.65,7032.15,3294.05,4438.1,1940.1,1225.45,...,2.4405,2.685,3.5865,2.297,2.851,2.383375,1075919.0,1511767.0,61.0,4.0
75%,3169.75,17332.65,720.825,15959.725,56287.775,7977.4,3655.125,4963.025,2128.0,1780.225,...,2.56275,2.851,3.79,2.476,2.975,2.483142,1168888.0,1625445.0,76.0,8.0
max,4226.0,79812.5,7533.8,35042.5,79948.2,13008.3,6018.0,8446.1,4357.7,4461.6,...,2.996,3.928,4.487,3.123,3.482,2.80373,1545129.0,2075213.0,96.0,9.0


In [3]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x = scaler.fit_transform(data)
n = 20
pca = PCA(n_components=n)
pca_data = pca.fit_transform(x)

labels = data.columns.values.tolist()
label_index = [np.abs(pca.components_[i]).argmax() for i in range(n)]
columns = [labels[label_index[i]] for i in range(n)]

pca_df = pd.DataFrame(data=pca_data, columns=columns)
print(pca_df.head)
pca_df.head()

<bound method NDFrame.head of       rh_MeanThickness_thickness  CerebralWhiteMatterVol  \
0                       2.116693                1.364193   
1                       1.781763                1.577277   
2                       2.423065                1.424487   
3                       4.657487                1.366377   
4                       3.795704                1.701514   
...                          ...                     ...   
4221                    3.332053                2.220376   
4222                    4.258130               -2.535943   
4223                    7.826457                2.169779   
4224                   -0.702316                2.439429   
4225                   -2.373678               -3.566133   

      Left-Lateral-Ventricle  lh_lateralorbitofrontal_thickness  SurfaceHoles  \
0                   1.509728                          -2.002925     -1.880862   
1                   1.751875                          -1.118644     -1.486870   
2     

Unnamed: 0,rh_MeanThickness_thickness,CerebralWhiteMatterVol,Left-Lateral-Ventricle,lh_lateralorbitofrontal_thickness,SurfaceHoles,CC_Posterior,rh_entorhinal_thickness,CC_Posterior.1,Right-Caudate,MaskVol-to-eTIV,rh_frontalpole_thickness,MaskVol-to-eTIV.1,Right-Cerebellum-White-Matter,MaskVol-to-eTIV.2,Right-vessel,non-WM-hypointensities,rh_isthmuscingulate_thickness,5th-Ventricle,Right-vessel.1,non-WM-hypointensities.1
0,2.116693,1.364193,1.509728,-2.002925,-1.880862,2.279485,-1.647957,-0.015232,-0.459961,1.817807,-0.646434,0.135039,-1.132815,1.600467,-1.104807,-0.227839,0.135636,0.681317,-0.55052,0.230722
1,1.781763,1.577277,1.751875,-1.118644,-1.48687,2.078948,-1.814664,-0.373274,-0.914708,2.002901,-0.588284,0.58458,-0.921479,1.525118,-1.551568,-0.518161,-0.332803,0.362193,-0.527036,0.415532
2,2.423065,1.424487,1.583663,-1.542719,-1.246703,1.775612,-2.45427,-0.641437,-1.265463,1.78442,-0.72165,0.545587,-0.494252,1.809298,-1.101615,-0.590066,-0.161438,0.039744,-0.175035,0.034108
3,4.657487,1.366377,1.174502,-0.637784,-1.459263,2.25907,-1.239307,-0.937875,-1.09174,1.582241,-0.510306,0.070056,-0.524549,1.880366,-1.459557,-0.027886,-0.315797,0.39995,-0.235953,0.062979
4,3.795704,1.701514,2.226914,-1.242892,-1.389544,2.824516,-1.684124,-0.290602,-0.68975,1.821082,-0.832835,0.444201,-1.059507,2.152221,-0.704298,-0.438269,-0.464209,0.839843,-0.503651,0.524223


In [4]:
# Split for validation --> train, val, test = 80/15/5
# train to test (val and test) --> include random shuffle
x_train, x_validation, y_train, y_validation = train_test_split(pca_df, data['Age'], test_size=0.20, random_state=33)

# (20% of total dataset -> 75% validation = 15% total, 25% validation = 5% total
# val and test --> include random shuffle
x_val, x_test, y_val, y_test = train_test_split(x_validation, y_validation, test_size=0.25, random_state=33)

print("x_train shape is:",x_train.shape)
print("y_train shape is:",y_train.shape, "\n")
print("x_val shape is:",x_val.shape)
print("y_val shape is:",y_val.shape, "\n")
print("x_test shape is:",x_test.shape)
print("y_test shape is:",y_test.shape)

x_train shape is: (3380, 20)
y_train shape is: (3380,) 

x_val shape is: (634, 20)
y_val shape is: (634,) 

x_test shape is: (212, 20)
y_test shape is: (212,)


# 4. Model

In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
import pandas as pd

reg = MLPRegressor(hidden_layer_sizes=(64,64,64),activation="relu" ,random_state=1, max_iter=2000).fit(x_train, y_train)
reg.fit(x_train, y_train)
# y_pred=reg.predict(x_test)
# print("The Score with ", (r2_score(y_pred, y_test)))


In [None]:
y_pred=model.predict(x_test)
print("The Score with ", (r2_score(y_pred, y_test)))

In [None]:
print("Training set score: %f" % model.score(x_train, y_train))
print("Test set score: %f" % model.score(x_test, y_test))
print("Coef shape=", len(model.coefs_))