# Data Preprocessing

## Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.feature_selection import SelectKBest, f_classif

## Dataset

In [2]:
dataset = pd.read_csv('data.csv')

In [3]:
dataset.head()

Unnamed: 0,Gender,Age,Urea,Cr,HbA1c,Chol,TG,HDL,LDL,VLDL,BMI,Class
0,M,44,6.8,64,4.9,4.9,2.8,2.0,1.8,1.2,21.0,N
1,F,35,3.9,38,5.4,3.8,5.9,0.5,4.3,1.0,22.0,N
2,M,40,5.0,63,4.0,4.8,2.5,1.1,2.7,1.1,23.0,N
3,F,35,2.8,50,5.0,4.7,2.5,1.3,2.4,1.1,20.0,N
4,F,42,2.1,45,4.9,3.0,1.1,1.1,1.4,1.4,21.0,N


In [4]:
dataset.describe(include='all')

Unnamed: 0,Gender,Age,Urea,Cr,HbA1c,Chol,TG,HDL,LDL,VLDL,BMI,Class
count,1000,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000
unique,3,,,,,,,,,,,3
top,M,,,,,,,,,,,Y
freq,565,,,,,,,,,,,844
mean,,53.528,5.124743,68.943,8.28116,4.86282,2.34961,1.20475,2.60979,1.8547,29.57802,
std,,8.799241,2.935165,59.984747,2.534003,1.301738,1.401176,0.660414,1.115102,3.663599,4.962388,
min,,20.0,0.5,6.0,0.9,0.0,0.3,0.2,0.3,0.1,19.0,
25%,,51.0,3.7,48.0,6.5,4.0,1.5,0.9,1.8,0.7,26.0,
50%,,55.0,4.6,60.0,8.0,4.8,2.0,1.1,2.5,0.9,30.0,
75%,,59.0,5.7,73.0,10.2,5.6,2.9,1.3,3.3,1.5,33.0,


In [5]:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [6]:
print(X)
X.shape

[['M' 44 6.8 ... 1.8 1.2 21.0]
 ['F' 35 3.9 ... 4.3 1.0 22.0]
 ['M' 40 5.0 ... 2.7 1.1 23.0]
 ...
 ['M' 54 5.0 ... 3.0 0.7 33.0]
 ['M' 30 7.1 ... 2.4 8.1 27.4]
 ['M' 31 3.0 ... 2.4 15.4 37.2]]


(1000, 11)

In [7]:
print(y)
y.shape

['N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N'
 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N'
 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N'
 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N'
 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N'
 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'P' 'P' 'P' 'P' 'P'
 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P'
 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P'
 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y

(1000,)

## Encode Categorical Data

In [8]:
# One Hot Encoding for Independent Variables
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [9]:
print(X[:, 0:4])

[[0.0 1.0 0.0 44]
 [1.0 0.0 0.0 35]
 [0.0 1.0 0.0 40]
 ...
 [0.0 1.0 0.0 54]
 [0.0 1.0 0.0 30]
 [0.0 1.0 0.0 31]]


In [10]:
# Label Encoder for Dependent Variable
le = LabelEncoder()
y = le.fit_transform(y)

In [11]:
print(y)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 

## Training and Test Set Allocation

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1000000)

In [13]:
# print(X_train)

In [14]:
# print(X_test)

In [15]:
# print(y_train)

In [16]:
# print(y_test)

## Feature Scaling

In [17]:
sc = StandardScaler()
X_train[:, 3:] = sc.fit_transform(X_train[:, 3:])
X_test[:, 3:] = sc.fit_transform(X_test[:, 3:])

In [18]:
print(X_train)

[[0.0 1.0 0.0 ... -0.5360979661303736 -0.21176493523210396
  0.2812116799700869]
 [0.0 1.0 0.0 ... -0.2705331208883882 -0.23905423100943693
  0.0795033415774561]
 [1.0 0.0 0.0 ... -0.978706041533683 -0.3482114141187689
  0.3820658491664023]
 ...
 [1.0 0.0 0.0 ... 1.05729110532154 -0.18447563945477094
  0.2812116799700869]
 [1.0 0.0 0.0 ... -0.3590547359690498 0.061128022541225874
  1.4914617103258718]
 [1.0 0.0 0.0 ... 1.3228559505635253 -0.21176493523210396
  -1.3324550271709594]]


In [19]:
print(X_test)

[[0.0 1.0 0.0 ... -0.3104462592647769 -0.34933475964769956
  -0.49658275449064243]
 [0.0 1.0 0.0 ... -1.0711109067741815 0.36612043931568633
  1.7178779822819963]
 [0.0 1.0 0.0 ... -1.3563601495902085 -0.45940479025745123
  1.1139341449803675]
 ...
 [0.0 1.0 0.0 ... 1.0207168738766819 -0.2942997443428237
  -0.09395352962288993]
 [0.0 1.0 0.0 ... 0.06988606448992583 2.2648284673339028
  0.38920154021841274]
 [1.0 0.0 0.0 ... 2.161713845140789 -0.4043697749525754
  -1.3018412042261474]]


# MLP Training and Prediction

## Training

In [20]:
mlp = MLPClassifier(hidden_layer_sizes=(1000, 1000, 1000, 1000), max_iter = 1000, activation = 'relu', alpha = 0.05)

In [21]:
mlp.fit(X_train, y_train)

In [None]:
prediction = mlp.predict(X_test)
prediction

: 

# Result Evaluation

### Confusion Matrix

In [None]:
cm = confusion_matrix(y_test, prediction)
cm_fig = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = ['ND', 'PD', 'D'])
cm_fig.plot()
plt.show()

: 

### Classification Report

In [None]:
print(classification_report(y_test, prediction))

: 