In [45]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV

In [None]:
dataset = pd.read_csv("data.csv")

print("Species : ")
print(dataset['stroke'].unique())

print("Dimensions of the dataset : ", dataset.shape)

print("Features of the dataset :")
print(dataset.describe(include = 'all'))

In [62]:
#MinMaxScaler
X = dataset.drop('stroke', axis = 1)
y = dataset['stroke']

# normalize data
scaler = MinMaxScaler(feature_range=(0, 1))
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data = X_rescaled, columns = X.columns)

#use one-hot encoding to represent the set of classes
y = pd.Categorical(y, categories=[0,1])
y = pd.get_dummies(y,dtype=int)

print("Pre-processed data :")
print(X)

print("Pre-processed class :")
print(y)

Pre-processed data :
      gender       age  hypertension  heart_disease  married  not_married  \
0        1.0  0.791667           0.0            1.0      1.0          0.0   
1        1.0  0.972222           0.0            1.0      1.0          0.0   
2        0.0  0.541667           0.0            0.0      1.0          0.0   
3        0.0  0.958333           1.0            0.0      1.0          0.0   
4        1.0  0.986111           0.0            0.0      1.0          0.0   
...      ...       ...           ...            ...      ...          ...   
3420     1.0  1.000000           1.0            0.0      1.0          0.0   
3421     0.0  0.652778           0.0            0.0      1.0          0.0   
3422     0.0  0.986111           0.0            0.0      1.0          0.0   
3423     0.0  0.347222           0.0            0.0      1.0          0.0   
3424     1.0  0.569444           0.0            0.0      1.0          0.0   

      urban  rural  avg_glucose_level       bmi  smoki

In [63]:
data_train, data_test, class_train, class_test = train_test_split(X, y, test_size=0.1)
mlp = MLPClassifier(hidden_layer_sizes = [30, 25, 20], max_iter = 1000, random_state = 42)

In [64]:
#train model
mlp.fit(data_train, class_train)
pred = mlp.predict(data_test)
print(pred)

[[1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [0 1]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]
 [1 0]

In [65]:
print("Accuracy : ", accuracy_score(class_test, pred))
print("Mean Square Error : ", mean_squared_error(class_test, pred))

print("Confusion Matrix for each label : ")
print('[tn fp]\n[fn tp]')
print(multilabel_confusion_matrix(class_test, pred))

print("Classification Report : ")
print(classification_report(class_test, pred))

Accuracy :  0.9620991253644315
Mean Square Error :  0.037900874635568516
Confusion Matrix for each label : 
[tn fp]
[fn tp]
[[[  1  13]
  [  0 329]]

 [[329   0]
  [ 13   1]]]
Classification Report : 
              precision    recall  f1-score   support

           0       0.96      1.00      0.98       329
           1       1.00      0.07      0.13        14

   micro avg       0.96      0.96      0.96       343
   macro avg       0.98      0.54      0.56       343
weighted avg       0.96      0.96      0.95       343
 samples avg       0.96      0.96      0.96       343



In [66]:
print("Mean Square Error : ", mean_squared_error(class_test, pred))

Mean Square Error :  0.037900874635568516


In [67]:
CV = cross_validate(mlp, X, y, cv=10, scoring=['neg_mean_squared_error'])
print('MSE')
print(-1*CV['test_neg_mean_squared_error'])
print('Average MSE = ', sum(-1 * CV['test_neg_mean_squared_error']) / len(CV['test_neg_mean_squared_error']))

MSE
[0.52478134 0.00291545 0.00291545 0.02040816 0.01603499 0.01023392
 0.00877193 0.00584795 0.         0.01754386]
Average MSE =  0.06094530544047192
