# ML Prediction - Stroke Prediction

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import pandas as pd

file_path = '/content/drive/MyDrive/Stroke_Prediction_Project/healthcare-dataset-stroke-data.csv'

In [None]:
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,,never smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1


## Pre-Processed Data Analysis

In [None]:
df.describe()

Unnamed: 0,id,age,hypertension,heart_disease,avg_glucose_level,bmi,stroke
count,5110.0,5110.0,5110.0,5110.0,5110.0,4909.0,5110.0
mean,36517.829354,43.226614,0.097456,0.054012,106.147677,28.893237,0.048728
std,21161.721625,22.612647,0.296607,0.226063,45.28356,7.854067,0.21532
min,67.0,0.08,0.0,0.0,55.12,10.3,0.0
25%,17741.25,25.0,0.0,0.0,77.245,23.5,0.0
50%,36932.0,45.0,0.0,0.0,91.885,28.1,0.0
75%,54682.0,61.0,0.0,0.0,114.09,33.1,0.0
max,72940.0,82.0,1.0,1.0,271.74,97.6,1.0


In [None]:
gender = df['gender']
gender.value_counts()

Unnamed: 0_level_0,count
gender,Unnamed: 1_level_1
Female,2994
Male,2115
Other,1


In [None]:
age = df['age']

age.describe()

Unnamed: 0,age
count,5110.0
mean,43.226614
std,22.612647
min,0.08
25%,25.0
50%,45.0
75%,61.0
max,82.0


In [None]:
hypertension = df['hypertension']
hypertension.value_counts()

Unnamed: 0_level_0,count
hypertension,Unnamed: 1_level_1
0,4612
1,498


In [None]:
heart_disease = df['heart_disease']
heart_disease.value_counts()

Unnamed: 0_level_0,count
heart_disease,Unnamed: 1_level_1
0,4834
1,276


In [None]:
ever_married = df['ever_married']
ever_married.value_counts()

Unnamed: 0_level_0,count
ever_married,Unnamed: 1_level_1
Yes,3353
No,1757


In [None]:
work_type = df['work_type']
work_type.value_counts()

Unnamed: 0_level_0,count
work_type,Unnamed: 1_level_1
Private,2925
Self-employed,819
children,687
Govt_job,657
Never_worked,22


In [None]:
residence_type = df['Residence_type']
residence_type.value_counts()

Unnamed: 0_level_0,count
Residence_type,Unnamed: 1_level_1
Urban,2596
Rural,2514


In [None]:
avg_glucose_level = df['avg_glucose_level']
avg_glucose_level.describe()

Unnamed: 0,avg_glucose_level
count,5110.0
mean,106.147677
std,45.28356
min,55.12
25%,77.245
50%,91.885
75%,114.09
max,271.74


In [None]:
bmi = df['bmi']
bmi.describe()

Unnamed: 0,bmi
count,4909.0
mean,28.893237
std,7.854067
min,10.3
25%,23.5
50%,28.1
75%,33.1
max,97.6


In [None]:
smoking_status = df['smoking_status']
smoking_status.value_counts()

Unnamed: 0_level_0,count
smoking_status,Unnamed: 1_level_1
never smoked,1892
Unknown,1544
formerly smoked,885
smokes,789


In [None]:
stroke = df['stroke']
stroke.value_counts()

Unnamed: 0_level_0,count
stroke,Unnamed: 1_level_1
0,4861
1,249


## Processing & Cleaning Data

In [None]:
# filling in NaN values
from sklearn.impute import SimpleImputer

imputer = SimpleImputer(strategy='median')
df['bmi'] = imputer.fit_transform(df[['bmi']])

In [None]:
X = df.drop(columns=['stroke'])
y = df['stroke']

In [None]:
from imblearn.over_sampling import SMOTE

# Convert categorical features to numerical using one-hot encoding
X = pd.get_dummies(X, columns=['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status'])

print(X.head())

oversampler = SMOTE()
X, y = oversampler.fit_resample(X,y)

      id   age  hypertension  heart_disease  avg_glucose_level   bmi  \
0   9046  67.0             0              1             228.69  36.6   
1  51676  61.0             0              0             202.21  28.1   
2  31112  80.0             0              1             105.92  32.5   
3  60182  49.0             0              0             171.23  34.4   
4   1665  79.0             1              0             174.12  24.0   

   gender_Female  gender_Male  gender_Other  ever_married_No  ...  \
0          False         True         False            False  ...   
1           True        False         False            False  ...   
2          False         True         False            False  ...   
3           True        False         False            False  ...   
4           True        False         False            False  ...   

   work_type_Never_worked  work_type_Private  work_type_Self-employed  \
0                   False               True                    False   
1     

## Model Training

In [None]:
# scaling the data
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

In [None]:
X_train_scaled, X_test_scaled, y_train, y_test = train_test_split(X_scaled, y, stratify=y, test_size=0.2, random_state=42)

### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

logistic_regression_model = LogisticRegression(random_state=42, class_weight='balanced')
logistic_regression_model.fit(X_train_scaled, y_train)

y_pred = logistic_regression_model.predict(X_test_scaled)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9609254498714653
Precision:  0.995575221238938
Recall:  0.9259259259259259
F1 Score:  0.9594882729211087
Classification Report:  
               precision    recall  f1-score   support

           0       0.93      1.00      0.96       973
           1       1.00      0.93      0.96       972

    accuracy                           0.96      1945
   macro avg       0.96      0.96      0.96      1945
weighted avg       0.96      0.96      0.96      1945

Confusion Matrix:  
 [[969   4]
 [ 72 900]]


#### Grid Search with Logistic Regression

In [None]:
from sklearn.model_selection import GridSearchCV


logistic_regression_model = LogisticRegression(random_state=42, class_weight='balanced')

# logistic_regression_model.fit(X_train_scaled,y_train)

# y_preds = logistic_regression_model.predict(X_test_scaled)


params = [{
    "C": [0.01,0.05,0.1,0.25,1,10],
    "penalty": ['l1','l2'],
    "solver": ['liblinear']
}]

gridsearch = GridSearchCV(estimator=logistic_regression_model, param_grid=params, cv=10)

gridsearch.fit(X_train_scaled,y_train)

print("best parameters: ", gridsearch.best_params_)

print("best estimator: ", gridsearch.best_estimator_)

print("best score: ", gridsearch.best_score_)

best parameters:  {'C': 0.25, 'penalty': 'l2', 'solver': 'liblinear'}
best estimator:  LogisticRegression(C=0.25, class_weight='balanced', random_state=42,
                   solver='liblinear')
best score:  0.9603960920156291


#### Pipeline for Logistic Regression

In [None]:
from sklearn.pipeline import Pipeline

stroke_prediction_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(C=0.25, penalty='l2', solver='liblinear',random_state=42,class_weight='balanced'))
])

stroke_prediction_pipeline.fit(X_train,y_train)

y_preds = stroke_prediction_pipeline.predict(X_test)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9629820051413882
Precision:  0.9988913525498891
Recall:  0.926954732510288
F1 Score:  0.9615795090715048
Classification Report:  
               precision    recall  f1-score   support

           0       0.93      1.00      0.96       973
           1       1.00      0.93      0.96       972

    accuracy                           0.96      1945
   macro avg       0.97      0.96      0.96      1945
weighted avg       0.97      0.96      0.96      1945

Confusion Matrix:  
 [[972   1]
 [ 71 901]]


### SVM

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

svm_model = SVC(random_state=42)
svm_model.fit(X_train_scaled, y_train)

y_pred = svm_model.predict(X_test_scaled)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9629820051413882
Precision:  0.9988913525498891
Recall:  0.926954732510288
F1 Score:  0.9615795090715048
Classification Report:  
               precision    recall  f1-score   support

           0       0.93      1.00      0.96       973
           1       1.00      0.93      0.96       972

    accuracy                           0.96      1945
   macro avg       0.97      0.96      0.96      1945
weighted avg       0.97      0.96      0.96      1945

Confusion Matrix:  
 [[972   1]
 [ 71 901]]


#### Grid Search with SVM

In [None]:
from sklearn.model_selection import GridSearchCV

svc_model = SVC(random_state=42)


params = [{
    "C": [0.01,0.05,0.1,0.25,1,10],
    "kernel": ["linear", "poly", "rbf", "sigmoid"],
    "degree": [1,2,3,4,5,6]
}]

gridsearch = GridSearchCV(estimator=svc_model, param_grid=params, cv=10)

gridsearch.fit(X_train_scaled,y_train)

print("best parameters: ", gridsearch.best_params_)

print("best estimator: ", gridsearch.best_estimator_)

print("best score: ", gridsearch.best_score_)

best parameters:  {'C': 0.05, 'degree': 1, 'kernel': 'linear'}
best estimator:  SVC(C=0.05, degree=1, kernel='linear', random_state=42)
best score:  0.9614240388019306


#### Pipeline for SVM

In [None]:
stroke_prediction_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', SVC(C=0.05, degree=1, kernel='linear', random_state=42))
])

stroke_prediction_pipeline.fit(X_train,y_train)

y_preds = stroke_prediction_pipeline.predict(X_test)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9629820051413882
Precision:  0.9988913525498891
Recall:  0.926954732510288
F1 Score:  0.9615795090715048
Classification Report:  
               precision    recall  f1-score   support

           0       0.93      1.00      0.96       973
           1       1.00      0.93      0.96       972

    accuracy                           0.96      1945
   macro avg       0.97      0.96      0.96      1945
weighted avg       0.97      0.96      0.96      1945

Confusion Matrix:  
 [[972   1]
 [ 71 901]]


### Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

random_forest_model = RandomForestClassifier(random_state=42, class_weight='balanced')
random_forest_model.fit(X_train_scaled, y_train)

y_pred = random_forest_model.predict(X_test_scaled)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9773778920308483
Precision:  0.9904862579281184
Recall:  0.9639917695473251
F1 Score:  0.9770594369134515
Classification Report:  
               precision    recall  f1-score   support

           0       0.96      0.99      0.98       973
           1       0.99      0.96      0.98       972

    accuracy                           0.98      1945
   macro avg       0.98      0.98      0.98      1945
weighted avg       0.98      0.98      0.98      1945

Confusion Matrix:  
 [[964   9]
 [ 35 937]]


#### Grid Search with Random Forest Classifier

In [None]:
from sklearn.model_selection import GridSearchCV

random_forest_model = RandomForestClassifier(random_state=42, class_weight='balanced')

params = [{
    "n_estimators": [50,100,250,500],
    "criterion": ["gini", "entropy", "log_loss"]
}]

gridsearch = GridSearchCV(estimator=random_forest_model, param_grid=params, cv=10)

gridsearch.fit(X_train_scaled,y_train)

print("best parameters: ", gridsearch.best_params_)

print("best estimator: ", gridsearch.best_estimator_)

print("best score: ", gridsearch.best_score_)

best parameters:  {'criterion': 'gini', 'n_estimators': 250}
best estimator:  RandomForestClassifier(class_weight='balanced', n_estimators=250,
                       random_state=42)
best score:  0.9724828206833349


#### Pipeline for Random Forest Classifier

In [None]:
stroke_prediction_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', RandomForestClassifier(n_estimators=250, random_state=42, criterion='gini',class_weight='balanced'))
])

stroke_prediction_pipeline.fit(X_train,y_train)

y_preds = stroke_prediction_pipeline.predict(X_test)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9773778920308483
Precision:  0.9904862579281184
Recall:  0.9639917695473251
F1 Score:  0.9770594369134515
Classification Report:  
               precision    recall  f1-score   support

           0       0.96      0.99      0.98       973
           1       0.99      0.96      0.98       972

    accuracy                           0.98      1945
   macro avg       0.98      0.98      0.98      1945
weighted avg       0.98      0.98      0.98      1945

Confusion Matrix:  
 [[964   9]
 [ 35 937]]


### XGB Classifier

In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

xgb_model = XGBClassifier(random_state=42,eval_metric='logloss')
xgb_model.fit(X_train_scaled, y_train)

y_pred = xgb_model.predict(X_test_scaled)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9737789203084833
Precision:  0.9811912225705329
Recall:  0.9660493827160493
F1 Score:  0.973561430793157
Classification Report:  
               precision    recall  f1-score   support

           0       0.97      0.98      0.97       973
           1       0.98      0.97      0.97       972

    accuracy                           0.97      1945
   macro avg       0.97      0.97      0.97      1945
weighted avg       0.97      0.97      0.97      1945

Confusion Matrix:  
 [[955  18]
 [ 33 939]]


#### Grid Search with XGB Classifier

In [None]:
from sklearn.model_selection import GridSearchCV

xgb_model = XGBClassifier(random_state=42,eval_metric='logloss')

params = {
    'learning_rate': [0.01,0.05,0.1,0.3,1],
    'gamma': [0,0.05,0.1,0.2]
}

gridsearch = GridSearchCV(estimator=xgb_model, param_grid=params, cv=10)
gridsearch.fit(X_train_scaled,y_train)

print("best parameters: ", gridsearch.best_params_)

print("best estimator: ", gridsearch.best_estimator_)

print("best score: ", gridsearch.best_score_)

best parameters:  {'gamma': 0.1, 'learning_rate': 0.3}
best estimator:  XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='logloss',
              feature_types=None, gamma=0.1, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.3, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=None,
              n_jobs=None, num_parallel_tree=None, random_state=42, ...)
best score:  0.969396333535151


#### Pipeline for XGB Classifier

In [None]:
stroke_prediction_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', XGBClassifier(random_state=42,eval_metric='logloss',gamma=0.1,learning_rate=0.3))
])

stroke_prediction_pipeline.fit(X_train,y_train)

y_preds = stroke_prediction_pipeline.predict(X_test)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9737789203084833
Precision:  0.9811912225705329
Recall:  0.9660493827160493
F1 Score:  0.973561430793157
Classification Report:  
               precision    recall  f1-score   support

           0       0.97      0.98      0.97       973
           1       0.98      0.97      0.97       972

    accuracy                           0.97      1945
   macro avg       0.97      0.97      0.97      1945
weighted avg       0.97      0.97      0.97      1945

Confusion Matrix:  
 [[955  18]
 [ 33 939]]


### Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

decision_tree_model = DecisionTreeClassifier(random_state=42, class_weight='balanced')
decision_tree_model.fit(X_train_scaled, y_train)

y_pred = decision_tree_model.predict(X_test_scaled)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9501285347043702
Precision:  0.9405840886203424
Recall:  0.9609053497942387
F1 Score:  0.9506361323155216
Classification Report:  
               precision    recall  f1-score   support

           0       0.96      0.94      0.95       973
           1       0.94      0.96      0.95       972

    accuracy                           0.95      1945
   macro avg       0.95      0.95      0.95      1945
weighted avg       0.95      0.95      0.95      1945

Confusion Matrix:  
 [[914  59]
 [ 38 934]]


#### Grid Search with Decision Tree Classifier

In [None]:
from sklearn.model_selection import GridSearchCV

decision_tree_model = DecisionTreeClassifier(random_state=42, class_weight='balanced')

params = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    'splitter': ['best','random']
}

gridsearch = GridSearchCV(estimator=decision_tree_model, param_grid=params, cv=10)
gridsearch.fit(X_train_scaled,y_train)

print("best parameters: ", gridsearch.best_params_)

print("best estimator: ", gridsearch.best_estimator_)

print("best score: ", gridsearch.best_score_)

best parameters:  {'criterion': 'gini', 'splitter': 'random'}
best estimator:  DecisionTreeClassifier(class_weight='balanced', random_state=42,
                       splitter='random')
best score:  0.9468921731132529


#### Pipeline for DecisionTree Classifier

In [None]:
stroke_prediction_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', DecisionTreeClassifier(random_state=42,class_weight='balanced',criterion='gini',splitter='random'))
])

stroke_prediction_pipeline.fit(X_train,y_train)

y_preds = stroke_prediction_pipeline.predict(X_test)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9501285347043702
Precision:  0.9405840886203424
Recall:  0.9609053497942387
F1 Score:  0.9506361323155216
Classification Report:  
               precision    recall  f1-score   support

           0       0.96      0.94      0.95       973
           1       0.94      0.96      0.95       972

    accuracy                           0.95      1945
   macro avg       0.95      0.95      0.95      1945
weighted avg       0.95      0.95      0.95      1945

Confusion Matrix:  
 [[914  59]
 [ 38 934]]


### MLP Classifier

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

mlp_model = MLPClassifier(random_state=42)
mlp_model.fit(X_train_scaled, y_train)

y_pred = mlp_model.predict(X_test_scaled)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9701799485861182
Precision:  0.9790356394129979
Recall:  0.9609053497942387
F1 Score:  0.9698857736240913
Classification Report:  
               precision    recall  f1-score   support

           0       0.96      0.98      0.97       973
           1       0.98      0.96      0.97       972

    accuracy                           0.97      1945
   macro avg       0.97      0.97      0.97      1945
weighted avg       0.97      0.97      0.97      1945

Confusion Matrix:  
 [[953  20]
 [ 38 934]]


#### Grid Search with MLP Classifier

In [12]:
from sklearn.model_selection import GridSearchCV

mlp_model = MLPClassifier(random_state=42)

params = {
    'hidden_layer_sizes': [(100,)],
    'activation': ['relu','tanh'],
    'alpha': [0.0001,0.001,0.01,0.1,1]
}

gridsearch = GridSearchCV(estimator=mlp_model, param_grid=params, cv=10)
gridsearch.fit(X_train_scaled,y_train)

print("best parameters: ", gridsearch.best_params_)

print("best estimator: ", gridsearch.best_estimator_)

print("best score: ", gridsearch.best_score_)

best parameters:  {'activation': 'relu', 'alpha': 0.1, 'hidden_layer_sizes': (100,)}
best estimator:  MLPClassifier(alpha=0.1, random_state=42)
best score:  0.9643819912457443


#### Pipeline for MLP Classifier

In [14]:
from sklearn.pipeline import Pipeline

stroke_prediction_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', MLPClassifier(random_state=42,activation='relu',alpha=0.0001,hidden_layer_sizes=(100,)))
])

stroke_prediction_pipeline.fit(X_train,y_train)

y_preds = stroke_prediction_pipeline.predict(X_test)

print("Accuracy: ", accuracy_score(y_test, y_pred))
print("Precision: ", precision_score(y_test, y_pred))
print("Recall: ", recall_score(y_test, y_pred))
print("F1 Score: ", f1_score(y_test, y_pred))

print("Classification Report: ", "\n", classification_report(y_test, y_pred))

print("Confusion Matrix: ", "\n", confusion_matrix(y_test,y_pred))

Accuracy:  0.9701799485861182
Precision:  0.9790356394129979
Recall:  0.9609053497942387
F1 Score:  0.9698857736240913
Classification Report:  
               precision    recall  f1-score   support

           0       0.96      0.98      0.97       973
           1       0.98      0.96      0.97       972

    accuracy                           0.97      1945
   macro avg       0.97      0.97      0.97      1945
weighted avg       0.97      0.97      0.97      1945

Confusion Matrix:  
 [[953  20]
 [ 38 934]]


## Results

## Conclusion