In [1]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.pipeline import Pipeline as imb_pipeline
from imblearn.over_sampling import SMOTENC
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from imblearn.pipeline import Pipeline as imb_pipeline
import numpy as np


data = pd.read_csv('healthcare-dataset-stroke-data.csv')

balanced_data = data.dropna()
balanced_data = balanced_data.drop(columns= "id")
balanced_data = balanced_data[balanced_data.age > 20]
balanced_data = balanced_data.drop(balanced_data.index[balanced_data["gender"] == "Other"])
balanced_data = balanced_data.drop(balanced_data.index[balanced_data["work_type"] == "Never_worked"])

features = ['gender', 'age', 'hypertension', 'heart_disease', 'ever_married', 'work_type', 'Residence_type', 'avg_glucose_level', "bmi", "smoking_status"]
cat_columns = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status', 'hypertension', 'heart_disease']
num_columns = ['age', 'avg_glucose_level', 'bmi']

bal_data_train, bal_data_test = train_test_split(balanced_data, train_size = 0.8, random_state=1, stratify= balanced_data["stroke"])
bal_data_train, bal_data_val = train_test_split(bal_data_train, train_size = 0.8, random_state=1)

X_train = bal_data_train.drop('stroke', axis = 1)
y_train = bal_data_train['stroke']

X_val = bal_data_val.drop('stroke', axis = 1)
y_val = bal_data_val['stroke']

X_test = bal_data_test.drop('stroke', axis = 1)
y_test = bal_data_test['stroke']

preprocessing = ColumnTransformer([
    ('one-hot-encoder', OneHotEncoder(sparse_output=False), cat_columns),
    ('scaler', StandardScaler(), num_columns)
])

In [2]:
prob_pipeline = imb_pipeline(steps= [
        ("smotenc", SMOTENC(cat_columns, random_state = 1, sampling_strategy= 0.7)),
        ('preprocessing', preprocessing),
        ('classifier', SVC(C=1, gamma=0.001, random_state=1, probability= True)) #Same classifier as the one in pkl file
       ])

In [3]:
prob_pipeline.fit(X_train, y_train)


In [4]:
prob_pipeline["classifier"].classes_

array([0, 1], dtype=int64)

Probabilities not determined yet

In [5]:
#Convert probabilities given by the pipeline into binary classification
#Adjusting recall precision threshold using this function
#Takes in a numpy array of predictions (2 by N size)
#Threshold argument is a float between 0 and 1 (Default is 0.5)

def convert_prob(predictions, threshold = 0.5):
    return (predictions >= threshold).astype(int)[:,1]

Testing method made above

In [6]:
prob_pred = prob_pipeline.predict_proba(X_train)

print(prob_pred)

convert_prob(prob_pred)

[[0.46635458 0.53364542]
 [0.82264133 0.17735867]
 [0.84799133 0.15200867]
 ...
 [0.10405663 0.89594337]
 [0.81660785 0.18339215]
 [0.66543957 0.33456043]]


array([1, 0, 0, ..., 1, 0, 0])

Original Scores

In [7]:
print("Training set")
print("F1", f1_score(y_train, prob_pipeline.predict(X_train)))
print("Recall",recall_score(y_train, prob_pipeline.predict(X_train)))
print("Precision",precision_score(y_train, prob_pipeline.predict(X_train)))
print(confusion_matrix(y_train, prob_pipeline.predict(X_train)))

print()
print("Val set")
print("F1", f1_score(y_val, prob_pipeline.predict(X_val)))
print("Recall",recall_score(y_val, prob_pipeline.predict(X_val)))
print("Precision",precision_score(y_val, prob_pipeline.predict(X_val)))
print(confusion_matrix(y_val, prob_pipeline.predict(X_val)))

print()
print("Test set")
print("F1", f1_score(y_test, prob_pipeline.predict(X_test)))
print("Recall", recall_score(y_test, prob_pipeline.predict(X_test)))
print("Precision",precision_score(y_test, prob_pipeline.predict(X_test)))
print(confusion_matrix(y_test, prob_pipeline.predict(X_test)))

Training set
F1 0.22429906542056074
Recall 0.65625
Precision 0.13526570048309178
[[1835  537]
 [  44   84]]

Val set
F1 0.2736842105263158
Recall 0.6842105263157895
Precision 0.17105263157894737
[[461 126]
 [ 12  26]]

Test set
F1 0.23529411764705888
Recall 0.6190476190476191
Precision 0.1452513966480447
[[587 153]
 [ 16  26]]


Testing different values for recall precision trade off

In [9]:
percent = [0.6, 0.7, 0.8]

for x in percent:
    print(f"Training set, Percent: {x}")
    print("F1", f1_score(y_train, convert_prob(prob_pipeline.predict_proba(X_train), x)))
    print("Recall",recall_score(y_train, convert_prob(prob_pipeline.predict_proba(X_train), x)))
    print("Precision",precision_score(y_train, convert_prob(prob_pipeline.predict_proba(X_train), x)))
    print(confusion_matrix(y_train, convert_prob(prob_pipeline.predict_proba(X_train), x)))
    print()

    print(f"Validation set, Percent: {x}")
    print("F1", f1_score(y_val, convert_prob(prob_pipeline.predict_proba(X_val), x)))
    print("Recall",recall_score(y_val, convert_prob(prob_pipeline.predict_proba(X_val), x)))
    print("Precision",precision_score(y_val, convert_prob(prob_pipeline.predict_proba(X_val), x)))
    print(confusion_matrix(y_val, convert_prob(prob_pipeline.predict_proba(X_val), x)))
    print()


Training set, Percent: 0.6
F1 0.25523809523809526
Recall 0.5234375
Precision 0.16876574307304787
[[2042  330]
 [  61   67]]

Validation set, Percent: 0.6
F1 0.3165467625899281
Recall 0.5789473684210527
Precision 0.21782178217821782
[[508  79]
 [ 16  22]]

Training set, Percent: 0.7
F1 0.26737967914438504
Recall 0.390625
Precision 0.2032520325203252
[[2176  196]
 [  78   50]]

Validation set, Percent: 0.7
F1 0.31683168316831684
Recall 0.42105263157894735
Precision 0.25396825396825395
[[540  47]
 [ 22  16]]

Training set, Percent: 0.8
F1 0.24369747899159663
Recall 0.2265625
Precision 0.2636363636363636
[[2291   81]
 [  99   29]]

Validation set, Percent: 0.8
F1 0.21917808219178084
Recall 0.21052631578947367
Precision 0.22857142857142856
[[560  27]
 [ 30   8]]



Looks like 0.7 is the best, evaluate again using the test set

In [10]:
print("F1", f1_score(y_test, convert_prob(prob_pipeline.predict_proba(X_test), 0.7)))
print("Recall",recall_score(y_test, convert_prob(prob_pipeline.predict_proba(X_test), 0.7)))
print("Precision",precision_score(y_test, convert_prob(prob_pipeline.predict_proba(X_test), 0.7)))
print(confusion_matrix(y_test, convert_prob(prob_pipeline.predict_proba(X_test), 0.7)))

F1 0.2018348623853211
Recall 0.2619047619047619
Precision 0.16417910447761194
[[684  56]
 [ 31  11]]


TP FP

FN TN

In [1]:
import joblib

#joblib.dump(prob_pipeline, "app_probability_pipeline.pkl")

NOTES

Interpret results based on how it would affect the user.

More recall can prevent negative classes from being detected properly