In [2]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import torch

df = pd.read_csv("healthcare-dataset-stroke-data.csv")
df = df.drop(columns=["id"])
df['bmi'] = df['bmi'].fillna(df['bmi'].median())

encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
encoded_values = encoder.fit_transform(df[['work_type','smoking_status','Residence_type','ever_married','gender']])
new_cols = encoder.get_feature_names_out(['work_type','smoking_status','Residence_type','ever_married','gender'])

df_encoded = pd.DataFrame(encoded_values,columns=new_cols,index=df.index)
df_final = pd.concat(
    [df.drop(columns=['work_type','smoking_status','Residence_type','ever_married','gender']),df_encoded],
    axis=1)
y = df_final['stroke'].astype(int)
X= df_final.drop('stroke', axis=1)

y = y.values
X = X.values

X_train, X_test, y_train, y_test = train_test_split(X,y ,test_size=0.2 ,random_state=41, stratify=y)

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#switching them to tensors 
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.FloatTensor(y_train)
y_test = torch.FloatTensor(y_test)



In [3]:
import torch 
import torch.nn as nn
import numpy as np 

class Model(nn.Module):
    def __init__(self, in_features = X_train.shape[1], out_features=1,h1=32,h2=16):
        super().__init__()
        self.fc1 = nn.Linear(in_features,h1)
        self.fc2 = nn.Linear(h1,h2)
        self.output = nn.Linear(h2, out_features)

    def forward(self,X):
        x = torch.relu(self.fc1(X))
        x = torch.relu(self.fc2(x))
        x = self.output(x)
        return x.squeeze(1)

model = Model()

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

#training 
epochs = 200
losses = []

for epoch in range(epochs):
    model.train()
    y_pred = model(X_train)
    loss = criterion(y_pred,y_train.float())
    losses.append(loss.item())

    if epoch % 10 == 0:
        print(f"epoch: {epoch} losses : {loss}")

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()



epoch: 0 losses : 0.655144453048706
epoch: 10 losses : 0.611181378364563
epoch: 20 losses : 0.5662877559661865
epoch: 30 losses : 0.513571560382843
epoch: 40 losses : 0.45169001817703247
epoch: 50 losses : 0.38433200120925903
epoch: 60 losses : 0.31946489214897156
epoch: 70 losses : 0.2656635344028473
epoch: 80 losses : 0.2266334742307663
epoch: 90 losses : 0.20171211659908295
epoch: 100 losses : 0.18740496039390564
epoch: 110 losses : 0.17938509583473206
epoch: 120 losses : 0.1745777428150177
epoch: 130 losses : 0.1713627427816391
epoch: 140 losses : 0.16900770366191864
epoch: 150 losses : 0.16714084148406982
epoch: 160 losses : 0.16558320820331573
epoch: 170 losses : 0.16421456634998322
epoch: 180 losses : 0.16299061477184296
epoch: 190 losses : 0.1618732213973999


In [10]:
from sklearn.metrics import accuracy_score
import torch
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score

with torch.no_grad():
    logits = model(X_test)
    probs = torch.sigmoid(logits)
    for t in [0.1, 0.2, 0.3, 0.4, 0.5]:
     preds = (probs > t).int()
     auc = roc_auc_score(y_test, probs)
     
     print("Threshold:", t)

print(confusion_matrix(y_test, preds))
print("ROC AUC:", auc) 
print(classification_report(y_test, preds))


Threshold: 0.1
Threshold: 0.2
Threshold: 0.3
Threshold: 0.4
Threshold: 0.5
[[972   0]
 [ 50   0]]
ROC AUC: 0.8262139917695472
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.97       972
         1.0       0.00      0.00      0.00        50

    accuracy                           0.95      1022
   macro avg       0.48      0.50      0.49      1022
weighted avg       0.90      0.95      0.93      1022



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
