In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import kagglehub

# Download latest version
path = kagglehub.dataset_download("fedesoriano/stroke-prediction-dataset")

print("Path to dataset files:", path)


Path to dataset files: /home/muhnatha/.cache/kagglehub/datasets/fedesoriano/stroke-prediction-dataset/versions/1


# Load Dataset

In [22]:
# Load Dataset
import os

csv_path = os.path.join(path, "healthcare-dataset-stroke-data.csv")

# Load dataset
df = pd.read_csv(csv_path)
print("Shape:", df.shape)
df.head()

Shape: (5110, 12)


Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,,never smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1


# Preprocessing

In [23]:
# Handle missing value
df.isnull().sum()

id                     0
gender                 0
age                    0
hypertension           0
heart_disease          0
ever_married           0
work_type              0
Residence_type         0
avg_glucose_level      0
bmi                  201
smoking_status         0
stroke                 0
dtype: int64

In [24]:
median_value = df['bmi'].median()
df['bmi'].fillna(median_value, inplace=True)

df.isnull().sum()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['bmi'].fillna(median_value, inplace=True)


id                   0
gender               0
age                  0
hypertension         0
heart_disease        0
ever_married         0
work_type            0
Residence_type       0
avg_glucose_level    0
bmi                  0
smoking_status       0
stroke               0
dtype: int64

In [25]:
# Encoding
df = pd.get_dummies(df, columns=['gender','ever_married','work_type','Residence_type','smoking_status'],drop_first=True)
df = df.astype(int)
df

Unnamed: 0,id,age,hypertension,heart_disease,avg_glucose_level,bmi,stroke,gender_Male,gender_Other,ever_married_Yes,work_type_Never_worked,work_type_Private,work_type_Self-employed,work_type_children,Residence_type_Urban,smoking_status_formerly smoked,smoking_status_never smoked,smoking_status_smokes
0,9046,67,0,1,228,36,1,1,0,1,0,1,0,0,1,1,0,0
1,51676,61,0,0,202,28,1,0,0,1,0,0,1,0,0,0,1,0
2,31112,80,0,1,105,32,1,1,0,1,0,1,0,0,0,0,1,0
3,60182,49,0,0,171,34,1,0,0,1,0,1,0,0,1,0,0,1
4,1665,79,1,0,174,24,1,0,0,1,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5105,18234,80,1,0,83,28,0,0,0,1,0,1,0,0,1,0,1,0
5106,44873,81,0,0,125,40,0,0,0,1,0,0,1,0,1,0,1,0
5107,19723,35,0,0,82,30,0,0,0,1,0,0,1,0,0,0,1,0
5108,37544,51,0,0,166,25,0,1,0,1,0,1,0,0,0,1,0,0


In [27]:
# Train Test Split
X = df.drop('stroke', axis=1).values.astype(float)
y = df['stroke'].values.reshape(-1, 1).astype(int)

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

X_tr = X_tr.T
X_te = X_te.T
y_tr = y_tr.T
y_te = y_te.T

print("Train shapes:", X_tr.shape, y_tr.shape, "Test shapes:", X_te.shape, y_te.shape)

Train shapes: (17, 4088) (1, 4088) Test shapes: (17, 1022) (1, 1022)


# Training

In [34]:
from sklearn.metrics import accuracy_score
import pandas as pd
from neural_network import NeuralNetwork   # import from neural_network.py
import time
import numpy as np

# Configs
activations = ['sigmoid', 'relu', 'tanh']
gd_types = [
    ('batch', None),    # full-batch
    ('mini', 32),       # mini-batch
    ('sgd', 1)          # stochastic
]

results = []

n_features = X_tr.shape[0]
architecture = [n_features, 8, 1]  

EPOCHS = 50
LR = 0.01

for act in activations:
    for gd_name, batch_size in gd_types:
        print("-" * 60)
        print(f"Activation={act}, GD={gd_name}, batch_size={batch_size}")
        
        # fresh network each run
        nn = NeuralNetwork(
            layers=architecture,
            activation=act,
            loss="bce",   # for binary classification
            lr=LR
        )
        
        start = time.time()
        nn.train(X_tr, y_tr, epochs=EPOCHS, batch_size=batch_size)
        elapsed = time.time() - start
        
        # Predictions (threshold 0.5 for classification)
        yhat_tr = (nn.predict(X_tr) >= 0.5).astype(int)
        yhat_te = (nn.predict(X_te) >= 0.5).astype(int)
        
        # Flatten
        y_tr_flat = y_tr.flatten()
        y_te_flat = y_te.flatten()
        tr_flat = yhat_tr.flatten()
        te_flat = yhat_te.flatten()
        
        row = {
            "activation": act,
            "gd_type": gd_name,
            "batch_size": batch_size,
            "train_acc": accuracy_score(y_tr_flat, tr_flat),
            "test_acc": accuracy_score(y_te_flat, te_flat),
            "epochs": EPOCHS,
            "time_sec": elapsed
        }
        print(row)
        results.append(row)

# Results table
df_results = pd.DataFrame(results)
df_results = df_results.sort_values("test_acc", ascending=False).reset_index(drop=True)
df_results


------------------------------------------------------------
Activation=sigmoid, GD=batch, batch_size=None
Epoch 0, Loss: nan
Epoch 10, Loss: nan
Epoch 20, Loss: nan
Epoch 30, Loss: nan
Epoch 40, Loss: nan
{'activation': 'sigmoid', 'gd_type': 'batch', 'batch_size': None, 'train_acc': 0.951320939334638, 'test_acc': 0.9510763209393346, 'epochs': 50, 'time_sec': 0.0767204761505127}
------------------------------------------------------------
Activation=sigmoid, GD=mini, batch_size=32
Epoch 0, Loss: nan
Epoch 10, Loss: nan


  return 1 / (1 + np.exp(-x))
  return -np.mean(y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))
  return 1 / (1 + np.exp(-x))
  return -np.mean(y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))


Epoch 20, Loss: nan
Epoch 30, Loss: nan
Epoch 40, Loss: nan
{'activation': 'sigmoid', 'gd_type': 'mini', 'batch_size': 32, 'train_acc': 0.951320939334638, 'test_acc': 0.9510763209393346, 'epochs': 50, 'time_sec': 0.458524227142334}
------------------------------------------------------------
Activation=sigmoid, GD=sgd, batch_size=1


  return 1 / (1 + np.exp(-x))
  return -np.mean(y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))


Epoch 0, Loss: nan
Epoch 10, Loss: nan
Epoch 20, Loss: nan
Epoch 30, Loss: nan
Epoch 40, Loss: nan
{'activation': 'sigmoid', 'gd_type': 'sgd', 'batch_size': 1, 'train_acc': 0.951320939334638, 'test_acc': 0.9510763209393346, 'epochs': 50, 'time_sec': 8.863228559494019}
------------------------------------------------------------
Activation=relu, GD=batch, batch_size=None
Epoch 0, Loss: nan
Epoch 10, Loss: nan
Epoch 20, Loss: nan
Epoch 30, Loss: nan
Epoch 40, Loss: nan
{'activation': 'relu', 'gd_type': 'batch', 'batch_size': None, 'train_acc': 0.951320939334638, 'test_acc': 0.9510763209393346, 'epochs': 50, 'time_sec': 0.022118091583251953}
------------------------------------------------------------
Activation=relu, GD=mini, batch_size=32
Epoch 0, Loss: nan
Epoch 10, Loss: nan
Epoch 20, Loss: nan
Epoch 30, Loss: nan


  return -np.mean(y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))
  return -np.mean(y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))


Epoch 40, Loss: nan
{'activation': 'relu', 'gd_type': 'mini', 'batch_size': 32, 'train_acc': 0.04867906066536203, 'test_acc': 0.04892367906066536, 'epochs': 50, 'time_sec': 0.2704191207885742}
------------------------------------------------------------
Activation=relu, GD=sgd, batch_size=1
Epoch 0, Loss: nan


  return -np.mean(y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))


Epoch 10, Loss: nan
Epoch 20, Loss: nan
Epoch 30, Loss: nan
Epoch 40, Loss: nan
{'activation': 'relu', 'gd_type': 'sgd', 'batch_size': 1, 'train_acc': 0.04867906066536203, 'test_acc': 0.04892367906066536, 'epochs': 50, 'time_sec': 6.886667013168335}
------------------------------------------------------------
Activation=tanh, GD=batch, batch_size=None
Epoch 0, Loss: nan
Epoch 10, Loss: nan
Epoch 20, Loss: nan
Epoch 30, Loss: nan
Epoch 40, Loss: nan
{'activation': 'tanh', 'gd_type': 'batch', 'batch_size': None, 'train_acc': 0.951320939334638, 'test_acc': 0.9510763209393346, 'epochs': 50, 'time_sec': 0.02416825294494629}
------------------------------------------------------------
Activation=tanh, GD=mini, batch_size=32
Epoch 0, Loss: nan
Epoch 10, Loss: nan
Epoch 20, Loss: nan
Epoch 30, Loss: nan


  return -np.mean(y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))
  return -np.mean(y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))


Epoch 40, Loss: nan
{'activation': 'tanh', 'gd_type': 'mini', 'batch_size': 32, 'train_acc': 0.951320939334638, 'test_acc': 0.9510763209393346, 'epochs': 50, 'time_sec': 0.2756469249725342}
------------------------------------------------------------
Activation=tanh, GD=sgd, batch_size=1
Epoch 0, Loss: nan


  return -np.mean(y_true * np.log(y_pred + epsilon) + (1 - y_true) * np.log(1 - y_pred + epsilon))


Epoch 10, Loss: nan
Epoch 20, Loss: nan
Epoch 30, Loss: nan
Epoch 40, Loss: nan
{'activation': 'tanh', 'gd_type': 'sgd', 'batch_size': 1, 'train_acc': 0.951320939334638, 'test_acc': 0.9510763209393346, 'epochs': 50, 'time_sec': 6.871386528015137}


Unnamed: 0,activation,gd_type,batch_size,train_acc,test_acc,epochs,time_sec
0,sigmoid,batch,,0.951321,0.951076,50,0.07672
1,sigmoid,mini,32.0,0.951321,0.951076,50,0.458524
2,sigmoid,sgd,1.0,0.951321,0.951076,50,8.863229
3,relu,batch,,0.951321,0.951076,50,0.022118
4,tanh,batch,,0.951321,0.951076,50,0.024168
5,tanh,sgd,1.0,0.951321,0.951076,50,6.871387
6,tanh,mini,32.0,0.951321,0.951076,50,0.275647
7,relu,mini,32.0,0.048679,0.048924,50,0.270419
8,relu,sgd,1.0,0.048679,0.048924,50,6.886667
