In [8]:
# IMPORTING LIBRARIES

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import tensorflow as tf

data = pd.read_csv('/content/healthcare-dataset-stroke-data.csv')


In [9]:
# DATA CLEANING

from sklearn.preprocessing import StandardScaler

# Neural Network(Sequential model) will not work if the data is of type string
# Transform the string column into integers

# Select non numerical columns
numerical_features = data.select_dtypes(exclude=['number']).columns

# One-hot encoding because there is no ordinal relationship in gender
# Use pandas get_dummies to perform one-hot encoding
data_encoded = pd.get_dummies(data, columns=numerical_features, drop_first= False)
print(data_encoded.describe())
#print(data.describe())

# Split data into features (X) and target (y)
X = data_encoded.drop('stroke', axis=1)
y = data['stroke']

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

                 id          age  hypertension  heart_disease  \
count   5110.000000  5110.000000   5110.000000    5110.000000   
mean   36517.829354    43.226614      0.097456       0.054012   
std    21161.721625    22.612647      0.296607       0.226063   
min       67.000000     0.080000      0.000000       0.000000   
25%    17741.250000    25.000000      0.000000       0.000000   
50%    36932.000000    45.000000      0.000000       0.000000   
75%    54682.000000    61.000000      0.000000       0.000000   
max    72940.000000    82.000000      1.000000       1.000000   

       avg_glucose_level          bmi       stroke  
count        5110.000000  4909.000000  5110.000000  
mean          106.147677    28.893237     0.048728  
std            45.283560     7.854067     0.215320  
min            55.120000    10.300000     0.000000  
25%            77.245000    23.500000     0.000000  
50%            91.885000    28.100000     0.000000  
75%           114.090000    33.100000     0

In [10]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # Adjust test_size and random_state as needed

print(f'Training data shape: {X_train.shape}')
print(f'Test data shape: {X_test.shape}')

Training data shape: (4088, 22)
Test data shape: (1022, 22)


In [11]:
# Build a simple neural network model

import keras
from keras import layers

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

model = Sequential([
    Input(shape=(X_train.shape[1],)),  # Use Input layer for input shape
    Dense(64, activation='relu'),  # First hidden layer
    Dense(32, activation='relu'),  # Second hidden layer
    Dense(1, activation='sigmoid')  # Output layer (for binary classification)
])


# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [12]:
# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=8, validation_data=(X_test, y_test))

Epoch 1/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9343 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 2/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9569 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 3/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9524 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 4/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9574 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 5/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9586 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 6/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9530 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 7/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [13]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9405 - loss: nan
Test Loss: nan
Test Accuracy: 0.9393346309661865


In [14]:
# Making predictions
predictions = model.predict(X_test)
print(predictions)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[[nan]
 [nan]
 [nan]
 ...
 [nan]
 [nan]
 [nan]]


In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Assuming 'data' is your original DataFrame
# Split the data into train and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)  # Adjust test_size and random_state as needed

# Split training data into features and target
X_train = train_data.drop('stroke', axis=1)  # Features
y_train = train_data['stroke']  # Target

# Split test data into features and target
X_test = test_data.drop('stroke', axis=1)
y_test = test_data['stroke']

# Apply one-hot encoding to categorical columns in training and test datasets
# Use the same columns for both training and testing data
all_data = pd.concat([X_train, X_test])
all_data_encoded = pd.get_dummies(all_data, drop_first=False)

# Split back into training and testing data
X_train = all_data_encoded[:len(X_train)]
X_test = all_data_encoded[len(X_train):]

# Optional: Normalize the features (if required)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # Fit and transform on training data
X_test_scaled = scaler.transform(X_test)  # Only transform on test data

In [19]:
# Build a simple neural network
model = Sequential([
    Dense(64, activation='relu', input_dim=X_train_scaled.shape[1]),  # First hidden layer
    Dense(32, activation='relu'),  # Second hidden layer
    Dense(1, activation='sigmoid')  # Output layer (for binary classification)
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [20]:
# Train the model
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=8, validation_data=(X_test_scaled, y_test))


Epoch 1/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8898 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 2/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9582 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 3/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9606 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 4/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9502 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 5/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9552 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 6/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9546 - loss: nan - val_accuracy: 0.9393 - val_loss: nan
Epoch 7/50
[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [21]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9405 - loss: nan 
Test Loss: nan, Test Accuracy: 0.9393346309661865


In [22]:
# Make predictions using the trained model
predictions = model.predict(X_test_scaled)

# You can also convert probabilities to class labels (0 or 1 for binary classification)
predicted_classes = (predictions > 0.5).astype("int32")


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
