### Importing the dependencies


In [3]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### Data Collection

In [4]:
df=pd.read_csv(r"F:\DL_Projects_MS\dataset\dataset.csv")
df.head()

Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,30669,Male,3.0,0,0,No,children,Rural,95.12,18.0,,0
1,30468,Male,58.0,1,0,Yes,Private,Urban,87.96,39.2,never smoked,0
2,16523,Female,8.0,0,0,No,Private,Urban,110.89,17.6,,0
3,56543,Female,70.0,0,0,Yes,Private,Rural,69.04,35.9,formerly smoked,0
4,46136,Male,14.0,0,0,No,Never_worked,Rural,161.28,19.1,,0


### Data Preprocessing


In [5]:
df.drop(columns=["id"],inplace=True)
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,Male,3.0,0,0,No,children,Rural,95.12,18.0,,0
1,Male,58.0,1,0,Yes,Private,Urban,87.96,39.2,never smoked,0
2,Female,8.0,0,0,No,Private,Urban,110.89,17.6,,0
3,Female,70.0,0,0,Yes,Private,Rural,69.04,35.9,formerly smoked,0
4,Male,14.0,0,0,No,Never_worked,Rural,161.28,19.1,,0


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43400 entries, 0 to 43399
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   gender             43400 non-null  object 
 1   age                43400 non-null  float64
 2   hypertension       43400 non-null  int64  
 3   heart_disease      43400 non-null  int64  
 4   ever_married       43400 non-null  object 
 5   work_type          43400 non-null  object 
 6   Residence_type     43400 non-null  object 
 7   avg_glucose_level  43400 non-null  float64
 8   bmi                41938 non-null  float64
 9   smoking_status     30108 non-null  object 
 10  stroke             43400 non-null  int64  
dtypes: float64(3), int64(3), object(5)
memory usage: 3.6+ MB


In [7]:
df.describe()

Unnamed: 0,age,hypertension,heart_disease,avg_glucose_level,bmi,stroke
count,43400.0,43400.0,43400.0,43400.0,41938.0,43400.0
mean,42.217894,0.093571,0.047512,104.48275,28.605038,0.018041
std,22.519649,0.291235,0.212733,43.111751,7.77002,0.133103
min,0.08,0.0,0.0,55.0,10.1,0.0
25%,24.0,0.0,0.0,77.54,23.2,0.0
50%,44.0,0.0,0.0,91.58,27.7,0.0
75%,60.0,0.0,0.0,112.07,32.9,0.0
max,82.0,1.0,1.0,291.05,97.6,1.0


In [8]:
df.duplicated()

0        False
1        False
2        False
3        False
4        False
         ...  
43395    False
43396    False
43397    False
43398    False
43399    False
Length: 43400, dtype: bool

In [9]:
df.isnull().sum()

gender                   0
age                      0
hypertension             0
heart_disease            0
ever_married             0
work_type                0
Residence_type           0
avg_glucose_level        0
bmi                   1462
smoking_status       13292
stroke                   0
dtype: int64

### Encoding using Label Encoder


In [10]:
for col in df.select_dtypes(include=["object"]).columns:
    encoder=LabelEncoder()
    df[col]=encoder.fit_transform(df[col])

In [11]:
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,1,3.0,0,0,0,4,0,95.12,18.0,3,0
1,1,58.0,1,0,1,2,1,87.96,39.2,1,0
2,0,8.0,0,0,0,2,1,110.89,17.6,3,0
3,0,70.0,0,0,1,2,0,69.04,35.9,0,0
4,1,14.0,0,0,0,1,0,161.28,19.1,3,0


### Filling Null Values

In [None]:
df["bmi"].fillna(df["bmi"].mean())
df["smoking_status"].fillna(df["smoking_status"].mode())
df.head()

In [31]:
df.isnull().sum()

gender               0
age                  0
hypertension         0
heart_disease        0
ever_married         0
work_type            0
Residence_type       0
avg_glucose_level    0
bmi                  0
smoking_status       0
stroke               0
dtype: int64

### Using Standard Scaler to scale values

In [14]:
scaler=StandardScaler()
scaler.fit_transform(df)

array([[ 1.20108119, -1.74151677, -0.32129564, ..., -1.38846925,
         1.28640708, -0.13554685],
       [ 1.20108119,  0.700823  ,  3.11239826, ...,  1.38714994,
        -0.53968634, -0.13554685],
       [-0.83084068, -1.51948588, -0.32129564, ..., -1.44083942,
         1.28640708, -0.13554685],
       ...,
       [-0.83084068,  1.76657127,  3.11239826, ...,  0.03861798,
        -1.45273305, -0.13554685],
       [ 1.20108119, -0.0984882 , -0.32129564, ...,  0.60159734,
        -0.53968634, -0.13554685],
       [-0.83084068,  1.76657127, -0.32129564, ..., -1.04806312,
        -0.53968634, -0.13554685]], shape=(43400, 11))

### Feature Selection

In [15]:
x=df.drop(columns=["stroke"],axis=1)

In [16]:
x

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status
0,1,3.0,0,0,0,4,0,95.12,18.0,3
1,1,58.0,1,0,1,2,1,87.96,39.2,1
2,0,8.0,0,0,0,2,1,110.89,17.6,3
3,0,70.0,0,0,1,2,0,69.04,35.9,0
4,1,14.0,0,0,0,1,0,161.28,19.1,3
...,...,...,...,...,...,...,...,...,...,...
43395,0,10.0,0,0,0,4,1,58.64,20.4,1
43396,0,56.0,0,0,1,0,1,213.61,55.4,0
43397,0,82.0,1,0,1,2,1,91.94,28.9,0
43398,1,40.0,0,0,1,2,1,99.16,33.2,1


In [17]:
y=df["stroke"]
y

0        0
1        0
2        0
3        0
4        0
        ..
43395    0
43396    0
43397    0
43398    0
43399    0
Name: stroke, Length: 43400, dtype: int64

### Splitting data for training and testing purpose

In [18]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

### Importing dependencies for Deep Learning Model

In [19]:
import tensorflow
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

### Using Sequential to prepare ANN

In [20]:
model=Sequential()
model.add(Dense(10,activation="relu",input_dim=10))
model.add(Dense(10,activation="relu"))
model.add(Dense(10,activation="relu"))
model.add(Dense(10,activation="relu"))
model.add(Dense(10,activation="relu"))
model.add(Dense(1,activation="sigmoid"))
 


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### Model Summary

In [21]:
model.summary()

### Model Training

In [22]:
model.compile(optimizer="Adam",loss="binary_crossentropy")

In [23]:
model.fit(x_train,y_train,epochs=50)

Epoch 1/50
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - loss: 0.1783
Epoch 2/50
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0828
Epoch 3/50
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0818
Epoch 4/50
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0808
Epoch 5/50
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0800
Epoch 6/50
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0795
Epoch 7/50
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 0.0788
Epoch 8/50
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 0.0784
Epoch 9/50
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0778
Epoch 10/50
[1m1085/1085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x1f0fc7e6050>

### Model Prediction

In [24]:
y_pred=model.predict(x_test)

[1m272/272[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [25]:
y_pred_int=np.argmax(y_pred,axis=1)
y_pred_int

array([0, 0, 0, ..., 0, 0, 0], shape=(8680,))

### Model Evaluation

In [26]:
accuracy_score(y_test,y_pred_int)

0.9814516129032258

### Testing on Sample Data

In [27]:
new_data = np.array([[1,32,0,0,0,4,0,100,18,3]])
y_pred_new = model.predict(new_data)        
y_pred_int_new = np.argmax(y_pred_new, axis=1)   
y_pred_label = encoder.fit_transform(y_pred_int_new)   
print("Predicted Cerebral Stroke:", y_pred_label[0])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
Predicted Cerebral Stroke: 0
