In [1]:
#Loading packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras

In [3]:
#Importing data
stroke_dta = pd.read_csv(r"C:\\Nithya\Healthcare_Stroke_Data.csv")
stroke_dta.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5110 entries, 0 to 5109
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 5110 non-null   int64  
 1   gender             5110 non-null   object 
 2   age                5110 non-null   float64
 3   hypertension       5110 non-null   int64  
 4   heart_disease      5110 non-null   int64  
 5   ever_married       5110 non-null   object 
 6   work_type          5110 non-null   object 
 7   Residence_type     5110 non-null   object 
 8   avg_glucose_level  5110 non-null   float64
 9   bmi                4909 non-null   float64
 10  smoking_status     5110 non-null   object 
 11  stroke             5110 non-null   int64  
dtypes: float64(3), int64(4), object(5)
memory usage: 479.2+ KB


In [5]:
#Replacing the null values with 0
stroke_dta=stroke_dta.fillna(0)

In [6]:
#Dataset initial rows
stroke_dta.head(6)

Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,0.0,never smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1
5,56669,Male,81.0,0,0,Yes,Private,Urban,186.21,29.0,formerly smoked,1


In [21]:
#Dividing dataset into independent vrble and dependent vrble
indpndnt = stroke_dta.drop(['id','smoking_status','stroke'],axis=1)
dpndnt = stroke_dta.loc[:,'stroke']

In [22]:
#Transforming categorical vrble into dummy vrble 
gender = pd.get_dummies(indpndnt['gender'],drop_first=True)
ever_married=pd.get_dummies(indpndnt['ever_married'],drop_first=True)
work_type=pd.get_dummies(indpndnt['work_type'],drop_first=True)
Residence_type=pd.get_dummies(indpndnt['Residence_type'],drop_first=True)

In [23]:
#Dropping not required columns
indpndnt.drop(['gender','ever_married','work_type','Residence_type'],axis=1,inplace=True)

In [24]:
#Forming new independent variable dataset
indpndnt = pd.concat([indpndnt,gender,ever_married,work_type,Residence_type],axis=1)

In [25]:
indpndnt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5110 entries, 0 to 5109
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   age                5110 non-null   float64
 1   hypertension       5110 non-null   int64  
 2   heart_disease      5110 non-null   int64  
 3   avg_glucose_level  5110 non-null   float64
 4   bmi                5110 non-null   float64
 5   Male               5110 non-null   uint8  
 6   Other              5110 non-null   uint8  
 7   Yes                5110 non-null   uint8  
 8   Never_worked       5110 non-null   uint8  
 9   Private            5110 non-null   uint8  
 10  Self-employed      5110 non-null   uint8  
 11  children           5110 non-null   uint8  
 12  Urban              5110 non-null   uint8  
dtypes: float64(3), int64(2), uint8(8)
memory usage: 239.7 KB


In [26]:
#Segregating Dataset - Training and Test
X_trn, X_tst, Y_trn, Y_tst = train_test_split(indpndnt,dpndnt,test_size=0.30)

In [27]:
#Converting the variables into Tensorflow type
features = [tf.feature_column.numeric_column('hypertension'),
            tf.feature_column.numeric_column('heart_disease'),
            tf.feature_column.numeric_column('avg_glucose_level'),
            tf.feature_column.numeric_column('bmi'),
            tf.feature_column.numeric_column('Male'),
            tf.feature_column.numeric_column('Other'),
            tf.feature_column.numeric_column('Yes'),
            tf.feature_column.numeric_column('Never_worked'),
            tf.feature_column.numeric_column('Private'),
            tf.feature_column.numeric_column('Self-employed'),
            tf.feature_column.numeric_column('children'),
            tf.feature_column.numeric_column('Urban') 
           ]

In [28]:
#Parameter value assignment function
def model_prmtrs(epochs_num,batches_num,shuffle):
    return tf.compat.v1.estimator.inputs.pandas_input_fn(
        x=X_trn,
        y=Y_trn,
        batch_size=batches_num,
        shuffle=shuffle,
        num_epochs=epochs_num       
    )

In [29]:
#Performance validation function
def model_performance(epochs_num,batches_num,shuffle):
    return tf.compat.v1.estimator.inputs.pandas_input_fn(
        x=X_tst,
        y=Y_tst,
        batch_size=batches_num,
        shuffle=shuffle,
        num_epochs=epochs_num  
    )

In [30]:
#Designing a Artificial Neural Model
Stroke_ann_model = tf.estimator.DNNClassifier(n_classes=3,
                                       optimizer='Adam',
                                       feature_columns=features,
                                       dropout=0.35,
                                       hidden_units=[1024,512,256,32,3],
                                       activation_fn=tf.nn.relu
                                      )

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\alekh\\AppData\\Local\\Temp\\tmpoehf1kbx', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [31]:
#Artificial Neural Model Training using defined epochs and batches
Stroke_ann_model.train(input_fn=model_prmtrs(100,128,True),steps=1300)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\alekh\AppData\Local\Temp\tmpoehf1kbx\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 14.183951, step = 0
INFO:tensorflow:global_step/sec: 71.1179
INFO:tensorflow:loss = 0.9876656, step = 100 (1.406 sec)
INFO:tensorflow:global_step/sec: 79.7124
INFO:tensorflow:loss = 0.8074801, step = 200 (1.255 sec)
INFO:tensorflow:global_step/sec: 78.3157
INFO:tensorflow:loss = 0.46633348, step = 300 (1.279 sec)
INFO:tensorflow:global_step/sec: 80.8992
INFO:tensorflow:loss = 0.4272735, step = 400 (1.

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x1c909d16a00>

In [32]:
#Artificial Neural Network Model performance validation
Stroke_ann_model.evaluate(input_fn=model_performance(100,128,True),steps=1300)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2023-03-03T18:29:57
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\alekh\AppData\Local\Temp\tmpoehf1kbx\model.ckpt-1300
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [130/1300]
INFO:tensorflow:Evaluation [260/1300]
INFO:tensorflow:Evaluation [390/1300]
INFO:tensorflow:Evaluation [520/1300]
INFO:tensorflow:Evaluation [650/1300]
INFO:tensorflow:Evaluation [780/1300]
INFO:tensorflow:Evaluation [910/1300]
INFO:tensorflow:Evaluation [1040/1300]
INFO:tensorflow:Evaluation [1170/1300]
INFO:tensorflow:Inference Time : 5.59842s
INFO:tensorflow:Finished evaluation at 2023-03-03-18:30:03
INFO:tensorflow:Saving dict for global step 1300: accuracy = 0.94781476, average_loss = 0.20391344, global_step = 1300, loss = 0.2039025
INFO:tensorflow:Saving 'checkpoint_path' summary for global st

{'accuracy': 0.94781476,
 'average_loss': 0.20391344,
 'loss': 0.2039025,
 'global_step': 1300}

In [33]:
#Merging Artificial Neural Model and Linear Classifier model
Stroke_ann_linr_model = tf.estimator.DNNLinearCombinedClassifier(n_classes=3,dnn_optimizer='Adam',dnn_hidden_units=[1024,512,256,32,3],dnn_dropout=0.35,dnn_feature_columns=features,dnn_activation_fn='relu',linear_feature_columns=features)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\alekh\\AppData\\Local\\Temp\\tmperii8y22', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [34]:
#Merging Artificial Neural Model and Linear Classifier model training using defined epochs and batches
Stroke_ann_linr_model.train(input_fn=model_prmtrs(100,128,True),steps=1300)

INFO:tensorflow:Calling model_fn.




Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\alekh\AppData\Local\Temp\tmperii8y22\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 4.1216173, step = 0
INFO:tensorflow:global_step/sec: 66.4381
INFO:tensorflow:loss = 0.26113474, step = 100 (1.505 sec)
INFO:tensorflow:global_step/sec: 76.3587
INFO:tensorflow:loss = 0.20946643, step = 200 (1.311 sec)
INFO:tensorflow:global_step/sec: 65.289
INFO:tensorflow:loss = 0.20310056, step = 300 (1.530 sec)
INFO:tensorflow:global_step/sec: 69.6645
INFO:tensorflow:loss = 0.2599422, step = 400 (

<tensorflow_estimator.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifierV2 at 0x1c909d7c430>

In [35]:
#Merging Artificial Neural Model and Linear Classifier model performance validation
Stroke_ann_linr_model.evaluate(input_fn=model_performance(100,128,True),steps=1300)

INFO:tensorflow:Calling model_fn.




INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2023-03-03T18:33:34
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\alekh\AppData\Local\Temp\tmperii8y22\model.ckpt-1300
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [130/1300]
INFO:tensorflow:Evaluation [260/1300]
INFO:tensorflow:Evaluation [390/1300]
INFO:tensorflow:Evaluation [520/1300]
INFO:tensorflow:Evaluation [650/1300]
INFO:tensorflow:Evaluation [780/1300]
INFO:tensorflow:Evaluation [910/1300]
INFO:tensorflow:Evaluation [1040/1300]
INFO:tensorflow:Evaluation [1170/1300]
INFO:tensorflow:Inference Time : 5.89233s
INFO:tensorflow:Finished evaluation at 2023-03-03-18:33:40
INFO:tensorflow:Saving dict for global step 1300: accuracy = 0.94781476, average_loss = 0.20430002, global_step = 1300, loss = 0.20428191
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1300: C:\Users\alekh\AppData\L

{'accuracy': 0.94781476,
 'average_loss': 0.20430002,
 'loss': 0.20428191,
 'global_step': 1300}