In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import pickle
import matplotlib
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import datetime

In [None]:
# Read the stroke-prediction CSV into a DataFrame and preview the first rows.
DATA_PATH = '../input/stroke-prediction-dataset/healthcare-dataset-stroke-data.csv'
csv = pd.read_csv(DATA_PATH)
csv.head()  # head() defaults to the first 5 rows

In [None]:
# Remove the 'id' column before modelling.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after 'id' is already gone no longer raises KeyError.
csv = csv.drop(columns=['id'], errors='ignore')

In [None]:
def mapping(data, feature):
    """Ordinal-encode one column of ``data`` in place and return the frame.

    The distinct non-missing values of ``data[feature]`` are sorted in
    descending order and assigned the codes 0.0, 1.0, 2.0, ...; every cell is
    then replaced by the code of its value.

    Missing values are excluded from the code table and stay missing. Sorting
    them together with strings would raise ``TypeError``, and sorting NaN with
    numbers yields an undefined order.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that holds the column; it is modified in place.
    feature : str
        Name of the column to encode.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with ``feature`` replaced by its codes
        (passed through ``convert_dtypes`` with floating conversion enabled).
    """
    distinct = data[feature].dropna().unique()
    featureMap = {
        value: float(code)
        for code, value in enumerate(sorted(distinct, reverse=True))
    }
    data[feature] = data[feature].map(featureMap).convert_dtypes(
        infer_objects=False, convert_integer=False, convert_floating=True
    )
    return data

def normalizeCol(data, columnName, normRange):
    """Min-max rescale one column of ``data`` into ``normRange``.

    Each value ``x`` becomes
    ``(x - min) / (max - min) * (upper - lower) + lower``
    where ``lower, upper = normRange``. The lower bound is added, not
    subtracted, so ranges that do not start at 0 are handled correctly.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that holds the column; the column is overwritten in place.
    columnName : str
        Name of the numeric column to rescale.
    normRange : sequence of two numbers
        ``[lower, upper]`` bounds of the target range.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` after ``convert_dtypes`` (floating conversion
        enabled, integer conversion disabled).
    """
    lower, upper = normRange[0], normRange[1]
    column = data[columnName]
    col_min = column.min()
    span = column.max() - col_min
    shifted = column - col_min
    if pd.notna(span) and span == 0:
        # Constant column: dividing by a zero span would turn every value into
        # NaN, so keep the (all-zero) shifted values and land on the lower bound.
        unit = shifted * 1.0
    else:
        unit = shifted / span
    data[columnName] = unit * (upper - lower) + lower
    return data.convert_dtypes(infer_objects=False, convert_integer=False, convert_floating=True)

In [None]:
# Show the distinct raw values of the gender column before encoding it.
csv["gender"].unique()

In [None]:
# Quantize gender: replace each category with a numeric code, then rescale
# the codes into [0, 1].
csv = (
    csv
    .pipe(mapping, "gender")
    .pipe(normalizeCol, "gender", [0, 1])
)

In [None]:
# csv.gender.unique() 
# csv[csv['gender']== 0.5]

In [None]:
# csv.age.unique()
# csv[csv['age']== (csv.age.max())] #validating data

In [None]:
# Min-max rescale age into [0, 1].
csv = csv.pipe(normalizeCol, "age", [0, 1])

In [None]:
# csv[csv['age']== 1.00000000e+00] #validation

In [None]:
# Encode ever_married as numeric codes, then rescale the codes into [0, 1].
csv = (
    csv
    .pipe(mapping, "ever_married")
    .pipe(normalizeCol, "ever_married", [0, 1])
)

In [None]:
# Encode work_type as numeric codes, then rescale the codes into [0, 1].
csv = (
    csv
    .pipe(mapping, "work_type")
    .pipe(normalizeCol, "work_type", [0, 1])
)

In [None]:
# Encode Residence_type as numeric codes, then rescale the codes into [0, 1].
csv = (
    csv
    .pipe(mapping, "Residence_type")
    .pipe(normalizeCol, "Residence_type", [0, 1])
)

In [None]:
# Min-max rescale avg_glucose_level into [0, 1].
csv = csv.pipe(normalizeCol, "avg_glucose_level", [0, 1])

In [None]:
# Impute missing BMI with the column mean. Only the bmi column is filled:
# a frame-wide fillna would write the BMI mean into missing cells of every
# other column as well.
csv = csv.assign(bmi=csv["bmi"].fillna(csv["bmi"].mean()))
# bmi is a continuous measurement, so it is rescaled directly like age and
# avg_glucose_level. Passing it through mapping() would replace each value by
# a reversed rank code and discard the actual magnitudes.
csv = normalizeCol(csv, "bmi", [0, 1])

In [None]:
# Encode smoking_status as numeric codes, then rescale the codes into [0, 1].
csv = (
    csv
    .pipe(mapping, "smoking_status")
    .pipe(normalizeCol, "smoking_status", [0, 1])
)

In [None]:
# Min-max rescale the stroke column into [0, 1].
csv = csv.pipe(normalizeCol, "stroke", [0, 1])

In [None]:
# Features are every column except the last; the last column is the target.
# NOTE(review): assumes the label is the final column of csv; confirm.
X, y = csv.values[:, :-1], csv.values[:, -1]
X = X.astype('float32')
y = LabelEncoder().fit_transform(y)  # integer class ids for the sparse loss

# A fixed random_state makes the split (and every metric derived from it)
# reproducible across Restart & Run All; stratify=y keeps the class
# proportions the same in the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
n_features = X_train.shape[1]  # number of input features fed to the network

In [None]:
# Fully connected classifier: an input-width ReLU layer, four hidden ReLU
# layers, and a 2-unit softmax output. All ReLU layers use He-normal init.
hidden_units = (6, 14, 24, 6)
model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(
            n_features,
            activation='relu',
            kernel_initializer='he_normal',
            input_shape=(n_features,),
        )
    ]
    + [
        tf.keras.layers.Dense(units, activation='relu', kernel_initializer='he_normal')
        for units in hidden_units
    ]
    + [tf.keras.layers.Dense(2, activation='softmax')]
)

In [None]:
# Optional: resume from a previously saved model instead of training from scratch.
# try:
#     model = tf.keras.models.load_model('trained_model')
# except:
#     print("no model file, training from scratch")
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Optional: TensorBoard logging.
# log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Keep the History object returned by fit(): it carries the per-epoch training
# and validation metrics, and the persistence snippet below refers to `history`.
# NOTE(review): validation_data is the same X_test/y_test that is later passed
# to model.evaluate, so the validation curve is not an independent check.
history = model.fit(
    X_train,
    y_train,
    epochs=64,
    batch_size=64,
    verbose=0,
    validation_data=(X_test, y_test),
)

# Optional: persist / restore the trained model and its history.
# model.save('trained_model')
# pickle.dump(history, open("history.p", "wb"))
# model = tf.keras.models.load_model('trained_model')
# history = pickle.load(open("history.p", "rb"))

In [None]:
# Score the trained model on the held-out split; evaluate() returns the loss
# followed by the compiled metric (accuracy).
test_scores = model.evaluate(x=X_test, y=y_test, verbose=0)
loss, acc = test_scores

In [None]:
# Report the held-out accuracy rounded to three decimals.
accuracy_line = 'Test Accuracy: %.3f' % acc
print(accuracy_line)