In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras.layers.experimental.preprocessing import CategoryEncoding
from tensorflow.keras.layers.experimental.preprocessing import StringLookup

In [2]:
# Path of the stroke dataset CSV.
# NOTE(review): this is an absolute, machine-specific path; point this single
# constant at your own copy of the file before running the notebook.
DATA_PATH = r"C:\Users\tiago\Desktop\code-projects\working\stroke prediction\healthcare-dataset-stroke-data.csv"

# keep_default_na=False stops pandas from turning "N/A" cells into NaN, so
# missing BMI values arrive as the literal string "N/A" (filtered out below).
data_used = pd.read_csv(DATA_PATH, keep_default_na=False)

# Keep only the rows that have a BMI, and take an explicit copy so the column
# assignment below writes to an independent frame rather than to a filtered
# slice (assigning into such a slice raises pandas' SettingWithCopyWarning).
df = data_used[data_used.bmi != "N/A"].copy()

# Go through float64 first, then truncate to whole numbers.
# Presumably the raw BMI text contains decimals, which a direct cast to int
# would reject -- confirm against the CSV.
df["bmi"] = df["bmi"].astype("float64").astype("int")
df.head()


Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36,formerly smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24,never smoked,1
5,56669,Male,81.0,0,0,Yes,Private,Urban,186.21,29,formerly smoked,1


In [3]:
# Validation split: a fixed-seed random 20% of the rows.
val_dataframe = df.sample(random_state=1337, frac=0.2)
# Training split: every row whose index label was not drawn for validation.
train_dataframe = df.drop(index=val_dataframe.index)

In [4]:
def dataframe_to_dataset(dataframe, label_column="stroke", shuffle=True):
    """Convert a pandas DataFrame into a ``tf.data.Dataset`` of (features, label) pairs.

    Args:
        dataframe: Source frame. It is copied first, so the caller's frame
            keeps its label column.
        label_column: Name of the column to use as the label. Defaults to
            ``"stroke"``.
        shuffle: When True (the default), shuffle the elements using a buffer
            as large as the frame.

    Returns:
        An unbatched dataset whose elements are
        ``(dict mapping column name -> value, label)``.

    Raises:
        KeyError: If ``label_column`` is not a column of ``dataframe``.
    """
    features = dataframe.copy()
    labels = features.pop(label_column)
    ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    # shuffle() requires a strictly positive buffer size, so an empty frame
    # is returned unshuffled instead of failing later with buffer_size=0.
    if shuffle and len(features) > 0:
        ds = ds.shuffle(buffer_size=len(features))
    return ds


# Build the unbatched (features, label) datasets for both splits.
train_ds, val_ds = (
    dataframe_to_dataset(frame) for frame in (train_dataframe, val_dataframe)
)

In [5]:
# Group both datasets into mini-batches of 32 examples.
# Note: re-running this cell would batch the already-batched datasets again.
train_ds, val_ds = train_ds.batch(batch_size=32), val_ds.batch(batch_size=32)

In [6]:
def encode_numerical_feature(feature, name, dataset):
    """Normalize a numeric input using statistics learned from ``dataset``.

    Args:
        feature: The Keras input to normalize.
        name: Key of this feature in the dataset's feature dict.
        dataset: Dataset yielding ``(features_dict, label)`` elements; used
            only to adapt the normalization layer.

    Returns:
        The adapted ``Normalization`` layer applied to ``feature``.
    """
    # Reduce each (features, label) element to this single feature and give
    # it a trailing axis before handing it to adapt().
    values_ds = dataset.map(
        lambda features, _label: tf.expand_dims(features[name], -1)
    )

    scaler = Normalization()
    scaler.adapt(values_ds)
    return scaler(feature)

In [7]:
def encode_string_categorical_feature(feature, name, dataset):
    """Encode a string categorical input as a binary category vector.

    The strings are first mapped to integer indices by a ``StringLookup``
    layer, and those indices are then encoded by a ``CategoryEncoding`` layer
    in ``"binary"`` output mode. Both layers are adapted on ``dataset``.

    Args:
        feature: The Keras string input to encode.
        name: Key of this feature in the dataset's feature dict.
        dataset: Dataset yielding ``(features_dict, label)`` elements; used
            only to adapt the two preprocessing layers.

    Returns:
        The encoded tensor produced from ``feature``.
    """
    # Stream of just this feature's values, each with a trailing axis added.
    strings_ds = dataset.map(
        lambda features, _label: tf.expand_dims(features[name], -1)
    )

    # Stage 1: learn the vocabulary and map strings to integer indices.
    lookup = StringLookup()
    lookup.adapt(strings_ds)
    indices = lookup(feature)

    # Stage 2: learn the index space from the looked-up data, then encode.
    one_hot = CategoryEncoding(output_mode="binary")
    one_hot.adapt(strings_ds.map(lookup))
    return one_hot(indices)

In [8]:
def encode_integer_categorical_feature(feature, name, dataset):
    """Encode an integer categorical input as a binary category vector.

    Args:
        feature: The Keras integer input to encode.
        name: Key of this feature in the dataset's feature dict.
        dataset: Dataset yielding ``(features_dict, label)`` elements; used
            only to adapt the encoding layer.

    Returns:
        The adapted ``CategoryEncoding`` layer (``"binary"`` output mode)
        applied to ``feature``.
    """
    # Stream of just this feature's values, each with a trailing axis added.
    indices_ds = dataset.map(
        lambda features, _label: tf.expand_dims(features[name], -1)
    )

    one_hot = CategoryEncoding(output_mode="binary")
    # adapt() learns the space of index values present in the data.
    one_hot.adapt(indices_ds)
    return one_hot(feature)

In [9]:
def _scalar_input(name, dtype=None):
    """Return a Keras input holding one value per example.

    ``dtype=None`` leaves the choice of dtype to ``keras.Input``.
    """
    return keras.Input(shape=(1,), name=name, dtype=dtype)


# Categorical features that are already integer-coded
hypertension = _scalar_input("hypertension", dtype="int64")
heart_disease = _scalar_input("heart_disease", dtype="int64")

# Categorical features stored as strings
gender = _scalar_input("gender", dtype="string")
ever_married = _scalar_input("ever_married", dtype="string")
work_type = _scalar_input("work_type", dtype="string")
Residence_type = _scalar_input("Residence_type", dtype="string")
smoking_status = _scalar_input("smoking_status", dtype="string")

# Numerical features (no explicit dtype)
age = _scalar_input("age")
avg_glucose_level = _scalar_input("avg_glucose_level")
bmi = _scalar_input("bmi")



In [10]:
# Every model input in one list, grouped by kind.
all_inputs = (
    # integer-coded categoricals
    [hypertension, heart_disease]
    # string categoricals
    + [gender, ever_married, work_type, Residence_type, smoking_status]
    # numerical features
    + [age, avg_glucose_level, bmi]
)

In [11]:
# All preprocessing layers are adapted on the training dataset only.

# Integer-coded categoricals
hypertension_encoded = encode_integer_categorical_feature(
    feature=hypertension, name="hypertension", dataset=train_ds
)
heart_disease_encoded = encode_integer_categorical_feature(
    feature=heart_disease, name="heart_disease", dataset=train_ds
)

# String categoricals
gender_encoded = encode_string_categorical_feature(
    feature=gender, name="gender", dataset=train_ds
)
ever_married_encoded = encode_string_categorical_feature(
    feature=ever_married, name="ever_married", dataset=train_ds
)
work_type_encoded = encode_string_categorical_feature(
    feature=work_type, name="work_type", dataset=train_ds
)
Residence_type_encoded = encode_string_categorical_feature(
    feature=Residence_type, name="Residence_type", dataset=train_ds
)
smoking_status_encoded = encode_string_categorical_feature(
    feature=smoking_status, name="smoking_status", dataset=train_ds
)

# Numerical features
age_encoded = encode_numerical_feature(
    feature=age, name="age", dataset=train_ds
)
avg_glucose_level_encoded = encode_numerical_feature(
    feature=avg_glucose_level, name="avg_glucose_level", dataset=train_ds
)
bmi_encoded = encode_numerical_feature(
    feature=bmi, name="bmi", dataset=train_ds
)

In [12]:
# Join every encoded feature into a single tensor, in the same order as the
# inputs were declared.
all_features = layers.Concatenate()([
    hypertension_encoded,
    heart_disease_encoded,
    gender_encoded,
    ever_married_encoded,
    work_type_encoded,
    Residence_type_encoded,
    smoking_status_encoded,
    age_encoded,
    avg_glucose_level_encoded,
    bmi_encoded,
])


In [13]:
# One hidden ReLU layer with dropout, followed by a single sigmoid unit.
x = layers.Dense(units=32, activation="relu")(all_features)
x = layers.Dropout(rate=0.5)(x)
output = layers.Dense(units=1, activation="sigmoid")(x)

model = keras.Model(inputs=all_inputs, outputs=output)
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [14]:
# Train for 50 epochs, evaluating on the validation dataset after each one.
model.fit(x=train_ds, validation_data=val_ds, epochs=50)

Epoch 1/50


  [n for n in tensors.keys() if n not in ref_input_names])


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1c3f582f248>

In [17]:
# One hand-written patient record, keyed by the model's input names.
sample = {
    "hypertension": 1,
    "heart_disease": 1,
    "gender": "Male",
    "ever_married": "Yes",
    "work_type": "Self-employed",
    "Residence_type": "Urban",
    "smoking_status": "smokes",
    "age": 60,
    "avg_glucose_level": 174,
    "bmi": 32,
}

# Wrap every value in a one-element list so each input becomes a batch of one.
input_dict = {
    feature_name: tf.convert_to_tensor([feature_value])
    for feature_name, feature_value in sample.items()
}
predictions = model.predict(input_dict)

# predictions[0][0] is the sigmoid output for the only example in the batch.
print(
    "This particular patient had a %.1f percent probability "
    "of having a stroke, as evaluated by our model." % (predictions[0][0] * 100,)
)

This particular patient had a 15.3 percent probability of having a stroke, as evaluated by our model.
