In [205]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np


In [206]:
# Read the stroke CSV, then discard its first column (presumably a record
# identifier -- confirm against the CSV header) and every row that has a
# missing value.
dataset = pd.read_csv('data/healthcare-dataset-stroke-data.csv', sep=',')
dataset = dataset.drop(columns=dataset.columns[0]).dropna()

# Show column names, non-null counts and dtypes of the cleaned frame.
dataset.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 4909 entries, 0 to 5109
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   gender             4909 non-null   object 
 1   age                4909 non-null   float64
 2   hypertension       4909 non-null   int64  
 3   heart_disease      4909 non-null   int64  
 4   ever_married       4909 non-null   object 
 5   work_type          4909 non-null   object 
 6   Residence_type     4909 non-null   object 
 7   avg_glucose_level  4909 non-null   float64
 8   bmi                4909 non-null   float64
 9   smoking_status     4909 non-null   object 
 10  stroke             4909 non-null   int64  
dtypes: float64(3), int64(3), object(5)
memory usage: 460.2+ KB


In [207]:
# Encode gender as an integer flag: "Male" -> 0, "Female" -> 1.
# "Other" is folded into the "Female" code.
gender_conversion = {"Male": 0, "Female": 1}
dataset["gender"] = (
    dataset["gender"]
    .map({**gender_conversion, "Other": gender_conversion["Female"]})
    .astype(int)
)


In [208]:
# Encode ever_married as an integer flag: "No" -> 0, "Yes" -> 1.
married_conversion = {"No": 0, "Yes": 1}
dataset["ever_married"] = (
    dataset["ever_married"]
    .map(married_conversion)
    .astype(int)
)


In [209]:
# One-hot encode work_type. drop_first=True omits the first category so the
# remaining indicator columns are not perfectly collinear.
df_work_ohe = pd.get_dummies(dataset["work_type"], prefix="work_ohe", drop_first=True)

# Swap the original text column for its indicator columns, which are appended
# at the end of the frame.
dataset = pd.concat(
    [dataset.drop(columns=["work_type"]), df_work_ohe],
    axis=1,
)


In [210]:
# Encode Residence_type as an integer flag: "Rural" -> 0, "Urban" -> 1.
residence_conversion = {"Rural": 0, "Urban": 1}
dataset["Residence_type"] = (
    dataset["Residence_type"]
    .map(residence_conversion)
    .astype(int)
)


In [211]:
# Indicator column names that contain a space are renamed to snake_case.
smoking_renames = {
    "smoking_ohe_never smoked": "smoking_ohe_never_smoked",
    "smoking_ohe_formerly smoked": "smoking_ohe_formerly_smoked",
}

# One-hot encode smoking_status (first category omitted) and tidy the names.
df_smoking_ohe = pd.get_dummies(
    dataset["smoking_status"],
    prefix="smoking_ohe",
    drop_first=True,
).rename(columns=smoking_renames)

# Swap the original text column for its indicator columns, which are appended
# at the end of the frame.
dataset = pd.concat(
    [dataset.drop(columns=["smoking_status"]), df_smoking_ohe],
    axis=1,
)


In [212]:
x = dataset.iloc[:, 0:10].values
y = dataset.iloc[:, 10].values


In [213]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

x_train[:, 1] = x_train[:, 1].astype(np.float32)


In [214]:
model = Sequential()
model.add(Dense(12, input_dim=10, activation='relu', kernel_initializer='uniform'))
model.add(Dense(8, activation='relu', kernel_initializer='uniform'))
model.add(Dense(1, activation='sigmoid', kernel_initializer='uniform'))


In [215]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()


Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_45 (Dense)            (None, 12)                132       
                                                                 
 dense_46 (Dense)            (None, 8)                 104       
                                                                 
 dense_47 (Dense)            (None, 1)                 9         
                                                                 
Total params: 245
Trainable params: 245
Non-trainable params: 0
_________________________________________________________________


In [216]:
# Configure training: binary cross-entropy loss (matches the single sigmoid
# output), the Adam optimizer, and accuracy reported as a metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [217]:
# Train on the training partition for 150 epochs in mini-batches of 10 rows.
# Left as a bare expression so the returned History object is displayed.
model.fit(
    x_train,
    y_train,
    batch_size=10,
    epochs=150,
)


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

<keras.callbacks.History at 0x17e3a94bdf0>

In [218]:
# Score the trained model on the held-out partition, then report the loss and
# the accuracy expressed as a percentage.
loss, accuracy = model.evaluate(x_test, y_test)
evaluation_report = "\nLoss: %.2f, Acurácia: %.2f%%" % (loss, accuracy*100)
print(evaluation_report)


Loss: 0.53, Acurácia: 71.76%


In [219]:
# Run the trained model over the full feature matrix `x`, i.e. the rows of
# both the training and the test partitions.
predictions = model.predict(x)



In [220]:
# Adjust the predictions and print the result: each row of `predictions` holds
# one sigmoid output, which is rounded to the nearest integer (0 or 1).
# The loop variable is named `row` so it does not reuse the name of the
# feature matrix `x`.
previsões = [round(row[0]) for row in predictions]
print(previsões)

[1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 