In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
from sklearn.model_selection import train_test_split
from ClassificadorAlfa import ClassificadorAlfa
# Silence every warning for the rest of the session. Note that this also hides
# deprecation notices emitted by the libraries imported above.
warnings.filterwarnings('ignore')

In [25]:
# Load the raw dataset from the local `data` directory.
csv_path = os.path.join('data', 'healthcare-dataset-stroke-data.csv')
df = pd.read_csv(csv_path)

# Hipótese 1

In [26]:
# Separate the target from the features.
#
# The listed columns are excluded from this hypothesis. `errors='ignore'`
# keeps the cell re-runnable (columns that were already removed are simply
# skipped) without a bare `except` that would also hide unrelated failures
# such as a misspelled variable or a missing DataFrame.
df = df.drop(columns=['id', 'bmi', 'age', 'avg_glucose_level'], errors='ignore')

X = df.drop(columns='stroke')  # every remaining column except the target
y = df['stroke']               # target column

In [27]:
# Collect the categorical variables: object-typed columns first, followed by
# the int64 columns.
object_features = [name for name, kind in X.dtypes.items() if kind == 'O']
int_features = [name for name, kind in X.dtypes.items() if kind == 'int64']
categorical_features = [*object_features, *int_features]

In [28]:
# Build the dummy variables from the categorical columns only.
X = pd.get_dummies(X[categorical_features])

# Store the two integer indicator columns as booleans.
X = X.astype({'hypertension': 'bool', 'heart_disease': 'bool'})

# Recode label 0 as -1; every other label is left untouched.
y = y.replace(to_replace=0, value=-1)

# Downstream cells work with plain NumPy arrays.
X, y = X.to_numpy(), y.to_numpy()

In [29]:
# Hold out half of the samples for testing. A fixed `random_state` makes the
# split, and every number derived from it, reproducible after
# Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.5, random_state=42
)

# Cast all four arrays to float64 so they share one numeric dtype.
X_train, X_test, y_train, y_test = (
    part.astype('float64') for part in (X_train, X_test, y_train, y_test)
)

In [30]:
# Initialise the model parameters.
#
# The starting weights come from a seeded generator so that the initial point,
# and therefore the trained coefficients, are reproducible between runs.
a = np.random.default_rng(42).standard_normal((X_train.shape[1], 1))  # one weight per column of X_train
b = 1.0

# NOTE: X_train is rebound to its transpose here. Re-running this cell without
# first re-running the split cell transposes it back, and `a` would then be
# sized from the wrong axis.
X_train = X_train.T
y_train = y_train.T

parametros = [a, b, X_train, y_train]
learning_rate = 0.0001
num_iteracoes = 50000

In [31]:
# Build the classifier from the learning rate, the iteration count and the
# [a, b, X_train, y_train] parameter list assembled in the previous cell.
classificador = ClassificadorAlfa(learning_rate, num_iteracoes, parametros)

In [32]:
# Train the model: unpack the two values returned by treinar() into `a` and
# `b` (replacing the initial parameters), then display them.
a, b = classificador.treinar()
a, b

(array([[-0.01482256],
        [-0.52207551],
        [-0.89984293],
        [-0.83841946],
        [ 0.50732865],
        [ 0.35809013],
        [ 0.54862098],
        [-0.08223175],
        [ 0.57881998],
        [-0.18764964],
        [-0.24714749],
        [ 0.18074702],
        [-0.5581565 ],
        [-0.52051221],
        [ 0.02284022],
        [ 0.43565696],
        [ 0.29498708],
        [ 0.1722181 ]]),
 -0.033390522929850744)

In [33]:
# Make predictions: the linear combination a^T · X_test^T shifted by the bias b.
# Presumably this yields one raw score per test sample — confirm against
# ClassificadorAlfa.
ypred = np.matmul(a.T, X_test.T) + b
ypred

array([[-1.04331349, -1.06481837, -1.00851063, ..., -0.96453891,
        -1.38120826, -1.29810925]])

In [34]:
# Score the predictions against the held-out labels using the classifier's
# own accuracy helper, then report the result as a percentage.
# NOTE(review): if the stroke labels are heavily imbalanced, accuracy alone can
# look high for a model that always predicts the majority class — consider
# comparing against that baseline; verify.
acuracia = ClassificadorAlfa.acuracia(y_test, ypred)
print(f'A acurácia do modelo foi de {acuracia*100:.2f}%')

A acurácia do modelo foi de 94.95%


In [35]:
# Selecionando as features mais importantes