In [60]:
# Bibliotecas
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.dummy import DummyClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [9]:
# Read the dataset and drop the columns that are not used in the analysis
dados_alzheimer = (
    pd.read_csv("dados/alzheimers_disease_data.csv")
    .drop(columns=['PatientID', 'DoctorInCharge'])
)
dados_alzheimer.head()

Unnamed: 0,Age,Gender,Ethnicity,EducationLevel,BMI,Smoking,AlcoholConsumption,PhysicalActivity,DietQuality,SleepQuality,...,FunctionalAssessment,MemoryComplaints,BehavioralProblems,ADL,Confusion,Disorientation,PersonalityChanges,DifficultyCompletingTasks,Forgetfulness,Diagnosis
0,73,0,0,2,22.927749,0,13.297218,6.327112,1.347214,9.025679,...,6.518877,0,0,1.725883,0,0,0,1,0,0
1,89,0,0,0,26.827681,0,4.542524,7.619885,0.518767,7.151293,...,7.118696,0,0,2.592424,0,0,0,0,1,0
2,73,0,3,1,17.795882,0,19.555085,7.844988,1.826335,9.673574,...,5.895077,0,0,7.119548,0,1,0,1,0,0
3,74,1,0,1,33.800817,1,12.209266,8.428001,7.435604,8.392554,...,8.965106,0,1,6.481226,0,0,0,0,0,0
4,89,0,0,0,20.716974,0,18.454356,6.310461,0.795498,5.597238,...,6.045039,0,0,0.014691,0,0,1,1,0,0


In [43]:
# Prepare the data for the classifiers

# Separate features from the target, then split the rows in half:
# the first half is used for training and the second half for testing.
# shuffle=False keeps the file's row order, so the split is deterministic.
# NOTE(review): if the CSV rows are ordered in a meaningful way, an unshuffled
# split can bias the evaluation -- confirm the row order is arbitrary.
X, y = dados_alzheimer.drop(columns='Diagnosis'), dados_alzheimer["Diagnosis"]
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5, shuffle=False)

# Work on explicit copies: the column assignments below must never write
# through to `X`, and must not trigger pandas' SettingWithCopyWarning.
X_train, X_test = X_train.copy(), X_test.copy()

# Standardize the columns listed in `colunas`; all other columns are left unchanged.
# The scaler is fitted on the training half only and then applied to the test
# half, so no statistics from the test rows are used during fitting.
scaler = StandardScaler()
colunas = ['Age', 'BMI', 'AlcoholConsumption', 'PhysicalActivity', 'DietQuality', 'SleepQuality', 'SystolicBP', 'DiastolicBP', 'CholesterolTotal', 'CholesterolLDL', 'CholesterolHDL', 'CholesterolTriglycerides', 'MMSE', 'FunctionalAssessment', 'ADL']
X_train[colunas] = scaler.fit_transform(X_train[colunas])
X_test[colunas] = scaler.transform(X_test[colunas])

In [66]:
# Baseline: DummyClassifier
# A deliberately naive model; its score is the reference that the
# other classifiers in this notebook are compared against.

dummy_clf = DummyClassifier().fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = dummy_clf.predict(X_test)
print(f"Precisao: {accuracy_score(y_test, y_pred)*100:.2f} %")


Precisao: 63.26 %


In [54]:
# k-nearest-neighbours classifier

# Hyperparameters
k = 11                          # number of neighbours
distance_metric = 'manhattan'   # distance metric

# Fit on the training half, predict on the test half and report the accuracy
knn_clf = KNeighborsClassifier(
    n_neighbors=k,
    metric=distance_metric,
).fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = knn_clf.predict(X_test)
print(f"Precisao: {accuracy_score(y_test, y_pred)*100:.2f} %")


Precisao: 75.16 %


In [58]:
# Multi-layer perceptron classifier

# Hyperparameters
mlp = MLPClassifier(
    hidden_layer_sizes=(3, 2),   # two hidden layers with 3 and 2 units
    activation= 'relu',
    solver= 'sgd',
    learning_rate_init= 0.1,
    max_iter=1000,
    momentum=0,                  # plain SGD, no momentum term
    # Weight initialisation and SGD batch shuffling are random; fixing the seed
    # makes the reported accuracy reproducible across kernel restarts.
    random_state=42,
)

# Fit on the training half, predict on the test half and report the accuracy
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
print(f"Precisao: {accuracy_score(y_test, y_pred)*100:.2f} %")

Precisao: 82.98 %
