In [54]:
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import joblib


In [2]:
base = pd.read_csv('dataset/alzheimer.csv')
base

Unnamed: 0,Group,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,Nondemented,M,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,Nondemented,M,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,Demented,M,75,12,,23.0,0.5,1678,0.736,1.046
3,Demented,M,76,12,,28.0,0.5,1738,0.713,1.010
4,Demented,M,80,12,,22.0,0.5,1698,0.701,1.034
...,...,...,...,...,...,...,...,...,...,...
368,Demented,M,82,16,1.0,28.0,0.5,1693,0.694,1.037
369,Demented,M,86,16,1.0,26.0,0.5,1688,0.675,1.040
370,Nondemented,F,61,13,2.0,30.0,0.0,1319,0.801,1.331
371,Nondemented,F,63,13,2.0,30.0,0.0,1327,0.796,1.323


In [3]:
# y - variável dependente
# x - variáveis independentes
y = base.iloc[:,0].values
X = base.iloc[:,[1,2,3,4,5,6,7,8,9]].values

In [4]:
labelEncoder = LabelEncoder()

In [5]:
for i in range(X.shape[1]):
  if X[:,i].dtype == 'object':
    X[:,i] = labelEncoder.fit_transform(X[:,i])

In [6]:
# X independentes
# y variável dependente
X_training, X_test, y_training, y_test = train_test_split(X,y, test_size = 0.3, random_state= 1)

In [8]:
RandomForest_model = RandomForestClassifier(random_state=1, n_estimators=1000)
RandomForest_model.fit(X_training, y_training)


In [9]:
Rf_prediction = RandomForest_model.predict(X_test)

In [42]:
NB_model = GaussianNB()
NB_model.fit(X_training, y_training)

In [43]:
DT_model = DecisionTreeClassifier(random_state = 1, max_depth = 10, max_leaf_nodes = 10)
DT_model.fit(X_training, y_training)

In [44]:
NB_predict = NB_model.predict(X_test)

In [45]:
DT_predict = DT_model.predict(X_test)

In [46]:
accuracy = accuracy_score(y_test, Rf_prediction)
precision = precision_score(y_test, Rf_prediction, average = 'weighted')
recall = recall_score(y_test, Rf_prediction, average = 'weighted')
f1 = f1_score(y_test, Rf_prediction,  average = 'weighted')
print(f'Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1: {f1}')

Accuracy: 0.9017857142857143, Precision: 0.9115623182787361, Recall: 0.9017857142857143, F1: 0.8628062975644529


In [47]:
accuracy = accuracy_score(y_test, NP_predict)
precision = precision_score(y_test, NP_predict, average = 'weighted')
recall = recall_score(y_test, NP_predict, average = 'weighted')
f1 = f1_score(y_test, NP_predict,  average = 'weighted')
print(f'Accuracy NB: {accuracy}, Precision NB: {precision}, Recall NB: {recall}, F1 NB: {f1}')

Accuracy NB: 0.9107142857142857, Precision NB: 0.919112907224674, Recall NB: 0.9107142857142857, F1 NB: 0.8810330507309649


In [52]:
NB_report = classification_report(y_test, NB_predict)
print(NB_report)

              precision    recall  f1-score   support

   Converted       1.00      0.17      0.29        12
    Demented       0.93      1.00      0.96        40
 Nondemented       0.90      1.00      0.94        60

    accuracy                           0.91       112
   macro avg       0.94      0.72      0.73       112
weighted avg       0.92      0.91      0.88       112



In [53]:
DT_report = classification_report(y_test, DT_predict)
print(DT_report)

              precision    recall  f1-score   support

   Converted       1.00      0.08      0.15        12
    Demented       0.91      1.00      0.95        40
 Nondemented       0.90      1.00      0.94        60

    accuracy                           0.90       112
   macro avg       0.93      0.69      0.68       112
weighted avg       0.91      0.90      0.86       112



In [55]:
model_directory = './compiled_model/NaiveBayesAlzheimerPrediction.pkl'

In [56]:
joblib.dump(NB_model, model_directory)

['./compiled_model/NaiveBayesAlzheimerPrediction.pkl']