In [148]:
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.feature_selection import chi2, SelectKBest
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import joblib

In [149]:
dataset = pd.read_csv('dataset/alzheimer.csv')
dataset.head()

Unnamed: 0,Group,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,Nondemented,M,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,Nondemented,M,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,Demented,M,75,12,,23.0,0.5,1678,0.736,1.046
3,Demented,M,76,12,,28.0,0.5,1738,0.713,1.01
4,Demented,M,80,12,,22.0,0.5,1698,0.701,1.034


In [150]:
# y - dependent variable
# x - independent variables
y = dataset['Group']
X = dataset.iloc[:,1:]

In [151]:
X.isnull().sum()

M/F      0
Age      0
EDUC     0
SES     19
MMSE     2
CDR      0
eTIV     0
nWBV     0
ASF      0
dtype: int64

In [152]:
SESMedian = X['SES'].median()
SESMedian

2.0

In [153]:
X['SES'].fillna(SESMedian, inplace=True)

In [154]:
X.isnull().sum()

M/F     0
Age     0
EDUC    0
SES     0
MMSE    2
CDR     0
eTIV    0
nWBV    0
ASF     0
dtype: int64

In [155]:
MMSEMedian = X['MMSE'].median()
MMSEMedian

29.0

In [156]:
X['MMSE'].fillna(MMSEMedian, inplace=True)

In [157]:
labelEncoder = LabelEncoder()
X['M/F'] = labelEncoder.fit_transform(X['M/F'])
X.head()

Unnamed: 0,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,1,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,1,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,1,75,12,2.0,23.0,0.5,1678,0.736,1.046
3,1,76,12,2.0,28.0,0.5,1738,0.713,1.01
4,1,80,12,2.0,22.0,0.5,1698,0.701,1.034


In [158]:
standardScaler = StandardScaler()
standardizedData = standardScaler.fit_transform(X.iloc[:,1:9])

In [159]:
X = pd.concat([X, pd.DataFrame(standardizedData,columns=['Age_N', 'EDUC_N', 'SES_N', 'MMSE_N', 'CDR_N', 'eTIV', 'nWBV', 'ASF'])], axis = 1)
X.drop(columns=['Age', 'EDUC', 'SES', 'MMSE', 'CDR', 'eTIV', 'nWBV', 'ASF'], inplace=True)
X.head()

Unnamed: 0,M/F,Age_N,EDUC_N,SES_N,MMSE_N,CDR_N
0,1,1.308738,-0.208132,-0.394466,-0.095686,-0.777653
1,1,1.439787,-0.208132,-0.394466,0.721664,-0.777653
2,1,-0.263856,-0.904394,-0.394466,-1.185486,0.55905
3,1,-0.132806,-0.904394,-0.394466,0.176764,0.55905
4,1,0.391392,-0.904394,-0.394466,-1.457936,0.55905


In [160]:
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

selection = SelectKBest(chi2, k=4)
X = selection.fit_transform(X, y)

In [161]:
# y - dependent variable
# x - independent variables
X_training, X_test, y_training, y_test = train_test_split(X,y, test_size=0.4, random_state=1)

In [162]:
NaiveBayes_model = GaussianNB()
NaiveBayes_model.fit(X_training, y_training)

In [163]:
NaiveBayes_prediction = NaiveBayes_model.predict(X_test)

In [164]:
#Naive bayes model metrics
NaiveBayesAccuracy = accuracy_score(y_test, NaiveBayes_prediction)
NaiveBayesPrecision = precision_score(y_test, NaiveBayes_prediction, average = 'weighted')
NaiveBayesRecall = recall_score(y_test, NaiveBayes_prediction, average = 'weighted')
NaiveBayesF1 = f1_score(y_test, NaiveBayes_prediction,  average = 'weighted')
print(f'Naive bayes Accuracy: {NaiveBayesAccuracy}, Naive Bayes Precision: {NaiveBayesPrecision}, Naive Bayes Recall: {NaiveBayesRecall}, Naive Bayes F1: {NaiveBayesF1}')

Naive bayes Accuracy: 0.9, Naive Bayes Precision: 0.8651238825031928, Naive Bayes Recall: 0.9, Naive Bayes F1: 0.8641633746509639


In [166]:
NaiveBayes_report = classification_report(y_test, NaiveBayes_prediction)
print(NaiveBayes_report)

              precision    recall  f1-score   support

   Converted       0.50      0.07      0.12        15
    Demented       0.88      0.98      0.93        52
 Nondemented       0.92      1.00      0.96        83

    accuracy                           0.90       150
   macro avg       0.77      0.68      0.67       150
weighted avg       0.87      0.90      0.86       150

