[Open DashBoard](https://ml-app-abhinav.herokuapp.com/app/predict_dementia)

In [None]:
import numpy as np
import pandas as pd 

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this may also hide pandas SettingWithCopyWarning / FutureWarning messages
# raised by the data-preparation cells further down.
warnings.filterwarnings('ignore')

In [None]:
# Load the dementia dataset, then show its (rows, columns) shape and the first rows.
DATASET_CSV = '/kaggle/input/dementia-prediction-dataset/dementia_dataset.csv'
df = pd.read_csv(DATASET_CSV)
print(df.shape)
df.head()

In [None]:
# Number of rows (visits) per diagnosis group.
df['Group'].value_counts()

[Open Access Series of Imaging Studies: Longitudinal (dataset paper)](https://direct.mit.edu/jocn/article/22/12/2677/4983/Open-Access-Series-of-Imaging-Studies-Longitudinal)

**Converted**: 14 subjects were characterized as nondemented at the time of their initial visit (CDR 0) and were subsequently characterized as demented at a later visit (CDR > 0).

In [None]:
# Relabel every "Converted" visit by its own CDR score: CDR below 0.5 becomes
# 'Nondemented', anything else becomes 'Demented'.
# CDR is looked up by column name; the previous positional x[11] depended on the CSV
# column order and on an integer-key fallback for label-indexed rows that pandas has
# deprecated. np.where also keeps the cell safe to re-run once no "Converted" rows remain.
converted_mask = df['Group'] == 'Converted'
df.loc[converted_mask, 'Group'] = np.where(
    df.loc[converted_mask, 'CDR'] < 0.5, 'Nondemented', 'Demented'
)

In [None]:
# Smallest and largest CDR observed within each diagnosis group.
cdr_range = {'CDR': ['min', 'max']}
df.groupby('Group').agg(cdr_range)

In [None]:
# Number of rows for every (Group, CDR) combination.
group_cdr_keys = ['Group', 'CDR']
df.groupby(group_cdr_keys).size()

We will not consider Subject ID, MRI ID, Visit, MR Delay, or CDR when building the model.

In [None]:
# Feature matrix: columns selected by position; target: column at position 2
# (presumably 'Group' — it is later encoded from 'Nondemented'/'Demented').
# .copy() makes X and y independent of df, so the imputation and encoding cells below
# modify them deterministically instead of writing to a slice of df (the situation that
# triggers pandas' SettingWithCopyWarning, which this notebook silences).
feature_positions = [5, 7, 8, 9, 10, 12, 13, 14]
target_position = 2
X = df.iloc[:, feature_positions].copy()
y = df.iloc[:, target_position].copy()
X.head(2)

In [None]:
# Feature columns that contain missing values, with their NaN counts.
missing_per_column = X.isna().sum()
missing_per_column[missing_per_column != 0]

In [None]:
# SES - Social Economic Status
# Row counts for every (Group, sex, SES) combination.
ses_keys = ['Group', 'M/F', 'SES']
df.groupby(ses_keys).size()

In [None]:
# All rows of every subject that has at least one row with a missing SES value.
ses_missing_rows = df['SES'].isna()
subject_no_ses = df.loc[ses_missing_rows, 'Subject ID'].unique().tolist()
df[df['Subject ID'].isin(subject_no_ses)]

In [None]:
# Show every recorded row of subject OAS2_0007.
df.loc[df['Subject ID'] == 'OAS2_0007']

In [None]:
# Fill missing SES per sex: 4 for male subjects, 3 for female subjects.
ses_fill_by_sex = {'M': 4, 'F': 3}
for sex, fill_value in ses_fill_by_sex.items():
    sex_mask = X['M/F'] == sex
    X.loc[sex_mask, 'SES'] = X.loc[sex_mask, 'SES'].fillna(fill_value)

In [None]:
# MMSE - mini mental state examination
# All rows of every subject that has at least one row with a missing MMSE score.
# Stored under its own name so the SES list built earlier (subject_no_ses) is not
# overwritten with MMSE subjects.
subject_no_mmse = df.loc[df['MMSE'].isna(), 'Subject ID'].unique().tolist()
df[df['Subject ID'].isin(subject_no_mmse)]

In [None]:
# Plot the MMSE score across visits for one randomly chosen 'Demented' subject.
# A seeded generator makes the pick (and therefore the figure) reproducible on every
# Restart & Run All, instead of changing with each execution.
unique_p = df[df.Group == 'Demented']['Subject ID'].unique().tolist()
rng = np.random.default_rng(42)
unique_p_df = df[df['Subject ID'] == unique_p[rng.integers(len(unique_p))]]
plt.figure(figsize=(8, 6))
sns.lineplot(data=unique_p_df, x="Visit", y="MMSE", hue="Group", alpha=0.7)
plt.show()


In [None]:
# For every 'Demented' subject, compare the first and last recorded MMSE score and count
# whether it increased, decreased or stayed constant. dec_per collects the percentage
# drop (relative to the first score) for the subjects whose score decreased.
result = {'increase': 0, 'decrease': 0, 'constant': 0}
dec_per = []
for subject_id in unique_p:
    # Order by visit and drop missing scores: any comparison against NaN is False, so a
    # subject with a missing first/last MMSE would otherwise be miscounted as 'constant'.
    mmse_scores = (
        df.loc[df['Subject ID'] == subject_id]
        .sort_values('Visit')['MMSE']
        .dropna()
    )
    if len(mmse_scores) < 2:
        continue  # fewer than two measurements: no trend can be determined
    first_mmse, last_mmse = mmse_scores.iloc[0], mmse_scores.iloc[-1]
    if last_mmse > first_mmse:
        result['increase'] += 1
    elif last_mmse < first_mmse:
        result['decrease'] += 1
        dec_per.append((first_mmse - last_mmse) * 100 / first_mmse)
    else:
        result['constant'] += 1

print('if subject have dementia then how it will influence his/her MMSE score ')
result

In [None]:
# Median of the per-subject percentage drops collected in dec_per.
median_decrease = pd.Series(dec_per).median()
print(f'Median percentage decrese is {median_decrease}')

In [None]:
# Impute missing MMSE scores with 24.
# NOTE(review): the rationale for the value 24 is not stated in this cell — confirm.
# The result is assigned back instead of calling fillna(inplace=True) on X.MMSE: the
# in-place call operates on an intermediate Series and is not guaranteed to update X
# (pandas chained-assignment / Copy-on-Write behaviour).
X['MMSE'] = X['MMSE'].fillna(24)

In [None]:
# Re-check: feature columns that still contain missing values (expected to be empty).
remaining_missing = X.isna().sum()
remaining_missing[remaining_missing != 0]

In [None]:


# Encode sex and diagnosis as integers: F -> 0, M -> 1; Nondemented -> 0, Demented -> 1.
# Results are assigned back rather than using replace(inplace=True) on X['M/F'], which
# operates on an intermediate Series and may leave X unchanged under pandas
# Copy-on-Write. astype(int) guarantees a numeric dtype for the estimator and fails
# loudly if an unexpected label is still present. Re-running the cell is a no-op.
X['M/F'] = X['M/F'].replace({'F': 0, 'M': 1}).astype(int)

y = y.replace({'Nondemented': 0, 'Demented': 1}).astype(int)


In [None]:
from sklearn.model_selection import StratifiedGroupKFold, train_test_split

# Hold out roughly 20% of the rows for testing, split by subject.
# The data contain several visits per subject; a plain row-level train_test_split lets
# visits of the same subject land in both train and test, which leaks information and
# inflates the test score. Taking one fold of a 5-fold StratifiedGroupKFold keeps every
# subject entirely on one side while still balancing the class proportions.
subject_ids = df.loc[X.index, 'Subject ID']
splitter = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=1)
train_pos, test_pos = next(splitter.split(X, y, groups=subject_ids))

X_train, X_test = X.iloc[train_pos], X.iloc[test_pos]
y_train, y_test = y.iloc[train_pos], y.iloc[test_pos]

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics on the training features only, then standardise both
# splits with those same statistics.
std_scaler = StandardScaler()
std_scaler.fit(X_train)
X_train_std = std_scaler.transform(X_train)
X_test_std = std_scaler.transform(X_test)

In [None]:
from sklearn.ensemble import ExtraTreesClassifier
# Extra-trees classifier with default hyper-parameters; random_state is fixed so that
# repeated runs build the same model.
model=ExtraTreesClassifier(random_state=42)

In [None]:
from sklearn.metrics import accuracy_score, recall_score

# Fit on the standardised training data, predict the standardised test data, and report
# train accuracy, test accuracy and macro-averaged recall (4 decimals each).
model.fit(X_train_std, y_train)
y_pred = model.predict(X_test_std)

train_accuracy = model.score(X_train_std, y_train)
test_accuracy = accuracy_score(y_test, y_pred)
macro_recall = recall_score(y_test, y_pred, average='macro')

# Real f-string placeholders replace the former placeholder-less f-strings combined
# with %-formatting; the printed text is unchanged.
print(f'Accuracy train score: {train_accuracy:.4f}')
print(f'Accuracy test score: {test_accuracy:.4f}')
print(f'Recall score: {macro_recall:.4f}')

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test-set predictions, drawn as an annotated heatmap.
conf_matrix = confusion_matrix(y_test, y_pred)

print('Number of records in the test dataset: %d\n' % y_test.shape[0])

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, cmap='Blues', cbar=False, fmt='d', ax=ax)
ax.set_xlabel('\nPredicted label', size=14)
ax.set_ylabel('True label\n', size=14)

plt.show()

In [None]:
import shap

# The model was fitted on standardised features (X_train_std), so the SHAP values must be
# computed on the standardised test set too; explaining the raw X_test would push inputs
# on a different scale through the trees' split thresholds. The array is wrapped in a
# DataFrame so the plot keeps the original feature names.
X_test_std_df = pd.DataFrame(X_test_std, columns=X_test.columns, index=X_test.index)

explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test_std_df)

# Mean absolute SHAP value per feature, shown as a bar chart.
shap.summary_plot(shap_values, X_test_std_df, max_display=15, plot_type="bar")

In [None]:
# Violin summary of the SHAP values for class 1.
# Older SHAP releases return a list with one (samples, features) array per class, newer
# ones a single (samples, features, classes) array. Plain shap_values[1] would pick
# sample 1 instead of class 1 in the latter case, so both layouts are handled.
if isinstance(shap_values, list):
    shap_values_class1 = shap_values[1]
else:
    shap_values_class1 = np.asarray(shap_values)[:, :, 1]
shap.summary_plot(shap_values_class1, X_test, max_display=15, plot_type="violin")