In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the read-only Kaggle input directory,
# in the same traversal order os.walk yields them, and echo each full path.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Location of the dementia dataset inside the Kaggle input mount.
DATA_PATH = '/kaggle/input/dementia-prediction-dataset/dementia_dataset.csv'

# Load the raw CSV into a DataFrame; later cells read and re-bind `df`.
df = pd.read_csv(DATA_PATH)

In [None]:
import warnings
warnings.filterwarnings('ignore')

# 1. Preprocessing

In [None]:
# Show the freshly loaded frame as the cell output for a first look at the data.
df

In [None]:
# List the distinct labels found in the 'Group' column before they are encoded.
df['Group'].unique()

In [None]:
# Encode the diagnosis label as an integer class code.
# Only the 'Group' column is touched: a frame-wide replace would also rewrite any
# other cell that happens to hold one of these strings.
# infer_objects() settles the column on a numeric dtype whenever every label was
# recognised, so the later pd.get_dummies call does not one-hot encode the target.
# Re-running the cell is safe: already-encoded values match none of the keys.
GROUP_CODES = {'Nondemented': 0, 'Demented': 1, 'Converted': 2}
df = df.assign(Group=df['Group'].replace(GROUP_CODES).infer_objects())

In [None]:
# Preview the first rows to check the result of the 'Group' encoding.
df.head()

In [None]:
# Identifier columns are removed before plotting and modelling;
# the result is bound to a new name so `df` itself is left untouched.
ID_COLUMNS = ['Subject ID', 'MRI ID']
df1 = df.drop(columns=ID_COLUMNS)

In [None]:
# One-hot encode df1 with pandas' default column selection (no `columns` argument given).
# NOTE(review): which columns get expanded depends on their dtypes at this point —
# confirm 'Group' is already numeric here so the target is not split into dummy columns.
df1= pd.get_dummies(df1)

In [None]:
# Show the encoded feature frame that the plots and the PyCaret experiment below consume.
df1

# 2. Visualization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Pairwise correlations of every column in df1, drawn as an annotated heatmap
# on an explicitly created 15x15 inch axes.
corr_matrix = df1.corr()
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(corr_matrix, annot=True, ax=ax)

In [None]:
# Pairwise scatter matrix of df1, coloured by diagnosis group.
# `height` (inches per facet) is the current name of the parameter that used to be
# called `size`; the old spelling is deprecated in seaborn.
sns.pairplot(df1, hue='Group', height=2)

# Age, SES and EDUC by CDR

In [None]:
# Age vs. SES, coloured by CDR, with marker area scaled by EDUC.
# The legend font size is set on the final legend call: calling plt.legend() before any
# artist exists only styles an empty legend that is replaced once the plot is drawn.
fig, ax = plt.subplots(figsize=(12, 12))
sns.scatterplot(data=df1, x='Age', y='SES', hue='CDR', size='EDUC', sizes=(50, 500), ax=ax)
ax.set_title('Age, SES and EDUC by CDR')
ax.tick_params(labelsize=10)
plt.setp(ax.get_xticklabels(), rotation=90)
# Anchor the legend just outside the right edge of the axes so it covers no points.
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=10);

# MMSE, nWBV and ASF by CDR

In [None]:
# MMSE vs. nWBV, coloured by CDR, with marker area scaled by ASF.
# The legend font size is set on the final legend call: calling plt.legend() before any
# artist exists only styles an empty legend that is replaced once the plot is drawn.
fig, ax = plt.subplots(figsize=(12, 12))
sns.scatterplot(data=df1, x='MMSE', y='nWBV', hue='CDR', size='ASF', sizes=(50, 500), ax=ax)
ax.set_title('MMSE, nWBV and ASF by CDR')
ax.tick_params(labelsize=10)
plt.setp(ax.get_xticklabels(), rotation=90)
# Anchor the legend just outside the right edge of the axes so it covers no points.
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=10);

# eTIV, ASF and CDR by Group

In [None]:
# eTIV vs. ASF, coloured by diagnosis Group, with marker area scaled by CDR.
# The legend font size is set on the final legend call: calling plt.legend() before any
# artist exists only styles an empty legend that is replaced once the plot is drawn.
fig, ax = plt.subplots(figsize=(12, 12))
sns.scatterplot(data=df1, x='eTIV', y='ASF', hue='Group', size='CDR', sizes=(50, 500), ax=ax)
ax.set_title('eTIV, ASF and CDR by Group')
ax.tick_params(labelsize=10)
plt.setp(ax.get_xticklabels(), rotation=90)
# Anchor the legend just outside the right edge of the axes so it covers no points.
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=10);

# 3. Select the model by PyCaret

In [None]:
pip install pycaret

In [None]:
from pycaret.classification import *

In [None]:
# Initialise the PyCaret classification experiment on df1 with 'Group' as the target.
# session_id fixes the random seed so the train/test split, the model comparison and the
# tuning results are reproducible across "Restart & Run All".
# silent=True skips the interactive dtype confirmation prompt.
SEED = 42
exp1 = setup(df1, target='Group', silent=True, session_id=SEED)

In [None]:
# Run PyCaret's model comparison for the experiment configured above and show its results.
# NOTE(review): the return value is not kept; the following cells select 'lightgbm' by name.
compare_models()

# I chose the Light Gradient Boosting Machine (LightGBM) classifier.

# 4. Create and tune the model

In [None]:
# Create the LightGBM classifier inside the current PyCaret experiment.
lgbm =  create_model('lightgbm')

In [None]:
# Tune the base model with PyCaret's default search settings; the tuned estimator is
# kept under its own name so the untuned `lgbm` stays available for comparison.
tuned_lgbm=tune_model(lgbm)

In [None]:
# Draw PyCaret's default diagnostic plot for the tuned model (no `plot` argument given).
plot_model(tuned_lgbm)

In [None]:
# Show PyCaret's default interpretation plot for the tuned model.
# NOTE(review): presumably relies on the optional `shap` package — confirm it is installed.
interpret_model(tuned_lgbm)

In [None]:
# Confusion matrix of the tuned model.
plot_model(tuned_lgbm, plot = 'confusion_matrix')

# 5. Finalize the model

In [None]:
# Finalize the tuned model, then score df1 with it and preview the first 30 rows.
# NOTE(review): df1 is the same frame the experiment was set up on, so these look like
# in-sample predictions rather than an estimate of performance on unseen data — confirm
# against the PyCaret finalize_model documentation before quoting any accuracy from them.
finalize_lgbm = finalize_model(tuned_lgbm)
predictions = predict_model(finalize_lgbm, data=df1)
predictions.head(30)

# The 'Label' column shows the predicted class for each row.