In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from sklearn.datasets import load_digits
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import label_binarize
from keras.preprocessing.image import ImageDataGenerator
import os
from tensorflow import keras
from tensorflow.keras import layers

## **Dataset**

In [None]:
## Set this path before running the notebook
WORKING_DIRECTORY = "/kaggle/input/alzheimer-mri-dataset/Dataset"

## Class names; each one is looked up as a sub-directory of WORKING_DIRECTORY
CLASSES = [
    'Mild_Demented',
    'Moderate_Demented',
    'Non_Demented',
    'Very_Mild_Demented',
]

# Count the directory entries found under each class sub-directory
class_counts = {
    label: len(os.listdir(os.path.join(WORKING_DIRECTORY, label)))
    for label in CLASSES
}

# Bar chart of the per-class counts
fig, ax = plt.subplots(figsize=(8, 4))
ax.bar(list(class_counts), list(class_counts.values()),
       color=['purple', 'orange', 'green', 'blue'])
ax.set_title("Data Distribution of Alzheimer's MRI Classes")
ax.set_xlabel('Classes')
ax.set_ylabel('Number of Samples')
plt.show()

In [None]:
## Image augmentation: random rotations, shifts, shears, zooms and horizontal
## flips. No `rescale` factor is passed, so pixel values are not rescaled here.
# NOTE(review): these random transforms are applied to every image before the
# train/test split further down, so the held-out images are augmented too —
# confirm this is intended.
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Load the images resized to 128x128, shuffled, with one-hot class labels.
# A fixed seed keeps the shuffle order and random transforms reproducible.
# NOTE(review): batch_size=6400 is presumably the dataset size so that one
# batch holds every image — TODO confirm; a larger dataset would be truncated.
train_dataset = datagen.flow_from_directory(
    WORKING_DIRECTORY,
    target_size=(128, 128),
    batch_size=6400,
    shuffle=True,
    seed=42,
    class_mode='categorical',
)

### Separate the dataset from the data generator: pull the first batch out as
### in-memory arrays. The built-in next() is used because the iterator's
### .next() method is not present in every Keras release.
X, y = next(train_dataset)

## **Split Data**

In [None]:
# Split the data into training (80%) and testing (20%) sets.
# Stratify on the class index so both splits keep the class proportions of the
# full dataset; y is one-hot (class_mode='categorical'), hence the argmax.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y.argmax(axis=1)
)

In [None]:
# Report the array shape of every split
for split_name, split_array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"{split_name} Shape:", split_array.shape)

## **Train and Evaluate**
## **Classifiers:**
* **Decision Tree**
* **Random Forest**
* **XGBoost**
* **Gradient Boosting**
* **Voting Classifier {DecisionTree, RandomForest, XGBoost}**

In [None]:
# Create a Decision Tree classifier; random_state is fixed so repeated runs
# use the same seed
classifier = DecisionTreeClassifier(random_state=42)

In [None]:
# Flatten every training image into a single feature row (one row per image)
X_train_flattened = X_train.reshape(len(X_train), -1)

In [None]:
# Fit the decision tree on the flattened training images
classifier.fit(X_train_flattened, y_train)

In [None]:
# Flatten every test image the same way as the training images
X_test_flattened = X_test.reshape(len(X_test), -1)

In [None]:
# Make predictions for the flattened test images with the fitted decision tree
y_pred = classifier.predict(X_test_flattened)

In [None]:
# Score the decision-tree predictions: plain accuracy plus macro-averaged
# precision, recall and F1
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)


In [None]:
# Print each metric on its own line
for metric_label, metric_value in (
    ("Accuracy: ", accuracy),
    ("Precision: ", precision),
    ("Recall: ", recall),
    ("F1: ", f1),
):
    print(metric_label, metric_value)

In [None]:
from sklearn.metrics import multilabel_confusion_matrix

# Build the multilabel confusion matrix for the decision-tree predictions
conf_matrix = multilabel_confusion_matrix(y_true=y_test, y_pred=y_pred)


In [None]:
# Plot one binary confusion matrix per class using seaborn
import seaborn as sns

# Class names ordered by the label index the generator assigned to each class
class_names = sorted(train_dataset.class_indices, key=train_dataset.class_indices.get)

# Size the grid from the number of matrices instead of assuming exactly four
n_panels = len(conf_matrix)
n_cols = 2
n_rows = max(1, -(-n_panels // n_cols))  # ceiling division
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols,
                         figsize=(12, 4 * n_rows), squeeze=False)

for i, ax in enumerate(axes.flat):
    if i >= n_panels:
        # Hide the spare panel left over when the class count is odd
        ax.set_visible(False)
        continue
    # Each matrix is 2x2 (rows: true label, columns: predicted label), so the
    # ticks are the two outcomes for this class rather than values read from y
    sns.heatmap(conf_matrix[i], annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=['Negative', 'Positive'],
                yticklabels=['Negative', 'Positive'], ax=ax)
    ax.set_title(class_names[i] if i < len(class_names) else f'Class {i}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')

plt.suptitle('Multilabel Confusion Matrix')
plt.tight_layout()
plt.show()

In [None]:
# Bar chart of the decision tree's four headline scores
labels = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
values = [accuracy, precision, recall, f1]

ax = plt.gca()
ax.bar(labels, values, color=['blue', 'green', 'orange', 'red'])
ax.set_title('Performance Metrics')
ax.set_ylabel('Score')
plt.show()

In [None]:
# Random forest with default hyper-parameters and a fixed seed
rf = RandomForestClassifier(random_state=42)

In [None]:
# XGBoost classifier with default hyper-parameters and a fixed seed
xgb = XGBClassifier(random_state = 42)

In [None]:
# Create the voting ensemble over a decision tree, a random forest and XGBoost.
# VotingClassifier is the ensemble that accepts a list of (name, estimator)
# pairs; with voting='hard' the predicted class is the majority vote of the
# three models. Seeds are fixed to match the stand-alone models.
voting_classifier = VotingClassifier(
    estimators=[
        ('dt', DecisionTreeClassifier(random_state=42)),
        ('rf', RandomForestClassifier(random_state=42)),
        ('xgb', XGBClassifier(random_state=42)),
    ],
    voting='hard',
)

In [None]:
# Gradient boosting classifier with default hyper-parameters and a fixed seed
gradient_boosting = GradientBoostingClassifier(random_state=42)

In [None]:
# Fit the random forest on the flattened training images
rf.fit(X_train_flattened, y_train)

In [None]:
# Fit XGBoost on the flattened training images
# NOTE(review): y_train is one-hot (2-D); this relies on the installed XGBoost
# version accepting indicator targets — TODO confirm
xgb.fit(X_train_flattened, y_train)

In [None]:
# VotingClassifier rejects multilabel / multi-output targets, and y_train is
# one-hot encoded, so collapse each row to its class index before fitting.
# Predictions from this model are therefore 1-D class indices.
voting_classifier.fit(X_train_flattened, y_train.argmax(axis=1))

In [None]:
# GradientBoostingClassifier requires a 1-D target, and y_train is one-hot
# encoded, so collapse each row to its class index before fitting.
# Predictions from this model are therefore 1-D class indices.
gradient_boosting.fit(X_train_flattened, y_train.argmax(axis=1))

In [None]:
# Predict the test set with each fitted model
rf_pred = rf.predict(X_test_flattened)
# Use the XGBoost model itself here, not the random forest
xgb_pred = xgb.predict(X_test_flattened)
voting_pred = voting_classifier.predict(X_test_flattened)
boosting_pred = gradient_boosting.predict(X_test_flattened)

In [None]:
# Display name -> model, in the order the models are reported
classifiers = dict(zip(
    ('Random Forest', 'XGBoost', 'Gradient Boosting', 'Decision Tree'),
    (rf, xgb, gradient_boosting, classifier),
))

In [None]:
# Four independent, initially empty score tables (one per metric)
accuracy_scores, precision_scores, recall_scores, f1_scores = ({} for _ in range(4))

In [None]:
import seaborn as sns


def _to_class_indices(labels):
    """Return 1-D class indices for `labels`.

    2-D input (one-hot / indicator rows) is collapsed with argmax over the
    class axis; 1-D input is returned unchanged. Note that argmax maps a row
    with no positive entry to index 0.
    """
    labels = np.asarray(labels)
    return labels.argmax(axis=1) if labels.ndim == 2 else labels


# Class names ordered by the label index the generator assigned to each class
class_names = sorted(train_dataset.class_indices, key=train_dataset.class_indices.get)
class_ids = list(range(len(class_names)))

# confusion_matrix only accepts single-label targets, and the models may return
# either indicator rows or class indices, so everything is compared as indices
y_true = _to_class_indices(y_test)

conf_matrices = {}

for clf_name, clf in classifiers.items():
    # Predict on the test set
    clf_pred = _to_class_indices(clf.predict(X_test_flattened))

    # Store the metrics per classifier so the summary table can read them back
    accuracy_scores[clf_name] = accuracy_score(y_true, clf_pred)
    precision_scores[clf_name] = precision_score(y_true, clf_pred, average='macro')
    recall_scores[clf_name] = recall_score(y_true, clf_pred, average='macro')
    f1_scores[clf_name] = f1_score(y_true, clf_pred, average='macro')

    # Compute the confusion matrix; passing labels keeps it square and in
    # class order even when a class is absent from this classifier's output
    conf_mat = confusion_matrix(y_true, clf_pred, labels=class_ids)
    conf_matrices[clf_name] = conf_mat

    # Print confusion matrix
    print(f'{clf_name} Confusion Matrix:')
    print(conf_mat)
    print('\n')

    # Plot confusion matrix
    plt.figure(figsize=(6, 6))
    sns.heatmap(conf_mat,
                annot=True,
                fmt='d',
                cmap='Blues',
                cbar=False,
                xticklabels=class_names,
                yticklabels=class_names)

    plt.title(f'Confusion Matrix - {clf_name}')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()

# Print the summary once, after every classifier has been scored
print('\nMetrics Summary:')
print('{:<20} {:<10} {:<10} {:<10} {:<10}'
      .format('Classifier', 'Accuracy', 'Precision', 'Recall', 'F1 Score'))

for summary_name in classifiers:
    print('{:<20} {:<10.4f} {:<10.4f} {:<10.4f} {:<10.4f}'
          .format(summary_name,
                  accuracy_scores[summary_name],
                  precision_scores[summary_name],
                  recall_scores[summary_name],
                  f1_scores[summary_name]))