In [None]:
import os 
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.image import ImageDataGenerator
import keras 
from keras.callbacks import EarlyStopping,ModelCheckpoint
import tensorflow as tf
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from tqdm import tqdm
from imblearn.over_sampling import SMOTE

In [None]:
# Collect every image path found two directory levels below `dataset_path`;
# the name of the second-level folder is used as the image's label.
images = []
labels = []
dataset_path = 'dataset'
# sorted() keeps the row order of `df` independent of the filesystem's listing
# order, so everything derived from that order is repeatable between runs.
for subfolder in tqdm(sorted(os.listdir(dataset_path))):
    subfolder_path = os.path.join(dataset_path, subfolder)
    if not os.path.isdir(subfolder_path):
        # Stray files next to the folders would make os.listdir() raise below.
        continue
    for folder in sorted(os.listdir(subfolder_path)):
        subfolder_path2 = os.path.join(subfolder_path, folder)
        if not os.path.isdir(subfolder_path2):
            continue
        for image_filename in sorted(os.listdir(subfolder_path2)):
            image_path = os.path.join(subfolder_path2, image_filename)
            if not os.path.isfile(image_path):
                # Only regular files can be images; skip nested directories.
                continue
            images.append(image_path)
            labels.append(folder)
df = pd.DataFrame({'image': images, 'label': labels})
df

In [None]:
# Pie chart of the share of images per class.
# NOTE: `labels` and `sizes` are rebound here and read again by the next cell.
class_counts = df['label'].value_counts()
labels = class_counts.index
sizes = class_counts.values

plt.figure(figsize=(4, 4))
plt.pie(
    sizes,
    labels=labels,
    autopct='%1.1f%%',
    startangle=90,
    colors=sns.color_palette('Set1'),
)
plt.title('Distribution of Classes', fontsize=20)
plt.show()

In [None]:
# Print each entry of `labels` next to the matching entry of `sizes`.
for class_label, class_size in zip(labels, sizes):
    print(f"Label: {class_label}, Size: {class_size}")

In [None]:
# Class names in sorted order; later cells index this array with the argmax of
# a label row to turn it back into a readable name.
class_num = np.sort(np.array([
    'MildDemented',
    'ModerateDemented',
    'NonDemented',
    'VeryMildDemented',
]))
class_num

In [None]:
# Sample image for each class
def load_and_preprocess_image(image_path, target_size=(224, 224)):
    """Read an image from disk, resize it and return it in RGB channel order.

    Args:
        image_path: Path of the image file to read.
        target_size: Output size handed to ``cv2.resize``.

    Returns:
        The resized image after BGR -> RGB conversion.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file does not exist: {image_path}")
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if img is None:
        raise ValueError(f"Failed to read image: {image_path}")
    img = cv2.resize(img, target_size)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

# Show a few randomly drawn example images per class, one grid row per class.
samples_per_class = 3
plt.figure(figsize=(12, 8))
for i, class_name in enumerate(class_num):
    class_paths = df.loc[df['label'] == class_name, 'image']
    if class_paths.empty:
        # Sampling from an empty selection raises; leave this row blank instead.
        continue
    # replace=True lets a class with fewer images than samples_per_class still
    # fill its row (the same image may then appear more than once).
    class_images = class_paths.sample(samples_per_class, replace=True).tolist()
    for j, img_path in enumerate(class_images):
        # Grid dimensions follow the number of classes and samples instead of
        # being fixed at 4 x 3.
        plt.subplot(len(class_num), samples_per_class, i * samples_per_class + j + 1)
        img = load_and_preprocess_image(img_path)
        plt.imshow(img)
        plt.title(f'{class_name} - Sample {j+1}')
        plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Record the (height, width, channels) shape of every readable image and plot
# the distribution of heights and widths.
sizes = []
for img_path in df['image']:
    if not os.path.exists(img_path):  # Check if the file exists
        print(f"File does not exist: {img_path}")
        continue
    image = cv2.imread(img_path)
    # cv2.imread returns None when it cannot decode the file, so the check must
    # happen on the image itself before `.shape` is accessed.
    if image is None:
        print(f"Failed to read image: {img_path}")
        continue
    sizes.append(image.shape)

sizes_df = pd.DataFrame(sizes, columns=['Height', 'Width', 'Channels'])

# Convert infinite values to NaN in sizes_df as well, just in case
sizes_df = sizes_df.replace([np.inf, -np.inf], np.nan)

plt.figure(figsize=(6, 4))
sns.histplot(sizes_df['Height'], kde=True, color='blue', label='Height', alpha=0.7)
sns.histplot(sizes_df['Width'], kde=True, color='orange', label='Width', alpha=0.7)
plt.title('Image Size Distribution')
plt.xlabel('Size')
plt.legend()
plt.show()

In [None]:
# Target (height, width) every image is resized to when it is loaded.
Size = (176, 176)
# Rescales pixel values to [0, 1] and applies random rotation, shear and flips.
work_dr = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    shear_range=0.2,
    horizontal_flip=True,
    vertical_flip=True
)
# The next cell pulls a single batch and treats it as the whole dataset, so the
# batch must be able to hold every row of `df`. A fixed batch size would
# silently drop the tail of a larger dataset, and with shuffle=False the
# dropped rows are the last ones in `df`.
# NOTE(review): this loads all images into memory at once - confirm that is
# acceptable for the dataset in use.
train_data_gen = work_dr.flow_from_dataframe(
    df,
    x_col='image',
    y_col='label',
    target_size=Size,
    batch_size=max(len(df), 1),
    shuffle=False,
)

In [None]:
# Pull one batch (images, labels) from the generator. The builtin next() works
# on any iterator, whether or not the object also exposes a `.next()` method.
train_data, train_labels = next(train_data_gen)

In [None]:
# Oversample with SMOTE on flattened images, then restore the image shape.
# NOTE(review): resampling happens before the train/val/test split further
# down, so synthetic samples may be derived from images that end up in the
# validation or test sets - confirm this is intended.
sm = SMOTE(random_state=42)
# Read the per-image shape from the data instead of hard-coding 176x176x3.
image_shape = train_data.shape[1:]
flat_data = train_data.reshape(len(train_data), -1)
flat_data, train_labels = sm.fit_resample(flat_data, train_labels)
train_data = flat_data.reshape(-1, *image_shape)
print(train_data.shape, train_labels.shape)

In [None]:
# Reduce each label row to the index of its largest entry, then count how many
# samples fall on each index.
original_labels = np.argmax(train_labels, axis=1)
df_resampled = pd.DataFrame(dict(label=original_labels))
class_counts_resampled = df_resampled.loc[:, 'label'].value_counts()

In [None]:
# Render the per-class sample counts after resampling as a table figure.
labels = ['MildDemented','ModerateDemented','NonDemented','VeryMildDemented']
# value_counts() orders its result by frequency, not by class index. Sorting by
# index pairs each count with the right name.
# Assumes class index i corresponds to labels[i] - TODO confirm against the
# generator's class_indices.
sizes = class_counts_resampled.sort_index().values

assert len(labels) == len(sizes), "Length mismatch between labels and sizes"

class_distribution_table = pd.DataFrame({'Class': labels, 'Count': sizes})

plt.figure(figsize=(6, 4))
table = plt.table(cellText=class_distribution_table.values,
                  colLabels=class_distribution_table.columns,
                  cellLoc='center', loc='center', bbox=[0, 0, 1, 1])

table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1.2, 1.2)

# Row 0 of the table holds the column headers; render them in bold.
for (row, _col), cell in table.get_celld().items():
    if row == 0:
        cell.set_text_props(fontweight='bold')

plt.axis('off')
plt.title('Class Distribution after SMOTE', fontsize=16)
plt.show()

In [None]:
# Stratified 70/30 split, then the 30% hold-out is halved into validation and test.
X_train, X_test1, y_train, y_test1 = train_test_split(
    train_data,
    train_labels,
    test_size=0.3,
    random_state=42,
    shuffle=True,
    stratify=train_labels,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_test1,
    y_test1,
    test_size=0.5,
    random_state=42,
    shuffle=True,
    stratify=y_test1,
)

# Report the shape of every split.
split_report = (
    ('X_train shape is ', X_train),
    ('X_test shape is ', X_test),
    ('X_val shape is ', X_val),
    ('y_train shape is ', y_train),
    ('y_test shape is ', y_test),
    ('y_val shape is ', y_val),
)
for caption, split in split_report:
    print(caption, split.shape)

In [None]:
# Preview the first five training images with their class names.
plt.figure(figsize=(12, 6))
for idx in range(5):
    class_idx = np.argmax(y_train[idx])
    plt.subplot(2, 5, idx + 1)
    plt.imshow(X_train[idx])
    plt.title(f'Augmented\n{class_num[class_idx]}')
    plt.axis('off')
plt.tight_layout()
plt.show()

# **Models**

# InceptionV3

In [None]:
# Frozen ImageNet-pretrained InceptionV3 backbone without its classification top.
base_model = tf.keras.applications.InceptionV3(
    input_shape=(176, 176, 3),
    include_top=False,
    weights='imagenet',
)
base_model.trainable = False

# Classification head: pooling, then four Dense -> BatchNorm -> Dropout stages
# of decreasing width, ending in a 4-way softmax.
head_layers = [
    base_model,
    keras.layers.Dropout(.5),
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Flatten(),
    keras.layers.BatchNormalization(),
]
for units in (512, 256, 128, 64):
    head_layers += [
        keras.layers.Dense(units, activation=tf.nn.relu),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(.5),
    ]
head_layers += [
    keras.layers.BatchNormalization(),
    keras.layers.Dense(4, activation=tf.nn.softmax),
]

model_Inception = keras.models.Sequential(head_layers)
model_Inception.summary()

In [None]:
from keras.optimizers import RMSprop

# Keep only the best checkpoint on disk; stop after 4 epochs without
# improvement and roll back to the best weights.
checkpoint_cb = ModelCheckpoint("inception.h5", save_best_only=True)
early_stopping_cb = EarlyStopping(patience=4, restore_best_weights=True)

# The learning rate is the knob to adjust here if needed.
optimizer_rmsprop = RMSprop(learning_rate=0.001)
model_Inception.compile(
    optimizer=optimizer_rmsprop,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
hist = model_Inception.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=20,
    validation_data=(X_val, y_val),
    callbacks=[checkpoint_cb, early_stopping_cb],
)

In [None]:
# Loss (left) and accuracy (right) per epoch, train vs. validation.
plt.figure(figsize=(12, 4))
panels = (
    (1, 'loss', 'val_loss', 'Training and Validation Loss', 'Loss'),
    (2, 'accuracy', 'val_accuracy', 'Training and Validation Accuracy', 'Accuracy'),
)
for position, train_key, val_key, panel_title, y_label in panels:
    plt.subplot(1, 2, position)
    plt.plot(hist.history[train_key], label='Train')
    plt.plot(hist.history[val_key], label='Validation')
    plt.title(panel_title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()

plt.show()

In [None]:
# Evaluate the InceptionV3 model on the held-out test split: confusion matrix
# heatmap plus a per-class precision/recall report.
# (confusion_matrix, classification_report and seaborn are imported in the
# notebook's first cell.)
y_pred = model_Inception.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Passing the full list of class ids keeps the matrix and the report at one
# row per class even if some class is missing from this split.
# Assumes class index i corresponds to class_num[i], as the preview cell above
# already does - TODO confirm against the generator's class_indices.
class_names = list(class_num)
class_ids = np.arange(len(class_names))

cm = confusion_matrix(y_true_classes, y_pred_classes, labels=class_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

print("Classification Report:")
print(classification_report(y_true_classes, y_pred_classes,
                            labels=class_ids, target_names=class_names))

In [None]:
# Loss and accuracy of the InceptionV3 model on the test split.
score, acc = model_Inception.evaluate(X_test, y_test)
for caption, value in (('Test Loss =', score), ('Test Accuracy =', acc)):
    print(caption, value)

# ResNet50 approach

In [None]:
from fastai.vision.all import *

In [None]:
# fastai data pipeline: image paths come from the 'image' column of the frame,
# categories from its 'label' column.
data = DataBlock(blocks=(ImageBlock, CategoryBlock),
                 get_x=ColReader('image', pref=''),
                 get_y=ColReader('label'),
                 # Seeded so the train/validation split is the same on every
                 # run (42 matches the random_state used earlier in the notebook).
                 splitter=RandomSplitter(seed=42),
                 batch_tfms=[*aug_transforms(), Normalize.from_stats(*imagenet_stats)])

In [None]:
# Build the dataloaders from the image/label frame, 64 items per batch.
dls = data.dataloaders(df, bs=64)

In [None]:
# Visual sanity check: display up to 12 items from one batch in 3 rows.
dls.show_batch(max_n=12, nrows=3)

In [None]:
# `resnet50`, `vision_learner` and `accuracy` are not defined in this notebook;
# presumably they come from the `fastai.vision.all` star import above.
arch = resnet50
learn = vision_learner(dls, arch, metrics=accuracy)

In [None]:
# Run the learning-rate finder and plot its loss-vs-learning-rate curve.
# NOTE(review): the return value of lr_find() is discarded, so nothing from
# this cell is passed on to the training call below.
# NOTE(review): lr_find() may already draw this plot by default, in which case
# the second call shows it twice - confirm.
learn.lr_find()
learn.recorder.plot_lr_find()

In [None]:
# Fine-tune for 50 epochs with freeze_epochs=5 (presumably 5 initial epochs
# with the pretrained body frozen - confirm against the fastai docs).
# NOTE(review): base_lr=0.1 is hard-coded and does not come from the
# learning-rate finder run above; verify this value is intended.
learn.fine_tune(50, base_lr=0.1, freeze_epochs=5)


In [None]:
# export_path = '/kaggle/working/resnet50.pkl'
# learn.export(export_path)
# %cd /kaggle/working
# from IPython.display import FileLink
# FileLink(r'resnet50.pkl')

In [None]:
# Plot the losses recorded during the fine-tuning run.
learn.recorder.plot_loss()

In [None]:
# Build an interpretation object from the trained learner, then show its
# confusion matrix and print its classification report.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()
interp.print_classification_report()

In [None]:
# `final_record` holds the last epoch's values in the order of
# `metric_names` without its leading 'epoch' entry (train_loss, valid_loss,
# then the metrics). Position 1 is therefore the validation loss, so the
# accuracy is looked up by name instead of by position.
final_metrics = dict(zip(learn.recorder.metric_names[1:], learn.recorder.final_record))
# Stored under a new name so the `accuracy` metric function passed to
# vision_learner is not overwritten when cells are re-run.
final_accuracy = final_metrics['accuracy']
print(f'Final Accuracy: {final_accuracy * 100:.2f}%')