In [None]:
import os, gc
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from tensorflow.keras.applications import VGG16, DenseNet121, MobileNet
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization, Flatten
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# --- Load pre-trained model ---
def get_feature_extractor(model_name, input_shape=(224, 224, 3)):
    """Build an ImageNet-pretrained convolutional backbone without its top classifier.

    Args:
        model_name: One of "VGG16", "DenseNet121" or "MobileNetV1".
        input_shape: Input shape forwarded to the Keras application constructor.

    Returns:
        The Keras application model built with weights='imagenet' and include_top=False.

    Raises:
        ValueError: If model_name is not one of the supported names.
    """
    # Reject unknown names up front so no backbone is constructed for bad input.
    if model_name not in ("VGG16", "DenseNet121", "MobileNetV1"):
        raise ValueError("Invalid model name")

    # Every supported backbone is created with the same constructor arguments.
    backbone_kwargs = dict(weights='imagenet', include_top=False, input_shape=input_shape)
    if model_name == "VGG16":
        return VGG16(**backbone_kwargs)
    if model_name == "DenseNet121":
        return DenseNet121(**backbone_kwargs)
    return MobileNet(**backbone_kwargs)

# --- Load images ---
def load_data(directory, batch_size=32, target_size=(224, 224)):
    """Create a directory iterator that yields rescaled image batches in a fixed order.

    Args:
        directory: Root folder passed to flow_from_directory.
        batch_size: Number of images per batch.
        target_size: (height, width) every image is resized to.

    Returns:
        The iterator returned by ImageDataGenerator.flow_from_directory, configured with
        class_mode='categorical' and shuffle=False.
    """
    # Pixel values are only scaled by 1/255 here.
    # NOTE(review): the pretrained backbones may expect their own preprocess_input — confirm.
    rescaler = ImageDataGenerator(rescale=1./255)
    flow_options = {
        "target_size": target_size,
        "batch_size": batch_size,
        "class_mode": 'categorical',
        # Shuffling is disabled so the batch order is stable between passes.
        "shuffle": False,
    }
    return rescaler.flow_from_directory(directory, **flow_options)
    
# --- Load data ---
# Training images are read from the augmented dataset folder, test images from the original one.
# NOTE(review): if the augmented images were generated from the same originals that are used
# for testing, the reported test accuracy will be optimistic — confirm the dataset provenance.
data_dir_train = "/kaggle/input/augmented-alzheimer-mri-dataset/AugmentedAlzheimerDataset"
data_dir_test  = "/kaggle/input/augmented-alzheimer-mri-dataset/OriginalDataset"
train_generator = load_data(data_dir_train)  
test_generator = load_data(data_dir_test)   
# Number of classes found by the training iterator; used as the classifier output size.
num_classes = len(train_generator.class_indices)

# --- Feature extraction ---
def extract_features_and_labels(model, generator):
    """Run every image of `generator` through `model` and return pooled features with labels.

    A GlobalAveragePooling2D layer is attached to the model output, so each image yields
    one feature vector.

    Args:
        model: Keras model whose `input` / `output` tensors are used as the backbone.
        generator: Directory iterator exposing `classes`; it must not shuffle, otherwise the
            prediction order cannot be matched to `generator.classes`.

    Returns:
        (features, labels): the array returned by `predict` and a copy of `generator.classes`.

    Raises:
        ValueError: If the generator is configured to shuffle.
        RuntimeError: If the number of feature rows differs from the number of labels.
    """
    if getattr(generator, "shuffle", False):
        raise ValueError(
            "extract_features_and_labels requires a generator created with shuffle=False"
        )

    # Build the pooled feature-extraction model on top of the backbone
    pooled_output = GlobalAveragePooling2D()(model.output)
    feature_extractor = Model(inputs=model.input, outputs=pooled_output)

    # Extract features from the images
    features = feature_extractor.predict(generator, verbose=1)

    # Read labels straight from `classes` rather than through the iterator's `index_array`,
    # which is internal iteration state and is not a stable mapping when shuffling is used.
    labels = np.array(generator.classes)
    if len(features) != len(labels):
        raise RuntimeError(
            f"Feature/label count mismatch: {len(features)} features vs {len(labels)} labels"
        )
    return features, labels

In [None]:
# Separate iterator used only to inspect the class distribution of the training folder.
train_data = ImageDataGenerator(rescale=1./255)

train_datagen = train_data.flow_from_directory(
    directory = data_dir_train,
    target_size=(224, 224),
    class_mode='categorical'
)

# Class names and number of images per class
class_names = list(train_datagen.class_indices.keys())
# minlength keeps the counts aligned with class_names even if trailing class folders are empty
class_counts = np.bincount(train_datagen.classes, minlength=len(class_names))

# Bar chart of the class distribution
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(x=class_names, y=class_counts, palette='Blues', ax=ax)
# Axis labels and title
ax.set_xlabel("Lớp MRI", fontsize=12)
ax.set_ylabel("Số lượng mẫu", fontsize=12)
ax.set_title("Số lượng ảnh MRI tăng cường trong từng lớp", fontsize=14)

# Write each count just above its bar
for i, count in enumerate(class_counts):
    ax.text(i, count + 5, str(count), ha='center', fontsize=12)
plt.show()

In [None]:
# Separate iterator used only to inspect the class distribution of the test folder.
test_data = ImageDataGenerator(rescale=1./255)

test_datagen = test_data.flow_from_directory(
    directory = data_dir_test,  # replace with the path of the test directory if it differs
    target_size=(224, 224),
    class_mode='categorical'
)

# Class names and number of images per class
class_names_t = list(test_datagen.class_indices.keys())
# minlength keeps the counts aligned with class_names_t even if trailing class folders are empty
class_counts_t = np.bincount(test_datagen.classes, minlength=len(class_names_t))

# Bar chart of the class distribution
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(x=class_names_t, y=class_counts_t, palette='Blues', ax=ax)
# Axis labels and title
ax.set_xlabel("Lớp MRI", fontsize=12)
ax.set_ylabel("Số lượng mẫu", fontsize=12)
ax.set_title("Số lượng ảnh MRI gốc trong từng lớp", fontsize=14)

# Write each count just above its bar
for i, count in enumerate(class_counts_t):
    ax.text(i, count + 5, str(count), ha='center', fontsize=12)
plt.show()

In [None]:
# === Build Neural Network (NN) ===
def build_nn_classifier(input_dim, num_classes):
    """Assemble and compile the dense classification head trained on extracted features.

    The network stacks three Dense(relu) -> BatchNormalization -> Dropout(0.2) stages with
    1024, 512 and 256 units, followed by a softmax layer with `num_classes` outputs.

    Args:
        input_dim: Length of each input feature vector.
        num_classes: Number of output classes.

    Returns:
        The compiled Sequential model (Adam with learning rate 1e-4,
        sparse categorical cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    for stage, units in enumerate((1024, 512, 256)):
        if stage == 0:
            # Only the first layer declares the input shape.
            model.add(Dense(units, activation='relu', input_shape=(input_dim,)))
        else:
            model.add(Dense(units, activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


def run_nn_classifier(X_train, y_train, X_test, y_test, input_dim, num_classes,
                      epochs=30, batch_size=32, seed=42):
    """Train the dense classifier on extracted features and evaluate it on the test set.

    Args:
        X_train, y_train: Training features and integer class labels.
        X_test, y_test: Test features and integer class labels.
        input_dim: Length of each feature vector.
        num_classes: Number of output classes.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used for training.
        seed: Seed of the permutation applied to the training rows before fitting.

    Returns:
        (accuracy in percent, predicted class indices for X_test, trained model).
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    # Keras carves the validation split out of the LAST rows before any shuffling.
    # Rows coming from an unshuffled directory iterator are typically grouped by class,
    # so without this permutation the validation set would not cover all classes.
    order = np.random.default_rng(seed).permutation(len(X_train))

    clf = build_nn_classifier(input_dim, num_classes)
    clf.fit(X_train[order], y_train[order], epochs=epochs, batch_size=batch_size,
            validation_split=0.1)
    _, acc = clf.evaluate(X_test, y_test)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    return acc * 100, y_pred, clf


# === LR ===
def run_lr_classifier(X_train, y_train, X_test, y_test):
    """Fit a logistic-regression classifier on standardized features and score it.

    The scaler is fitted on the training features only and then applied to the test features.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.

    Returns:
        (accuracy in percent, predictions for X_test, fitted LogisticRegression, fitted scaler).
    """
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # `multi_class` is intentionally left at its default: passing it explicitly is deprecated
    # in recent scikit-learn releases, and the default already selects the multinomial
    # formulation for the 'sag' solver when there are three or more classes.
    lr = LogisticRegression(max_iter=3000, solver='sag', C=5, n_jobs=-1,
        random_state=42)
    lr.fit(X_train_scaled, y_train)
    y_pred = lr.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    return acc * 100, y_pred, lr, scaler

# === RF ===
def run_rf_classifier(X_train, y_train, X_test, y_test):
    """Fit a random forest on the raw (unscaled) features and score it on the test set.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.

    Returns:
        (accuracy in percent, predictions for X_test, fitted RandomForestClassifier).
    """
    forest_params = dict(n_estimators=100, max_depth=20, random_state=42, n_jobs=-1)
    forest = RandomForestClassifier(**forest_params)
    forest.fit(X_train, y_train)

    predictions = forest.predict(X_test)
    accuracy_fraction = accuracy_score(y_test, predictions)
    return accuracy_fraction * 100, predictions, forest

# === SVM ===
def run_svm_classifier(X_train, y_train, X_test, y_test):
    """Fit an RBF-kernel SVM on standardized features and score it on the test set.

    The scaler statistics are learned from the training features only.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.

    Returns:
        (accuracy in percent, predictions for X_test, fitted SVC, fitted scaler).
    """
    scaler = StandardScaler().fit(X_train)
    train_scaled = scaler.transform(X_train)
    test_scaled = scaler.transform(X_test)

    # The author's inline note records C=0.1 as an alternative value.
    classifier = SVC(kernel='rbf', C=0.3, gamma='scale')
    classifier.fit(train_scaled, y_train)

    predictions = classifier.predict(test_scaled)
    accuracy_fraction = accuracy_score(y_test, predictions)
    return accuracy_fraction * 100, predictions, classifier, scaler


# --- Analysis ---
def plot_confusion_matrix(y_true, y_pred, class_labels, title):
    """Draw the confusion matrix of `y_pred` against `y_true` with enlarged fonts.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        class_labels: Tick labels shown on both axes of the matrix.
        title: Title placed above the plot.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    matrix = confusion_matrix(y_true, y_pred)
    ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=class_labels).plot(
        cmap=plt.cm.Blues, ax=ax
    )

    # Enlarge the numbers written inside the matrix cells
    plt.setp(ax.texts, fontsize=18)

    # Tilt the x tick labels so long class names do not collide
    for tick_label in ax.get_xticklabels():
        tick_label.set(rotation=45, ha='right', fontsize=14)
    for tick_label in ax.get_yticklabels():
        tick_label.set(rotation=0, fontsize=14)

    ax.set_title(title, fontsize=16)
    fig.tight_layout()
    plt.show()


def print_classification_metrics(y_true, y_pred, classifier_name, model_name):
    """Print a headed scikit-learn classification report for one classifier/backbone pair.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        classifier_name: Name of the classifier shown in the header.
        model_name: Name of the feature-extraction backbone shown in the header.
    """
    header = f"\n Classification Report for {classifier_name} - {model_name}:"
    print(header)
    report_text = classification_report(y_true, y_pred)
    print(report_text)

#def measure_inference_time(model, X_test, name="Model"):
#    start_time = time.time()
#    _ = model.predict(X_test)
#    duration = time.time() - start_time
#    print(f"⏱ Inference time for {name}: {duration:.4f} seconds")

def plot_model_comparison(results):
    """Draw a grouped bar chart of accuracy per backbone and classifier.

    Args:
        results: Mapping {model name: {classifier name: accuracy in percent}}. The classifier
            names are taken from the first model's entry and looked up in every model.

    Returns:
        None. When `results` (or its first entry) is empty, a message is printed and nothing
        is plotted.
    """
    if not results:
        print("No results to plot.")
        return

    model_names = list(results)
    classifiers = list(results[model_names[0]].keys())
    if not classifiers:
        print("No results to plot.")
        return

    x = np.arange(len(model_names))
    # Keep the bar width at 0.2 for up to four classifiers, and shrink it beyond that so a
    # group never spills over into the neighbouring model's group.
    width = min(0.2, 0.8 / len(classifiers))

    fig, ax = plt.subplots(figsize=(12, 6))
    for i, clf in enumerate(classifiers):
        accs = [results[model][clf] for model in model_names]
        bars = ax.bar(x + i * width, accs, width=width, label=clf)

        # Write the accuracy value on top of each bar
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2.0, height + 0.5, f'{height:.2f}%',
                    ha='center', va='bottom', fontsize=10)

    # Centre each model name under its group of bars
    ax.set_xticks(x + width * (len(classifiers)-1)/2)
    ax.set_xticklabels(model_names)
    ax.set_ylabel("Accuracy (%)")
    ax.set_title("So sánh hiệu suất các mô hình và phương pháp phân lớp")

    # Place the legend outside the axes so it does not cover the bars
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    fig.tight_layout()  # keeps the layout tidy with the legend placed outside
    plt.show()


In [None]:
# --- Compare models ---
models_list = ["VGG16", "DenseNet121", "MobileNetV1"]
accuracy_results = {}
# Initialised here (rather than lazily via globals()) so that re-running this cell never
# keeps predictions from a previous execution.
all_preds = {}

class_labels = list(train_generator.class_indices.keys())

for model_name in models_list:
    print(f"\n Processing pre-trained model: {model_name}")
    base_model = get_feature_extractor(model_name)

    print(" Extracting features...")
    X_train, y_train = extract_features_and_labels(base_model, train_generator)
    X_test, y_test  = extract_features_and_labels(base_model, test_generator)
    input_dim = X_train.shape[1]

    print(" Training MLP classifier...")
    acc_nn, y_pred_nn, nn_model = run_nn_classifier(X_train, y_train, X_test, y_test, input_dim, num_classes)
    print(f" MLP Accuracy: {acc_nn:.2f}%")

    print(" Training Logistic Regression classifier...")
    acc_lr, y_pred_lr, lr_model, lr_scaler = run_lr_classifier(X_train, y_train, X_test, y_test)
    print(f" Logistic Regression Accuracy: {acc_lr:.2f}%")

    print(" Training Random Forest classifier...")
    acc_rf, y_pred_rf, rf_model = run_rf_classifier(X_train, y_train, X_test, y_test)
    print(f" Random Forest Accuracy: {acc_rf:.2f}%")

    print(" Training SVM classifier on extracted features...")
    acc_svm, y_pred_svm, svm_model, svm_scaler = run_svm_classifier(X_train, y_train, X_test, y_test)
    print(f" SVM classifier Accuracy: {acc_svm:.2f}%")

    # === Evaluation ===
    # Short keys are used for plot titles and stored predictions; long names for the reports.
    predictions = {
        "MLP": y_pred_nn,
        "LR": y_pred_lr,
        "RF": y_pred_rf,
        "SVM": y_pred_svm,
    }
    report_names = {
        "MLP": "MLP",
        "LR": "Logistic Regression",
        "RF": "Random Forest",
        "SVM": "SVM",
    }

    # All text reports first, then all confusion matrices
    for short_name, y_pred in predictions.items():
        print_classification_metrics(y_test, y_pred, report_names[short_name], model_name)

    for short_name, y_pred in predictions.items():
        plot_confusion_matrix(y_test, y_pred, class_labels, f"{short_name} - {model_name}")

    # Inference timing is disabled; measure_inference_time is commented out in this notebook.
    #measure_inference_time(nn_model, X_test, name=f"NN - {model_name}")
    #measure_inference_time(lr_model, lr_scaler.transform(X_test), name=f"LR - {model_name}")
    #measure_inference_time(rf_model, X_test, name=f"RF - {model_name}")
    #measure_inference_time(svm_model, svm_scaler.transform(X_test), name=f"SVM - {model_name}")

    accuracy_results[model_name] = {
        "MLP": acc_nn,
        "Logistic Regression": acc_lr,
        "Random Forest": acc_rf,
        "SVM": acc_svm
    }

    # Keep the predictions (and the matching labels) for later error analysis
    all_preds[model_name] = {**predictions, "y_test": y_test}

    # Release the backbone and feature arrays before loading the next backbone
    del base_model, X_train, X_test, y_train, y_test
    tf.keras.backend.clear_session()
    gc.collect()

In [None]:
# Text summary of every backbone/classifier accuracy, followed by the comparison chart.
print("\n Summary of Results:")
for model_name in accuracy_results:
    results = accuracy_results[model_name]
    print(f"\n {model_name}:")
    for clf in results:
        acc = results[clf]
        print(f"{clf}: {acc:.2f}%")
plot_model_comparison(accuracy_results)

In [None]:
# Accuracy table rounded to two decimals: one column per backbone, one row per classifier.
results_df = pd.DataFrame(accuracy_results).round(2)
# Print the table as plain text
print("\n Summary of Results:")
print(results_df.to_string(index=True))

In [None]:
def show_misclassified_images(images, y_true, y_pred, title, class_labels, max_images=6):
    """Display up to `max_images` images whose predicted class differs from the true class.

    Args:
        images: Indexable collection of images; `images[i]` must correspond to `y_true[i]`.
        y_true: True class indices.
        y_pred: Predicted class indices.
        title: Figure title, also used in the printed messages.
        class_labels: Sequence mapping a class index to its display name.
        max_images: Maximum number of misclassified images to draw.

    Returns:
        None. If nothing is misclassified, a message is printed and no figure is created.
    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    misclassified_idx = np.where(y_true != y_pred)[0]
    if len(misclassified_idx) == 0:
        print(f"No misclassified images for {title}")
        return

    shown_idx = misclassified_idx[:max(max_images, 0)]
    print(f" Showing {len(shown_idx)} misclassified images for {title}")

    # Three columns; at least two rows (the original layout), more when max_images > 6
    n_cols = 3
    n_rows = max(2, -(-len(shown_idx) // n_cols))
    plt.figure(figsize=(10, 3.5 * n_rows))
    for i, idx in enumerate(shown_idx):
        image = np.asarray(images[idx])
        label_true = class_labels[int(y_true[idx])]
        label_pred = class_labels[int(y_pred[idx])]

        # Make sure the image has exactly 3 channels before handing it to imshow
        if image.ndim == 2:
            image = np.stack((image,) * 3, axis=-1)
        elif image.shape[-1] == 1:
            # A single trailing channel is replicated to RGB
            image = np.repeat(image, 3, axis=-1)
        elif image.shape[-1] != 3:
            image = image[..., :3]

        image = np.clip(image, 0, 1)

        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(image)
        plt.title(f"True: {label_true}\nPred: {label_pred}", color='red')
        plt.axis('off')

    plt.suptitle(title, fontsize=14)
    plt.subplots_adjust(hspace=0.4)  # extra vertical spacing between rows
    plt.tight_layout(rect=[0, 0, 1, 0.95])  # leave room for the suptitle
    plt.show()

In [None]:
# Reload the test images for display
print("\n Loading test images for visualization...")
# Read the files in the order listed by the test generator so that row i of image_array
# corresponds to entry i of the generator's file list
test_image_paths = test_generator.filepaths
# Preallocate the final array and fill it in place; this avoids holding a Python list of
# per-image arrays plus a second full copy when converting the list to an array.
image_array = np.empty((len(test_image_paths), 224, 224, 3), dtype=np.float32)
for i, path in enumerate(test_image_paths):
    img = tf.keras.utils.load_img(path, color_mode='rgb', target_size=(224, 224))
    img = tf.keras.utils.img_to_array(img)
    img = img / 255.0
    image_array[i] = img

In [None]:
# Show a sample of misclassified test images for every backbone/classifier combination.
print("\n Displaying misclassified images for each model & classifier...")
for model_name, model_preds in all_preds.items():
    y_test = model_preds["y_test"]
    for clf_name in ("MLP", "LR", "RF", "SVM"):
        y_pred = model_preds[clf_name]
        figure_title = f"{clf_name} Misclassified - {model_name}"
        show_misclassified_images(image_array, y_test, y_pred, figure_title, class_labels)



In [None]:
# Save the accuracy table with one row per backbone and one column per classifier.
results_df = pd.DataFrame(accuracy_results).transpose()
results_df.to_csv("alzheimer_model_comparison.csv")
