In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive; the feature
# files loaded later in this notebook are read from paths under that mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install numpy scikit-learn
!pip install -q torch_geometric
!pip install -q class_resolver
!pip3 install pymatting
!pip install opencv-python
!pip install xgboost imbalanced-learn

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.7/63.7 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m74.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pymatting
  Downloading pymatting-1.1.14-py3-none-any.whl.metadata (7.7 kB)
Downloading pymatting-1.1.14-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.7/54.7 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymatting
Successfully installed pymatting-1.1.14


In [2]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss
from sklearn.model_selection import StratifiedShuffleSplit
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_curve, roc_auc_score, log_loss
)

In [3]:
# Load the CN and MCI FA histogram feature arrays from Google Drive.
# The dataset root is kept in one place so the notebook can be pointed at a
# different location by editing a single line.
DATA_ROOT = "/content/drive/MyDrive/TejaswiAbburi_va797/Dataset"

cn_fa_feature_path = f"{DATA_ROOT}/ISBI_ADNI_CN_dataset/Processed_histogram_features_CN_All/Histogram_CN_FA_20bin_updated.npy"
mci_fa_feature_path = f"{DATA_ROOT}/ISBI_ADNI_MCI_dataset/Processed_histogram_features_MCI_All/Histogram_MCI_FA_20bin_updated.npy"

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code embedded in the file. Only load files you
# created yourself; if these arrays are plain numeric arrays, the flag can
# presumably be dropped -- confirm against how the .npy files were saved.
Histogram_feature_CN_FA = np.load(cn_fa_feature_path, allow_pickle=True)
Histogram_feature_MCI_FA = np.load(mci_fa_feature_path, allow_pickle=True)

In [4]:
# Short aliases for the two per-class feature arrays.
X_cn, X_mci = Histogram_feature_CN_FA, Histogram_feature_MCI_FA

# One label per row: 0 marks CN, 1 marks MCI.
y_cn = np.zeros(len(X_cn))
y_mci = np.ones(len(X_mci))

# Stack CN rows on top of MCI rows and line the labels up in the same order.
X = np.vstack([X_cn, X_mci])
y = np.concatenate([y_cn, y_mci])

In [5]:
# Shuffle rows and labels with one shared permutation; the fixed seed makes
# the resulting order reproducible.
np.random.seed(42)
perm = np.random.permutation(len(X))
X, y = X[perm], y[perm]

In [6]:
# Standardise every feature column (StandardScaler defaults: zero mean,
# unit variance).
# NOTE(review): the scaler is fitted on all rows before any train/test split
# is drawn, so held-out rows contribute to the scaling statistics.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [7]:
# Repeated stratified hold-out: 20 random splits, each keeping 30% of the rows
# for testing while preserving the class ratio. The splits are drawn
# independently, so their test sets may overlap.
sss = StratifiedShuffleSplit(
    n_splits=20,
    test_size=0.3,
    random_state=42,
)

In [8]:
# Repeated stratified hold-out evaluation of an XGBoost classifier
# (labels: 0 = CN, 1 = MCI). Every split produced by `sss` trains a fresh
# model, scores it on the held-out rows, and stores the per-split metrics;
# the mean and standard deviation over all splits are printed at the end.
accuracies, precisions, recalls, f1_scores, losses, all_auc = [], [], [], [], [], []
all_y_true, all_y_proba, all_fpr, all_tpr = [], [], [], []

for fold, (train_idx, test_idx) in enumerate(sss.split(X, y)):
    print(f"\n===== Fold {fold + 1} =====")

    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Fit the scaling statistics on the training rows only and reuse them for
    # the test rows, so no held-out information reaches the preprocessing
    # step. Standardising is an affine per-feature map, so this is valid even
    # when X was already scaled globally in an earlier cell.
    fold_scaler = StandardScaler()
    X_train = fold_scaler.fit_transform(X_train)
    X_test = fold_scaler.transform(X_test)

    # Train XGBoost (fresh model per split, fixed seed)
    xgb = XGBClassifier(
        n_estimators=200,
        max_depth=6,
        learning_rate=0.1,
        eval_metric='logloss',
        random_state=42
    )
    xgb.fit(X_train, y_train)

    # Predictions: hard labels plus the probability of class 1 (MCI)
    y_pred = xgb.predict(X_test)
    y_pred_proba = xgb.predict_proba(X_test)[:, 1]

    # Metrics. Precision/recall/F1 use the hard labels; zero_division=0 makes
    # them 0 (instead of warning) when a denominator is empty. AUC and log
    # loss are computed from the class-1 probabilities.
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    auc = roc_auc_score(y_test, y_pred_proba)
    loss = log_loss(y_test, y_pred_proba)

    # Save fold metrics
    accuracies.append(acc)
    precisions.append(prec)
    recalls.append(rec)
    f1_scores.append(f1)
    losses.append(loss)
    all_auc.append(auc)

    # Save ROC curve data and the pooled labels/probabilities (optional,
    # e.g. for plotting later)
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    all_fpr.append(fpr)
    all_tpr.append(tpr)
    all_y_true.extend(y_test)
    all_y_proba.extend(y_pred_proba)

    print(f"Accuracy: {acc:.4f} | Precision: {prec:.4f} | Recall: {rec:.4f} | "
          f"F1: {f1:.4f} | AUC: {auc:.4f} | Loss: {loss:.4f}")

# ==============================
# Final averaged results
# ==============================
# The split count is taken from the collected results instead of being
# hard-coded, so the header stays correct if `sss` is reconfigured.
# np.std uses its default ddof=0 (population standard deviation).
print(f"\n===== Average Results Across {len(accuracies)} Folds =====")
print(f"Accuracy:  {np.mean(accuracies):.4f} ± {np.std(accuracies):.4f}")
print(f"Precision: {np.mean(precisions):.4f} ± {np.std(precisions):.4f}")
print(f"Recall:    {np.mean(recalls):.4f} ± {np.std(recalls):.4f}")
print(f"F1 Score:  {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")
print(f"AUC:       {np.mean(all_auc):.4f} ± {np.std(all_auc):.4f}")
print(f"Log Loss:  {np.mean(losses):.4f} ± {np.std(losses):.4f}")


===== Fold 1 =====
Accuracy: 0.8556 | Precision: 0.8491 | Recall: 0.9000 | F1: 0.8738 | AUC: 0.8940 | Loss: 0.4580

===== Fold 2 =====
Accuracy: 0.8111 | Precision: 0.8235 | Recall: 0.8400 | F1: 0.8317 | AUC: 0.8925 | Loss: 0.4405

===== Fold 3 =====
Accuracy: 0.7667 | Precision: 0.7959 | Recall: 0.7800 | F1: 0.7879 | AUC: 0.8665 | Loss: 0.5657

===== Fold 4 =====
Accuracy: 0.8333 | Precision: 0.8431 | Recall: 0.8600 | F1: 0.8515 | AUC: 0.8930 | Loss: 0.4603

===== Fold 5 =====
Accuracy: 0.7778 | Precision: 0.8125 | Recall: 0.7800 | F1: 0.7959 | AUC: 0.8845 | Loss: 0.5259

===== Fold 6 =====
Accuracy: 0.7556 | Precision: 0.7917 | Recall: 0.7600 | F1: 0.7755 | AUC: 0.8525 | Loss: 0.6006

===== Fold 7 =====
Accuracy: 0.8000 | Precision: 0.8478 | Recall: 0.7800 | F1: 0.8125 | AUC: 0.8910 | Loss: 0.4642

===== Fold 8 =====
Accuracy: 0.8111 | Precision: 0.8837 | Recall: 0.7600 | F1: 0.8172 | AUC: 0.8880 | Loss: 0.5361

===== Fold 9 =====
Accuracy: 0.8222 | Precision: 0.8864 | Recall: 0.780

In [None]:
# Repeated stratified hold-out evaluation of an XGBoost classifier
# (labels: 0 = CN, 1 = MCI). Every split produced by `sss` trains a fresh
# model, scores it on the held-out rows, and stores the per-split metrics;
# the mean and standard deviation over all splits are printed at the end.
# NOTE(review): this cell repeats the evaluation code of the previous
# evaluation cell and overwrites the same result lists -- consider keeping
# only one copy.
accuracies, precisions, recalls, f1_scores, losses, all_auc = [], [], [], [], [], []
all_y_true, all_y_proba, all_fpr, all_tpr = [], [], [], []

for fold, (train_idx, test_idx) in enumerate(sss.split(X, y)):
    print(f"\n===== Fold {fold + 1} =====")

    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Fit the scaling statistics on the training rows only and reuse them for
    # the test rows, so no held-out information reaches the preprocessing
    # step. Standardising is an affine per-feature map, so this is valid even
    # when X was already scaled globally in an earlier cell.
    fold_scaler = StandardScaler()
    X_train = fold_scaler.fit_transform(X_train)
    X_test = fold_scaler.transform(X_test)

    # Train XGBoost (fresh model per split, fixed seed)
    xgb = XGBClassifier(
        n_estimators=200,
        max_depth=6,
        learning_rate=0.1,
        eval_metric='logloss',
        random_state=42
    )
    xgb.fit(X_train, y_train)

    # Predictions: hard labels plus the probability of class 1 (MCI)
    y_pred = xgb.predict(X_test)
    y_pred_proba = xgb.predict_proba(X_test)[:, 1]

    # Metrics. Precision/recall/F1 use the hard labels; zero_division=0 makes
    # them 0 (instead of warning) when a denominator is empty. AUC and log
    # loss are computed from the class-1 probabilities.
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    auc = roc_auc_score(y_test, y_pred_proba)
    loss = log_loss(y_test, y_pred_proba)

    # Save fold metrics
    accuracies.append(acc)
    precisions.append(prec)
    recalls.append(rec)
    f1_scores.append(f1)
    losses.append(loss)
    all_auc.append(auc)

    # Save ROC curve data and the pooled labels/probabilities (optional,
    # e.g. for plotting later)
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    all_fpr.append(fpr)
    all_tpr.append(tpr)
    all_y_true.extend(y_test)
    all_y_proba.extend(y_pred_proba)

    print(f"Accuracy: {acc:.4f} | Precision: {prec:.4f} | Recall: {rec:.4f} | "
          f"F1: {f1:.4f} | AUC: {auc:.4f} | Loss: {loss:.4f}")

# ==============================
# Final averaged results
# ==============================
# The split count is taken from the collected results instead of being
# hard-coded, so the header stays correct if `sss` is reconfigured.
# np.std uses its default ddof=0 (population standard deviation).
print(f"\n===== Average Results Across {len(accuracies)} Folds =====")
print(f"Accuracy:  {np.mean(accuracies):.4f} ± {np.std(accuracies):.4f}")
print(f"Precision: {np.mean(precisions):.4f} ± {np.std(precisions):.4f}")
print(f"Recall:    {np.mean(recalls):.4f} ± {np.std(recalls):.4f}")
print(f"F1 Score:  {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")
print(f"AUC:       {np.mean(all_auc):.4f} ± {np.std(all_auc):.4f}")
print(f"Log Loss:  {np.mean(losses):.4f} ± {np.std(losses):.4f}")


===== Fold 1 =====
Accuracy: 0.9000 | Precision: 0.9375 | Recall: 0.8824 | F1: 0.9091 | AUC: 0.9729 | Loss: 0.2173

===== Fold 2 =====
Accuracy: 0.7333 | Precision: 0.7647 | Recall: 0.7647 | F1: 0.7647 | AUC: 0.8869 | Loss: 0.5615

===== Fold 3 =====
Accuracy: 0.8000 | Precision: 0.8235 | Recall: 0.8235 | F1: 0.8235 | AUC: 0.8597 | Loss: 0.5619

===== Fold 4 =====
Accuracy: 0.9000 | Precision: 0.8889 | Recall: 0.9412 | F1: 0.9143 | AUC: 0.9548 | Loss: 0.2748

===== Fold 5 =====
Accuracy: 0.7333 | Precision: 0.7647 | Recall: 0.7647 | F1: 0.7647 | AUC: 0.8326 | Loss: 0.5906

===== Fold 6 =====
Accuracy: 0.8333 | Precision: 0.9286 | Recall: 0.7647 | F1: 0.8387 | AUC: 0.9231 | Loss: 0.3818

===== Fold 7 =====
Accuracy: 0.8333 | Precision: 1.0000 | Recall: 0.7059 | F1: 0.8276 | AUC: 0.9502 | Loss: 0.4027

===== Fold 8 =====
Accuracy: 0.8667 | Precision: 1.0000 | Recall: 0.7647 | F1: 0.8667 | AUC: 0.9910 | Loss: 0.3109

===== Fold 9 =====
Accuracy: 0.8667 | Precision: 0.9333 | Recall: 0.823