In [1]:
# Attach Google Drive to the Colab VM; the feature files loaded later in this
# notebook are read from paths under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
!pip install numpy scikit-learn
!pip install -q torch_geometric
!pip install -q class_resolver
!pip3 install pymatting
!pip install opencv-python
!pip install xgboost imbalanced-learn

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.7/63.7 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pymatting
  Downloading pymatting-1.1.14-py3-none-any.whl.metadata (7.7 kB)
Downloading pymatting-1.1.14-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.7/54.7 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymatting
Successfully installed pymatting-1.1.14


In [3]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss
from sklearn.model_selection import StratifiedShuffleSplit
from xgboost import XGBClassifier
from sklearn.metrics import roc_curve, auc, roc_auc_score

# Paths to the pre-computed FA histogram feature arrays, one file per cohort
# (CN and AD, going by the directory and file names).
cn_fa_feature_path = "/content/drive/MyDrive/TejaswiAbburi_va797/Dataset/ISBI_ADNI_CN_dataset/Processed_histogram_features_CN_All/Histogram_CN_FA_20bin_updated.npy"
ad_fa_feature_path = "/content/drive/MyDrive/TejaswiAbburi_va797/Dataset/ISBI_ADNI_AD_dataset/Processed_histogram_features_AD_All/Histogram_AD_FA_20bin_updated.npy"

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code; acceptable only because these files are
# trusted. If the arrays are plain numeric, allow_pickle=False would be safer.
Histogram_feature_CN_FA, Histogram_feature_AD_FA = (
    np.load(feature_file, allow_pickle=True)
    for feature_file in (cn_fa_feature_path, ad_fa_feature_path)
)
print(Histogram_feature_CN_FA.shape, Histogram_feature_AD_FA.shape, sep="\n")

# One row per sample; build the matching binary label vectors.
X_cn, X_ad = Histogram_feature_CN_FA, Histogram_feature_AD_FA
y_cn = np.zeros(len(X_cn))  # CN -> class 0
y_ad = np.ones(len(X_ad))   # AD -> class 1

# Stack both cohorts into a single design matrix / label vector ...
X = np.vstack([X_cn, X_ad])
y = np.hstack([y_cn, y_ad])

# ... and shuffle rows with a fixed seed so CN and AD samples are interleaved
# reproducibly (the same permutation is applied to X and y).
np.random.seed(42)
perm = np.random.permutation(len(X))
X, y = X[perm], y[perm]

# Evaluation protocol: 20 independent stratified random splits, each training
# on 10% of the samples and testing on the remaining 90%. These are repeated
# hold-out splits (test sets may overlap between iterations), not k-fold CV.
sss = StratifiedShuffleSplit(n_splits=20, test_size=0.9, random_state=42)

# Per-split scalar metrics
accuracies, precisions, recalls, f1_scores, losses = [], [], [], [], []

# Pooled labels / probabilities and per-split ROC data, kept for plotting
all_y_true = []
all_y_proba = []
all_fpr = []
all_tpr = []
all_auc = []


# Loop over splits
for fold, (train_idx, test_idx) in enumerate(sss.split(X, y)):
    print(f"\nTraining fold {fold + 1}")

    y_train, y_test = y[train_idx], y[test_idx]

    # Fit the scaler on the training rows only and reuse its statistics for
    # the test rows. Fitting on the full matrix before splitting would leak
    # test-set mean/variance into preprocessing. X itself is left unscaled.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X[train_idx])
    X_test = scaler.transform(X[test_idx])

    # XGBoost Classifier (no warning: no use_label_encoder param)
    xgb = XGBClassifier(n_estimators=200, max_depth=6, learning_rate=0.1, eval_metric='logloss', random_state=42)
    xgb.fit(X_train, y_train)

    y_pred = xgb.predict(X_test)
    y_pred_proba = xgb.predict_proba(X_test)[:, 1]  # probability of class 1

    accuracy = accuracy_score(y_test, y_pred)
    # zero_division=0: if a split yields no positive predictions, report 0.0
    # explicitly instead of emitting an UndefinedMetricWarning.
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    loss = log_loss(y_test, y_pred_proba)
    auc_score = roc_auc_score(y_test, y_pred_proba)

    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    losses.append(loss)
    all_y_true.extend(y_test)
    all_y_proba.extend(y_pred_proba)

    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    all_fpr.append(fpr)
    all_tpr.append(tpr)
    all_auc.append(auc_score)

    print(f"Accuracy: {accuracy:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f} | F1: {f1:.4f} | Loss: {loss:.4f} | AUC: {auc_score:.4f}")

# Report mean ± standard deviation of every metric collected in the loop above.
print("\nAverage Results across 20 folds:")
for _metric_label, _metric_values in (
    ("Accuracy", accuracies),
    ("Precision", precisions),
    ("Recall", recalls),
    ("F1 Score", f1_scores),
    ("Cross-Entropy Loss", losses),
    ("Average AUC", all_auc),
):
    print(f"{_metric_label}: {np.mean(_metric_values):.4f} ± {np.std(_metric_values):.4f}")

(133, 180)
(90, 180)

Training fold 1
Accuracy: 0.6915 | Precision: 0.7209 | Recall: 0.3827 | F1: 0.5000 | Loss: 0.5816 | AUC: 0.7816

Training fold 2
Accuracy: 0.6965 | Precision: 0.6389 | Recall: 0.5679 | F1: 0.6013 | Loss: 0.6101 | AUC: 0.7283

Training fold 3
Accuracy: 0.7761 | Precision: 0.7903 | Recall: 0.6049 | F1: 0.6853 | Loss: 0.5903 | AUC: 0.7657

Training fold 4
Accuracy: 0.7612 | Precision: 0.7538 | Recall: 0.6049 | F1: 0.6712 | Loss: 0.5794 | AUC: 0.7806

Training fold 5
Accuracy: 0.6965 | Precision: 0.6429 | Recall: 0.5556 | F1: 0.5960 | Loss: 0.6243 | AUC: 0.7287

Training fold 6
Accuracy: 0.7363 | Precision: 0.7500 | Recall: 0.5185 | F1: 0.6131 | Loss: 0.5646 | AUC: 0.7744

Training fold 7
Accuracy: 0.6816 | Precision: 0.6545 | Recall: 0.4444 | F1: 0.5294 | Loss: 0.6265 | AUC: 0.7467

Training fold 8
Accuracy: 0.6866 | Precision: 0.5918 | Recall: 0.7160 | F1: 0.6480 | Loss: 0.6000 | AUC: 0.7636

Training fold 9
Accuracy: 0.7761 | Precision: 0.8103 | Recall: 0.5802 | F1