In [None]:
# Mount Google Drive into the runtime filesystem at /content/drive.
# `google.colab` is only importable inside a Google Colab runtime.
from google.colab import drive
# NOTE(review): the data-loading cell further down reads from
# /home/snu/Downloads rather than from this mount point — confirm whether the
# mount is still required.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install numpy scikit-learn
!pip install -q torch_geometric
!pip install -q class_resolver
!pip3 install pymatting
!pip install opencv-python
!pip install xgboost imbalanced-learn

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.7/63.7 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m85.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pymatting
  Downloading pymatting-1.1.14-py3-none-any.whl.metadata (7.7 kB)
Downloading pymatting-1.1.14-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.7/54.7 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymatting
Successfully installed pymatting-1.1.14


In [6]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss
from sklearn.model_selection import StratifiedShuffleSplit
from xgboost import XGBClassifier
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_curve, roc_auc_score, log_loss
)

In [7]:
# === Load the FA histogram feature matrices (one row per subject) ===
# NOTE(review): both paths are absolute and machine-specific; they will not
# resolve on another host — consider a configurable data directory.
# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects;
# only use it on files from a trusted source.
fa_patients_path = "/home/snu/Downloads/NIFD_Patients_FA_Histogram_Feature.npy"
fa_controls_path = "/home/snu/Downloads/NIFD_Control_FA_Histogram_Feature.npy"

Patients_FA_array = np.load(fa_patients_path, allow_pickle=True)
Controls_FA_array = np.load(fa_controls_path, allow_pickle=True)

print("Patients Shape:", Patients_FA_array.shape)
print("Controls Shape:", Controls_FA_array.shape)

# === Build the design matrix and label vector ===
# Controls are stacked first, patients second; labels follow the same order
# (0 = Control, 1 = Patient).
n_controls = Controls_FA_array.shape[0]
n_patients = Patients_FA_array.shape[0]
X = np.vstack((Controls_FA_array, Patients_FA_array))
y = np.repeat(np.array([0, 1], dtype=np.int64), [n_controls, n_patients])

# === Shuffle rows and labels with one shared, seeded permutation ===
np.random.seed(42)
perm = np.random.permutation(len(X))
X, y = X[perm], y[perm]

Patients Shape: (98, 180)
Controls Shape: (48, 180)


In [8]:
# Standardise each feature column with a StandardScaler fitted on X.
# NOTE(review): the scaler is fitted on every sample before any train/test
# split is drawn, so test rows contribute to the scaling statistics. This is
# presumably harmless for the tree-based model used below, but would matter for
# scale-sensitive models — confirm before reusing X elsewhere.
scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [9]:
# Splitter producing repeated random train/test partitions that preserve the
# class proportions of y in both parts.
N_SPLITS = 20        # number of random partitions to draw
TEST_FRACTION = 0.3  # share of samples held out for testing in each partition
SPLIT_SEED = 42      # fixes the sequence of partitions

sss = StratifiedShuffleSplit(
    n_splits=N_SPLITS,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [10]:
# Repeated stratified hold-out evaluation of an XGBoost classifier.
# `sss.split` yields one train/test index pair per iteration; a fresh model is
# fitted on the training part and scored on the matching test part.

# Per-split metric values, one entry appended per iteration.
accuracies, precisions, recalls, f1_scores, losses, all_auc = [], [], [], [], [], []
# Test labels / class-1 probabilities pooled over all splits, plus the ROC
# curve points of every individual split.
all_y_true, all_y_proba, all_fpr, all_tpr = [], [], [], []

for fold, (train_idx, test_idx) in enumerate(sss.split(X, y)):
    print(f"\n===== Fold {fold + 1} =====")

    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Train XGBoost (a new estimator per split, so nothing carries over)
    xgb = XGBClassifier(
        n_estimators=200,
        max_depth=6,
        learning_rate=0.1,
        eval_metric='logloss',
        random_state=42
    )
    xgb.fit(X_train, y_train)

    # Hard labels feed accuracy / precision / recall / F1;
    # class-1 probabilities feed AUC, log loss and the ROC curve.
    y_pred = xgb.predict(X_test)
    y_pred_proba = xgb.predict_proba(X_test)[:, 1]

    # Metrics (zero_division=0 reports 0 instead of warning when a ratio is undefined)
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    auc = roc_auc_score(y_test, y_pred_proba)
    loss = log_loss(y_test, y_pred_proba)

    # Save fold metrics
    accuracies.append(acc)
    precisions.append(prec)
    recalls.append(rec)
    f1_scores.append(f1)
    losses.append(loss)
    all_auc.append(auc)

    # Save ROC curve data (optional)
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    all_fpr.append(fpr)
    all_tpr.append(tpr)
    all_y_true.extend(y_test)
    all_y_proba.extend(y_pred_proba)

    print(f"Accuracy: {acc:.4f} | Precision: {prec:.4f} | Recall: {rec:.4f} | "
          f"F1: {f1:.4f} | AUC: {auc:.4f} | Loss: {loss:.4f}")

# ==============================
# Final averaged results
# ==============================
# The header reports the number of splits that actually ran instead of a
# hard-coded count, so it stays correct when the splitter is reconfigured.
n_runs = len(accuracies)
print(f"\n===== Average Results Across {n_runs} Folds =====")

# Labels carry their own padding so the numbers line up in one column.
summary_rows = [
    ("Accuracy:  ", accuracies),
    ("Precision: ", precisions),
    ("Recall:    ", recalls),
    ("F1 Score:  ", f1_scores),
    ("AUC:       ", all_auc),
    ("Log Loss:  ", losses),
]
for metric_label, metric_values in summary_rows:
    # Mean and population standard deviation (np.std default) over the splits.
    print(f"{metric_label}{np.mean(metric_values):.4f} ± {np.std(metric_values):.4f}")


===== Fold 1 =====
Accuracy: 0.6591 | Precision: 0.7273 | Recall: 0.8000 | F1: 0.7619 | AUC: 0.7548 | Loss: 0.6977

===== Fold 2 =====
Accuracy: 0.8636 | Precision: 0.8333 | Recall: 1.0000 | F1: 0.9091 | AUC: 0.8857 | Loss: 0.4565

===== Fold 3 =====
Accuracy: 0.6591 | Precision: 0.7419 | Recall: 0.7667 | F1: 0.7541 | AUC: 0.6524 | Loss: 0.8712

===== Fold 4 =====
Accuracy: 0.7500 | Precision: 0.7714 | Recall: 0.9000 | F1: 0.8308 | AUC: 0.7095 | Loss: 0.8566

===== Fold 5 =====
Accuracy: 0.7045 | Precision: 0.7429 | Recall: 0.8667 | F1: 0.8000 | AUC: 0.7619 | Loss: 0.5915

===== Fold 6 =====
Accuracy: 0.8182 | Precision: 0.8438 | Recall: 0.9000 | F1: 0.8710 | AUC: 0.8476 | Loss: 0.4989

===== Fold 7 =====
Accuracy: 0.7727 | Precision: 0.8125 | Recall: 0.8667 | F1: 0.8387 | AUC: 0.7976 | Loss: 0.5823

===== Fold 8 =====
Accuracy: 0.7273 | Precision: 0.8000 | Recall: 0.8000 | F1: 0.8000 | AUC: 0.8071 | Loss: 0.5113

===== Fold 9 =====
Accuracy: 0.8409 | Precision: 0.8108 | Recall: 1.000

In [None]:
# Re-run of the repeated stratified hold-out evaluation of an XGBoost
# classifier. This cell re-initialises and overwrites every result list below.
# NOTE(review): the output saved under this cell has test sets of a different
# size than the 0.3 split defined earlier would give, so `sss` was presumably
# redefined (different test_size) before this run — confirm, and make that
# configuration change explicit in the notebook.

# Per-split metric values, one entry appended per iteration.
accuracies, precisions, recalls, f1_scores, losses, all_auc = [], [], [], [], [], []
# Test labels / class-1 probabilities pooled over all splits, plus the ROC
# curve points of every individual split.
all_y_true, all_y_proba, all_fpr, all_tpr = [], [], [], []

for fold, (train_idx, test_idx) in enumerate(sss.split(X, y)):
    print(f"\n===== Fold {fold + 1} =====")

    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Train XGBoost (a new estimator per split, so nothing carries over)
    xgb = XGBClassifier(
        n_estimators=200,
        max_depth=6,
        learning_rate=0.1,
        eval_metric='logloss',
        random_state=42
    )
    xgb.fit(X_train, y_train)

    # Hard labels feed accuracy / precision / recall / F1;
    # class-1 probabilities feed AUC, log loss and the ROC curve.
    y_pred = xgb.predict(X_test)
    y_pred_proba = xgb.predict_proba(X_test)[:, 1]

    # Metrics (zero_division=0 reports 0 instead of warning when a ratio is undefined)
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    auc = roc_auc_score(y_test, y_pred_proba)
    loss = log_loss(y_test, y_pred_proba)

    # Save fold metrics
    accuracies.append(acc)
    precisions.append(prec)
    recalls.append(rec)
    f1_scores.append(f1)
    losses.append(loss)
    all_auc.append(auc)

    # Save ROC curve data (optional)
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    all_fpr.append(fpr)
    all_tpr.append(tpr)
    all_y_true.extend(y_test)
    all_y_proba.extend(y_pred_proba)

    print(f"Accuracy: {acc:.4f} | Precision: {prec:.4f} | Recall: {rec:.4f} | "
          f"F1: {f1:.4f} | AUC: {auc:.4f} | Loss: {loss:.4f}")

# The header reports the number of splits that actually ran instead of a
# hard-coded count, so it stays correct when the splitter is reconfigured.
n_runs = len(accuracies)
print(f"\n===== Average Results Across {n_runs} Folds =====")

# Labels carry their own padding so the numbers line up in one column.
summary_rows = [
    ("Accuracy:  ", accuracies),
    ("Precision: ", precisions),
    ("Recall:    ", recalls),
    ("F1 Score:  ", f1_scores),
    ("AUC:       ", all_auc),
    ("Log Loss:  ", losses),
]
for metric_label, metric_values in summary_rows:
    # Mean and population standard deviation (np.std default) over the splits.
    print(f"{metric_label}{np.mean(metric_values):.4f} ± {np.std(metric_values):.4f}")


===== Fold 1 =====
Accuracy: 0.7671 | Precision: 0.7759 | Recall: 0.9184 | F1: 0.8411 | AUC: 0.7670 | Loss: 0.6467

===== Fold 2 =====
Accuracy: 0.7123 | Precision: 0.7500 | Recall: 0.8571 | F1: 0.8000 | AUC: 0.7202 | Loss: 0.7242

===== Fold 3 =====
Accuracy: 0.7123 | Precision: 0.7500 | Recall: 0.8571 | F1: 0.8000 | AUC: 0.7679 | Loss: 0.6747

===== Fold 4 =====
Accuracy: 0.6575 | Precision: 0.7069 | Recall: 0.8367 | F1: 0.7664 | AUC: 0.5842 | Loss: 1.0456

===== Fold 5 =====
Accuracy: 0.6301 | Precision: 0.6964 | Recall: 0.7959 | F1: 0.7429 | AUC: 0.6675 | Loss: 0.7283

===== Fold 6 =====
Accuracy: 0.6849 | Precision: 0.7321 | Recall: 0.8367 | F1: 0.7810 | AUC: 0.7440 | Loss: 0.7445

===== Fold 7 =====
Accuracy: 0.7671 | Precision: 0.8077 | Recall: 0.8571 | F1: 0.8317 | AUC: 0.7466 | Loss: 0.6462

===== Fold 8 =====
Accuracy: 0.6986 | Precision: 0.7755 | Recall: 0.7755 | F1: 0.7755 | AUC: 0.7645 | Loss: 0.6044

===== Fold 9 =====
Accuracy: 0.7260 | Precision: 0.7636 | Recall: 0.857