In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as nnFn
import torch.optim as optim
import numpy as np
import random
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch_geometric.data import Data
from torch_geometric.nn import GATConv
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, log_loss
)

In [9]:
fa_feature_path = "/home/snu/Downloads/Histogram_CN_FA_20bin_updated.npy"
Histogram_feature_CN_FA_array = np.load(fa_feature_path, allow_pickle=True)

fa_feature_path = "/home/snu/Downloads/Histogram_AD_FA_20bin_updated.npy" # Assuming this is the AD data based on the request.
Histogram_feature_AD_FA_array = np.load(fa_feature_path, allow_pickle=True)

X = np.vstack([Histogram_feature_CN_FA_array, Histogram_feature_AD_FA_array])
y = np.hstack([
    np.zeros(Histogram_feature_CN_FA_array.shape[0], dtype=np.int64),
    np.ones(Histogram_feature_AD_FA_array.shape[0], dtype=np.int64)
])

num_nodes, num_feats = X.shape
print(f"Features: {X.shape}, Labels: {y.shape}")

Features: (223, 180), Labels: (223,)


In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as nnFn
from torch_geometric.nn import GATConv


class GAT_SemiSupervised(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, device, heads=2, activ="SELU", dropout=0.25):
        super(GAT_SemiSupervised, self).__init__()
        self.device = device

        # GAT layer
        self.conv1 = GATConv(input_dim, hidden_dim, heads=heads, concat=True, dropout=dropout)
        self.bn1 = nn.BatchNorm1d(hidden_dim * heads)  # heads multiply feature dimension if concat=True
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim * heads, output_dim)
        self.num_clusters = output_dim

        # Activation mapping
        activations = {
            "SELU": nnFn.selu,
            "SiLU": nnFn.silu,
            "GELU": nnFn.gelu,
            "RELU": nnFn.relu,
            "ELU": nnFn.elu
        }
        self.act = activations.get(activ, nnFn.elu)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        # GAT layer
        x = self.conv1(x, edge_index)
        x = self.bn1(x)
        x = self.act(x)
        x = self.dropout(x)

        logits = self.fc(x)
        return logits

    def cut_loss(self, A, S):
        S = nnFn.softmax(S, dim=1)
        A_pool = torch.matmul(torch.matmul(A, S).t(), S)
        num = torch.trace(A_pool)

        D = torch.diag(torch.sum(A, dim=-1))
        D_pooled = torch.matmul(torch.matmul(D, S).t(), S)
        den = torch.trace(D_pooled)
        mincut_loss = -(num / den)

        St_S = torch.matmul(S.t(), S)
        I_S = torch.eye(self.num_clusters, device=self.device)
        ortho_loss = torch.norm(St_S / torch.norm(St_S) - I_S / torch.norm(I_S))

        return mincut_loss + ortho_loss

In [11]:
def create_adj(F, alpha=1):
    F_norm = F / np.linalg.norm(F, axis=1, keepdims=True)
    W = np.dot(F_norm, F_norm.T)
    W = np.where(W >= alpha, 1, 0).astype(np.float32)
    W = W / W.max()
    return W

def load_data(adj, node_feats):
    node_feats = torch.from_numpy(node_feats).float()
    edge_index = torch.from_numpy(np.array(np.nonzero((adj > 0))))
    return Data(x=node_feats, edge_index=edge_index)

In [12]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
alpha = 0.8
feats_dim = num_feats
hidden_dim = 512
num_classes = 2
num_epochs = 2000
lr = 0.0001
weight_decay = 1e-4
batch_print_freq = 100
lambda_mod = 0.1 #0.01  # weight for modularity loss
# lambda_sup = 5

In [13]:
W = create_adj(X, alpha)
data = load_data(W, X).to(device)
A_tensor = torch.from_numpy(W).float().to(device)
print(data)

Data(x=[223, 180], edge_index=[2, 41689])


In [14]:
sss = StratifiedShuffleSplit(n_splits=20, test_size=0.9, random_state=42)

accuracies, precisions, recalls, f1_scores, aucs, ce_losses = [], [], [], [], [], []

for fold, (train_val_idx, test_idx_global) in enumerate(sss.split(X, y), start=1):
    print(f"\n=== Fold {fold} ===")


    cn_idx = np.where(y == 0)[0]
    ad_idx = np.where(y == 1)[0]

    sss_class = StratifiedShuffleSplit(n_splits=20, test_size=0.9, random_state=fold)
    cn_train_idx, _ = next(sss_class.split(X[cn_idx], y[cn_idx]))
    ad_train_idx, _ = next(sss_class.split(X[ad_idx], y[ad_idx]))

    cn_train = cn_idx[cn_train_idx]
    ad_train = ad_idx[ad_train_idx]
    train_idx_final = np.concatenate([cn_train, ad_train])
    np.random.shuffle(train_idx_final)

    print(f"Train CN: {len(cn_train)}, Train AD: {len(ad_train)}")

    train_idx_t = torch.from_numpy(train_idx_final).long().to(device)
    y_train_tensor = torch.from_numpy(y[train_idx_final]).long().to(device)


    model = GAT_SemiSupervised(feats_dim, hidden_dim, num_classes, device, activ="SELU").to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    ce_loss = nn.CrossEntropyLoss()


    for epoch in range(1, num_epochs + 1):
        model.train()
        optimizer.zero_grad()

        logits = model(data)
        loss_sup = ce_loss(logits[train_idx_t], y_train_tensor)
        loss_unsup = model.cut_loss(A_tensor, logits)
        total_loss = loss_sup + lambda_mod * loss_unsup

        total_loss.backward()
        optimizer.step()

        if epoch % batch_print_freq == 0 or epoch == 1:
            model.eval()
            with torch.no_grad():
                preds_train = logits[train_idx_t].argmax(dim=1)
                acc_train = accuracy_score(y_train_tensor.cpu(), preds_train.cpu())
            print(f"Fold {fold} Epoch {epoch}: "
                  f"TotalLoss={total_loss.item():.6f} | Sup={loss_sup.item():.6f} | "
                  f"Unsup={loss_unsup.item():.6f} | TrainAcc={acc_train:.4f}")


    model.eval()
    with torch.no_grad():
        out = model(data)
        preds = out.argmax(dim=1).cpu().numpy()
        probs = torch.softmax(out, dim=1)[:, 1].cpu().numpy()  # Probability for class 1

    y_test = y[test_idx_global]
    y_pred_test = preds[test_idx_global]
    y_prob_test = probs[test_idx_global]

    acc = accuracy_score(y_test, y_pred_test)
    prec = precision_score(y_test, y_pred_test, zero_division=0)
    rec = recall_score(y_test, y_pred_test, zero_division=0)
    f1 = f1_score(y_test, y_pred_test, zero_division=0)
    auc = roc_auc_score(y_test, y_prob_test)
    ce = log_loss(y_test, y_prob_test)

    accuracies.append(acc)
    precisions.append(prec)
    recalls.append(rec)
    f1_scores.append(f1)
    aucs.append(auc)
    ce_losses.append(ce)

    print(f"Fold {fold} → "
          f"Acc={acc:.4f} | Prec={prec:.4f} | Rec={rec:.4f} | "
          f"F1={f1:.4f} | AUC={auc:.4f} | CE Loss={ce:.4f}")


print("\n=== Average Results Across 20 Folds ===")
print(f"Accuracy:  {np.mean(accuracies):.4f} ± {np.std(accuracies):.4f}")
print(f"Precision: {np.mean(precisions):.4f} ± {np.std(precisions):.4f}")
print(f"Recall:    {np.mean(recalls):.4f} ± {np.std(recalls):.4f}")
print(f"F1-score:  {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")
print(f"AUC:       {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")
print(f"CE Loss:   {np.mean(ce_losses):.4f} ± {np.std(ce_losses):.4f}")


=== Fold 1 ===
Train CN: 13, Train AD: 9
Fold 1 Epoch 1: TotalLoss=0.603871 | Sup=0.631085 | Unsup=-0.272139 | TrainAcc=0.5909
Fold 1 Epoch 100: TotalLoss=0.397064 | Sup=0.430847 | Unsup=-0.337832 | TrainAcc=0.8182
Fold 1 Epoch 200: TotalLoss=0.180369 | Sup=0.215563 | Unsup=-0.351935 | TrainAcc=0.9545
Fold 1 Epoch 300: TotalLoss=0.051737 | Sup=0.087697 | Unsup=-0.359595 | TrainAcc=0.9545
Fold 1 Epoch 400: TotalLoss=-0.014620 | Sup=0.023333 | Unsup=-0.379529 | TrainAcc=1.0000
Fold 1 Epoch 500: TotalLoss=-0.035800 | Sup=0.005555 | Unsup=-0.413545 | TrainAcc=1.0000
Fold 1 Epoch 600: TotalLoss=-0.030989 | Sup=0.009825 | Unsup=-0.408138 | TrainAcc=1.0000
Fold 1 Epoch 700: TotalLoss=-0.038181 | Sup=0.002192 | Unsup=-0.403726 | TrainAcc=1.0000
Fold 1 Epoch 800: TotalLoss=-0.041689 | Sup=0.004661 | Unsup=-0.463496 | TrainAcc=1.0000
Fold 1 Epoch 900: TotalLoss=-0.044588 | Sup=0.003183 | Unsup=-0.477716 | TrainAcc=1.0000
Fold 1 Epoch 1000: TotalLoss=-0.049319 | Sup=0.000740 | Unsup=-0.500595 | 

In [15]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import StratifiedShuffleSplit
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, log_loss

# # ==========================================
# # CONFIG
# # ==========================================
# SEED = 42
# np.random.seed(SEED)
# torch.manual_seed(SEED)
# if torch.cuda.is_available():
#     torch.cuda.manual_seed_all(SEED)

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# hidden_dim = 512
# num_epochs = 2000
# lr = 1e-4
# weight_decay = 1e-4
# batch_print_freq = 500  # print every 500 epochs

# # Use same λ list as before
# lambda_mod_list = [0.001, 0.005, 0.009, 0.01, 0.05, 0.09, 0.1, 0.3, 0.5, 0.9, 1, 2, 5, 8]



# results_summary = []


# for lambda_mod in lambda_mod_list:
#     print(f"\n==============================")
#     print(f" Running with λ_mod = {lambda_mod}")
#     print(f"==============================")

#     accuracies, precisions, recalls, f1_scores, aucs, ce_losses = [], [], [], [], [], []

#     sss = StratifiedShuffleSplit(n_splits=20, test_size=0.5, random_state=SEED)

#     for fold, (train_val_idx, test_idx_global) in enumerate(sss.split(X, y), start=1):
#         print(f"\n=== Fold {fold} ===")

#         cn_idx = np.where(y == 0)[0]
#         ad_idx = np.where(y == 1)[0]

#         sss_class = StratifiedShuffleSplit(n_splits=20, test_size=0.5, random_state=fold)
#         cn_train_idx, _ = next(sss_class.split(X[cn_idx], y[cn_idx]))
#         ad_train_idx, _ = next(sss_class.split(X[ad_idx], y[ad_idx]))

#         cn_train = cn_idx[cn_train_idx]
#         ad_train = ad_idx[ad_train_idx]
#         train_idx_final = np.concatenate([cn_train, ad_train])
#         np.random.shuffle(train_idx_final)

#         print(f"Train CN: {len(cn_train)}, Train AD: {len(ad_train)}")

#         train_idx_t = torch.from_numpy(train_idx_final).long().to(device)
#         y_train_tensor = torch.from_numpy(y[train_idx_final]).long().to(device)

#         # Initialize model
#         model = GCN_SemiSupervised(feats_dim, hidden_dim, num_classes, device, activ="RELU").to(device)
#         optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
#         ce_loss = nn.CrossEntropyLoss()

#         # Training loop
#         for epoch in range(1, num_epochs + 1):
#             model.train()
#             optimizer.zero_grad()

#             logits = model(data)
#             loss_sup = ce_loss(logits[train_idx_t], y_train_tensor)
#             loss_unsup = model.modularity_loss(A_tensor, logits)
#             total_loss = loss_sup + lambda_mod * loss_unsup

#             total_loss.backward()
#             optimizer.step()

#             if epoch % batch_print_freq == 0 or epoch == 1:
#                 model.eval()
#                 with torch.no_grad():
#                     preds_train = logits[train_idx_t].argmax(dim=1)
#                     acc_train = accuracy_score(y_train_tensor.cpu(), preds_train.cpu())
#                 print(f"Fold {fold} Epoch {epoch}: "
#                       f"TotalLoss={total_loss.item():.6f} | Sup={loss_sup.item():.6f} | "
#                       f"Unsup={loss_unsup.item():.6f} | TrainAcc={acc_train:.4f}")

#         # Evaluation
#         model.eval()
#         with torch.no_grad():
#             out = model(data)
#             preds = out.argmax(dim=1).cpu().numpy()
#             probs = torch.softmax(out, dim=1)[:, 1].cpu().numpy()  # probability for "AD"

#         y_test = y[test_idx_global]
#         y_pred_test = preds[test_idx_global]
#         y_prob_test = probs[test_idx_global]

#         acc = accuracy_score(y_test, y_pred_test)
#         prec = precision_score(y_test, y_pred_test, zero_division=0)
#         rec = recall_score(y_test, y_pred_test, zero_division=0)
#         f1 = f1_score(y_test, y_pred_test, zero_division=0)
#         auc = roc_auc_score(y_test, y_prob_test)
#         ce = log_loss(y_test, y_prob_test)

#         accuracies.append(acc)
#         precisions.append(prec)
#         recalls.append(rec)
#         f1_scores.append(f1)
#         aucs.append(auc)
#         ce_losses.append(ce)

#         print(f"Fold {fold} → "
#               f"Acc={acc:.4f} | Prec={prec:.4f} | Rec={rec:.4f} | "
#               f"F1={f1:.4f} | AUC={auc:.4f} | CE Loss={ce:.4f}")

#     # Average results per λ_mod
#     mean_acc, std_acc = np.mean(accuracies), np.std(accuracies)
#     mean_prec, std_prec = np.mean(precisions), np.std(precisions)
#     mean_rec, std_rec = np.mean(recalls), np.std(recalls)
#     mean_f1, std_f1 = np.mean(f1_scores), np.std(f1_scores)
#     mean_auc, std_auc = np.mean(aucs), np.std(aucs)
#     mean_ce, std_ce = np.mean(ce_losses), np.std(ce_losses)

#     results_summary.append({
#         "λ_mod": lambda_mod,
#         "Accuracy": f"{mean_acc:.4f} ± {std_acc:.4f}",
#         "Precision": f"{mean_prec:.4f} ± {std_prec:.4f}",
#         "Recall": f"{mean_rec:.4f} ± {std_rec:.4f}",
#         "F1": f"{mean_f1:.4f} ± {std_f1:.4f}",
#         "AUC": f"{mean_auc:.4f} ± {std_auc:.4f}",
#         "CE Loss": f"{mean_ce:.4f} ± {std_ce:.4f}",
#     })

#     print(f"\n=== λ_mod = {lambda_mod} → Average Results ===")
#     print(f"Accuracy:  {mean_acc:.4f} ± {std_acc:.4f}")
#     print(f"Precision: {mean_prec:.4f} ± {std_prec:.4f}")
#     print(f"Recall:    {mean_rec:.4f} ± {std_rec:.4f}")
#     print(f"F1-score:  {mean_f1:.4f} ± {std_f1:.4f}")
#     print(f"AUC:       {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")
#     print(f"CE Loss:   {np.mean(ce_losses):.4f} ± {np.std(ce_losses):.4f}")

# # ==========================================
# # Final summary table for all λ_mod values
# # ==========================================
# print("\n\n========== FINAL SUMMARY TABLE (CN vs AD) ==========")
# results_df = pd.DataFrame(results_summary)
# print(results_df.to_string(index=False))

In [16]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import StratifiedShuffleSplit
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, log_loss

# # ==========================================
# # CONFIG
# # ==========================================
# SEED = 42
# np.random.seed(SEED)
# torch.manual_seed(SEED)
# if torch.cuda.is_available():
#     torch.cuda.manual_seed_all(SEED)

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# hidden_dim = 512
# num_epochs = 2000
# lr = 1e-4
# weight_decay = 1e-4
# batch_print_freq = 500  # print every 500 epochs

# # Use same λ list as before
# lambda_mod_list = [0.001, 0.005, 0.009, 0.01, 0.05, 0.09, 0.1, 0.3, 0.5, 0.9, 1, 2, 5, 8]



# results_summary = []


# for lambda_mod in lambda_mod_list:
#     print(f"\n==============================")
#     print(f" Running with λ_mod = {lambda_mod}")
#     print(f"==============================")

#     accuracies, precisions, recalls, f1_scores, aucs, ce_losses = [], [], [], [], [], []

#     sss = StratifiedShuffleSplit(n_splits=20, test_size=0.1, random_state=SEED)

#     for fold, (train_val_idx, test_idx_global) in enumerate(sss.split(X, y), start=1):
#         print(f"\n=== Fold {fold} ===")

#         cn_idx = np.where(y == 0)[0]
#         ad_idx = np.where(y == 1)[0]

#         sss_class = StratifiedShuffleSplit(n_splits=20, test_size=0.1, random_state=fold)
#         cn_train_idx, _ = next(sss_class.split(X[cn_idx], y[cn_idx]))
#         ad_train_idx, _ = next(sss_class.split(X[ad_idx], y[ad_idx]))

#         cn_train = cn_idx[cn_train_idx]
#         ad_train = ad_idx[ad_train_idx]
#         train_idx_final = np.concatenate([cn_train, ad_train])
#         np.random.shuffle(train_idx_final)

#         print(f"Train CN: {len(cn_train)}, Train AD: {len(ad_train)}")

#         train_idx_t = torch.from_numpy(train_idx_final).long().to(device)
#         y_train_tensor = torch.from_numpy(y[train_idx_final]).long().to(device)

#         # Initialize model
#         model = GCN_SemiSupervised(feats_dim, hidden_dim, num_classes, device, activ="RELU").to(device)
#         optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
#         ce_loss = nn.CrossEntropyLoss()

#         # Training loop
#         for epoch in range(1, num_epochs + 1):
#             model.train()
#             optimizer.zero_grad()

#             logits = model(data)
#             loss_sup = ce_loss(logits[train_idx_t], y_train_tensor)
#             loss_unsup = model.modularity_loss(A_tensor, logits)
#             total_loss = loss_sup + lambda_mod * loss_unsup

#             total_loss.backward()
#             optimizer.step()

#             if epoch % batch_print_freq == 0 or epoch == 1:
#                 model.eval()
#                 with torch.no_grad():
#                     preds_train = logits[train_idx_t].argmax(dim=1)
#                     acc_train = accuracy_score(y_train_tensor.cpu(), preds_train.cpu())
#                 print(f"Fold {fold} Epoch {epoch}: "
#                       f"TotalLoss={total_loss.item():.6f} | Sup={loss_sup.item():.6f} | "
#                       f"Unsup={loss_unsup.item():.6f} | TrainAcc={acc_train:.4f}")

#         # Evaluation
#         model.eval()
#         with torch.no_grad():
#             out = model(data)
#             preds = out.argmax(dim=1).cpu().numpy()
#             probs = torch.softmax(out, dim=1)[:, 1].cpu().numpy()  # probability for "AD"

#         y_test = y[test_idx_global]
#         y_pred_test = preds[test_idx_global]
#         y_prob_test = probs[test_idx_global]

#         acc = accuracy_score(y_test, y_pred_test)
#         prec = precision_score(y_test, y_pred_test, zero_division=0)
#         rec = recall_score(y_test, y_pred_test, zero_division=0)
#         f1 = f1_score(y_test, y_pred_test, zero_division=0)
#         auc = roc_auc_score(y_test, y_prob_test)
#         ce = log_loss(y_test, y_prob_test)

#         accuracies.append(acc)
#         precisions.append(prec)
#         recalls.append(rec)
#         f1_scores.append(f1)
#         aucs.append(auc)
#         ce_losses.append(ce)

#         print(f"Fold {fold} → "
#               f"Acc={acc:.4f} | Prec={prec:.4f} | Rec={rec:.4f} | "
#               f"F1={f1:.4f} | AUC={auc:.4f} | CE Loss={ce:.4f}")

#     # Average results per λ_mod
#     mean_acc, std_acc = np.mean(accuracies), np.std(accuracies)
#     mean_prec, std_prec = np.mean(precisions), np.std(precisions)
#     mean_rec, std_rec = np.mean(recalls), np.std(recalls)
#     mean_f1, std_f1 = np.mean(f1_scores), np.std(f1_scores)
#     mean_auc, std_auc = np.mean(aucs), np.std(aucs)
#     mean_ce, std_ce = np.mean(ce_losses), np.std(ce_losses)

#     results_summary.append({
#         "λ_mod": lambda_mod,
#         "Accuracy": f"{mean_acc:.4f} ± {std_acc:.4f}",
#         "Precision": f"{mean_prec:.4f} ± {std_prec:.4f}",
#         "Recall": f"{mean_rec:.4f} ± {std_rec:.4f}",
#         "F1": f"{mean_f1:.4f} ± {std_f1:.4f}",
#         "AUC": f"{mean_auc:.4f} ± {std_auc:.4f}",
#         "CE Loss": f"{mean_ce:.4f} ± {std_ce:.4f}",
#     })

#     print(f"\n=== λ_mod = {lambda_mod} → Average Results ===")
#     print(f"Accuracy:  {mean_acc:.4f} ± {std_acc:.4f}")
#     print(f"Precision: {mean_prec:.4f} ± {std_prec:.4f}")
#     print(f"Recall:    {mean_rec:.4f} ± {std_rec:.4f}")
#     print(f"F1-score:  {mean_f1:.4f} ± {std_f1:.4f}")
#     print(f"AUC:       {np.mean(aucs):.4f} ± {np.std(aucs):.4f}")
#     print(f"CE Loss:   {np.mean(ce_losses):.4f} ± {np.std(ce_losses):.4f}")

# # ==========================================
# # Final summary table for all λ_mod values
# # ==========================================
# print("\n\n========== FINAL SUMMARY TABLE (CN vs AD) ==========")
# results_df = pd.DataFrame(results_summary)
# print(results_df.to_string(index=False))