In [4]:
# Environment setup for the notebook (Colab-style shell installs).
# NOTE(review): the PyG wheel index below is pinned to torch-2.2.0+cu121, while the
# captured output of this cell shows the runtime resolving torch-2.9.0+cpu for the
# later torch-geometric install — confirm the pinned index still matches the runtime.
!pip install torch-scatter -f https://data.pyg.org/whl/torch-2.2.0+cu121.html
!pip install torch-sparse  -f https://data.pyg.org/whl/torch-2.2.0+cu121.html
!pip install torch-geometric

# Analysis stack. NOTE(review): karateclub is requested again at the end of this cell.
!pip install --use-deprecated=legacy-resolver karateclub networkx numpy pandas matplotlib scikit-learn

# torch-geometric is requested a second time here, with the wheel index derived from
# the torch version that is actually installed.
!pip install torch torchvision torchaudio
!pip install torch-geometric \
    -f https://data.pyg.org/whl/torch-$(python -c "import torch; print(torch.__version__)").html


# NOTE(review): the captured output shows pip collecting numpy<1.23.0 for karateclub
# and failing at "Getting requirements to build wheel" — verify which of the two
# karateclub installs actually succeeds on a fresh runtime.
!pip install optuna
!pip install karateclub

Looking in links: https://data.pyg.org/whl/torch-2.2.0+cu121.html
Looking in links: https://data.pyg.org/whl/torch-2.2.0+cu121.html
Looking in links: https://data.pyg.org/whl/torch-2.9.0+cpu.html
Collecting numpy<1.23.0 (from karateclub)
  Using cached numpy-1.22.4.zip (11.5 MB)
  Installing build dependencies ... [?25l[?25hdone
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Getting requirements to build wheel ... [?25l[?25herror
[1;31merror[0m: [1msubprocess-exited-with-error[0m

[31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
[31m│[0m exit code: [1;36m1[0m
[31m╰─>[0m See above for output.

[1;35mnote[0m: This error originates from a subprocess, and is likely not a p

In [5]:
import os
from google.colab import drive
# Mount Google Drive so that checkpoints and results live under /content/drive.
drive.mount('/content/drive')

# Folder layout on Drive (adjust to match your own folders).
# BASE_CLASSIF_DIR / MODELS_DIR: where GIN checkpoints are looked up.
BASE_CLASSIF_DIR = "/content/drive/MyDrive/InformationSystems/Classification"
MODELS_DIR = f"{BASE_CLASSIF_DIR}/models"
# RESULTS_DIR / STABILITY_DIR: where the stability results CSV is written.
RESULTS_DIR = "/content/drive/MyDrive/InformationSystems/Stability_NEW"
STABILITY_DIR = f"{RESULTS_DIR}/results"

# Only the output directory is created here; MODELS_DIR is not created by this cell.
os.makedirs(STABILITY_DIR, exist_ok=True)
print("MODELS_DIR:", MODELS_DIR)
print("STABILITY_DIR:", STABILITY_DIR)

Mounted at /content/drive
MODELS_DIR: /content/drive/MyDrive/InformationSystems/Classification/models
STABILITY_DIR: /content/drive/MyDrive/InformationSystems/Stability_NEW/results


In [6]:

import matplotlib.pyplot as plt

In [7]:
import time, json, random, psutil
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
import torch.nn.functional as F

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

import networkx as nx
from karateclub import Graph2Vec, NetLSD

from torch.nn import Linear, Sequential, ReLU, BatchNorm1d
from torch_geometric.nn import GINConv, global_add_pool
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.utils import to_networkx, to_undirected
from torch_geometric.transforms import OneHotDegree

import warnings
# Silences every Python warning for the rest of the session (including deprecation
# and numerical warnings), not just noisy library ones.
warnings.filterwarnings("ignore")

# Use the GPU when one is visible to PyTorch, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("DEVICE:", DEVICE)

def set_all_seeds(seed: int):
    """
    Seed every global random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global RNG and PyTorch's
    CPU generator; when CUDA is available, all CUDA devices are seeded as well.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

  import torch_geometric.typing
  import torch_geometric.typing


DEVICE: cpu


In [8]:
def make_deterministic_perm(n: int, seed: int) -> np.ndarray:
    """Return a permutation of ``range(n)`` drawn from a private RNG seeded with ``seed``."""
    local_rng = np.random.RandomState(seed)
    return local_rng.permutation(n)

def sanitize_embeddings(embeddings: np.ndarray) -> np.ndarray:
    """
    Convert ``embeddings`` to a float32 array with non-finite entries zeroed.

    NaN, +inf and -inf are each replaced by 0.0.
    """
    as_float32 = np.asarray(embeddings, dtype=np.float32)
    return np.nan_to_num(as_float32, nan=0.0, posinf=0.0, neginf=0.0)

def load_pyg_dataset_deterministic(dataset_name: str, seed: int):
    """
    Load a TUDataset and reorder it with a seed-determined permutation.

    If the dataset reports zero node features (or its first graph has
    ``x is None``), the dataset is loaded a second time with a
    ``OneHotDegree`` transform whose ``max_degree`` is the largest per-node
    count of ``edge_index[0]`` entries found over all graphs.

    Returns:
        (dataset, perm): the reordered dataset and the permutation of original
        indices that produced that order.
    """
    tu_root = 'data/TUDataset'

    plain = TUDataset(root=tu_root, name=dataset_name)
    order = make_deterministic_perm(len(plain), seed)
    plain_shuffled = plain[order.tolist()]

    has_node_features = plain_shuffled.num_features != 0 and plain_shuffled[0].x is not None
    if has_node_features:
        return plain_shuffled, order

    # Featureless graphs: scan every graph for the largest node degree so the
    # one-hot encoding is wide enough for the whole dataset.
    top_degree = 0
    for graph in plain_shuffled:
        degree_counts = torch.bincount(graph.edge_index[0], minlength=graph.num_nodes)
        top_degree = max(top_degree, int(degree_counts.max()))

    degree_encoder = OneHotDegree(max_degree=top_degree)
    encoded = TUDataset(root=tu_root, name=dataset_name, transform=degree_encoder)
    # Same length and seed as above, so this is the same ordering as `order`.
    encoded_order = make_deterministic_perm(len(encoded), seed)
    return encoded[encoded_order.tolist()], encoded_order

def pyg_to_nx_list(dataset):
    """
    Convert each PyG graph in ``dataset`` to an undirected NetworkX graph.

    Returns:
        (graphs, labels): the list of NetworkX graphs and an integer NumPy
        array holding each graph's scalar label ``y``.
    """
    converted = [
        (to_networkx(item, to_undirected=True), int(item.y.item()))
        for item in dataset
    ]
    nx_graphs = [graph for graph, _ in converted]
    targets = [target for _, target in converted]
    return nx_graphs, np.array(targets, dtype=int)

def enzymes_filter_mask_from_nx(graphs, min_nodes=3):
    """Return a boolean array that is True for graphs with at least ``min_nodes`` nodes."""
    keep_flags = []
    for graph in graphs:
        keep_flags.append(graph.number_of_nodes() >= min_nodes)
    return np.array(keep_flags, dtype=bool)

def recreate_split_positions_for_unsup(dataset_name: str, seed: int, test_size=0.2):
    """
    Rebuild the train/test split used for the unsupervised (Graph2Vec/NetLSD) runs.

    Steps, in order:
      1. load the dataset in its deterministic permuted order,
      2. convert every graph to NetworkX,
      3. for ENZYMES only (case-insensitive name match), drop graphs with
         fewer than 3 nodes,
      4. stratified ``train_test_split`` over positions with ``random_state=seed``.

    Returns:
        graphs (after filtering), labels (after filtering), ds_indices
        (permuted original indices, after filtering), train_pos, test_pos
        (positions into the filtered lists).
    """
    dataset, perm = load_pyg_dataset_deterministic(dataset_name, seed)
    graphs, labels = pyg_to_nx_list(dataset)
    ds_indices = perm.copy()

    if dataset_name.upper() == "ENZYMES":
        keep = enzymes_filter_mask_from_nx(graphs, min_nodes=3)
        graphs = [graph for graph, flag in zip(graphs, keep) if flag]
        labels, ds_indices = labels[keep], ds_indices[keep]

    positions = np.arange(len(labels))
    # Only the position arrays are needed; the split label arrays are discarded.
    split = train_test_split(
        positions, labels, test_size=test_size, random_state=seed, stratify=labels
    )
    train_pos, test_pos = split[0], split[1]
    return graphs, labels, ds_indices, train_pos, test_pos

In [9]:
def perturb_edges_nx(g: nx.Graph, remove_pct=0.05, add_pct=0.05, seed=0):
    """
    Return a copy of ``g`` with a fraction of edges removed and new edges added.

    Both counts are ``floor(pct * m)`` where ``m`` is the number of edges in
    the input graph (``max(1, m)`` for additions). Removed edges are sampled
    without replacement from the existing edges; added edges are sampled
    without replacement from the non-edges that remain after removal.
    Graphs with fewer than two nodes are returned as an unmodified copy.
    """
    rng = np.random.RandomState(seed)
    perturbed = g.copy()
    if perturbed.number_of_nodes() < 2:
        return perturbed

    original_edges = list(perturbed.edges())
    n_edges = len(original_edges)

    # --- removal ---
    n_drop = int(np.floor(remove_pct * n_edges))
    if n_drop > 0 and n_edges > 0:
        drop_idx = rng.choice(np.arange(n_edges), size=min(n_drop, n_edges), replace=False)
        # remove_edges_from silently skips edges that are no longer present.
        perturbed.remove_edges_from(original_edges[i] for i in drop_idx)

    # --- addition ---
    candidates = list(nx.non_edges(perturbed))
    n_new = int(np.floor(add_pct * max(1, n_edges)))
    if n_new <= 0 or not candidates:
        return perturbed

    pick_idx = rng.choice(np.arange(len(candidates)), size=min(n_new, len(candidates)), replace=False)
    picked = (candidates[i] for i in pick_idx)
    # The generator is consumed lazily, so each has_edge check sees earlier additions.
    perturbed.add_edges_from(
        (u, v) for u, v in picked if u != v and not perturbed.has_edge(u, v)
    )
    return perturbed

def shuffle_node_attributes_pyg(data, seed=0):
    """
    Return a clone of ``data`` whose node-feature rows are randomly permuted.

    The permutation covers ``data.num_nodes`` rows and is drawn from a private
    RNG seeded with ``seed``. When ``data.x`` is None the input object itself
    is returned unchanged (no clone is made).
    """
    features = data.x
    if features is None:
        return data

    row_order = np.random.RandomState(seed).permutation(data.num_nodes)
    row_index = torch.tensor(row_order, dtype=torch.long)

    shuffled = data.clone()
    shuffled.x = features[row_index]
    return shuffled

def perturb_edges_pyg(data, remove_pct=0.05, add_pct=0.05, seed=0):
    """
    Return a clone of a PyG graph with randomly removed and added undirected edges.

    The edge list is first symmetrised and reduced to unique undirected pairs
    (self-loops dropped). ``floor(remove_pct * m)`` pairs are removed and up to
    ``floor(add_pct * max(1, m))`` new pairs are added, where ``m`` is the number
    of unique undirected pairs before perturbation. ``edge_index`` is then
    rebuilt with both directions of every remaining pair.

    Args:
        data: graph object exposing ``clone()``, ``edge_index`` and ``num_nodes``.
        remove_pct: fraction of undirected edges to remove.
        add_pct: fraction (relative to the original edge count) of edges to add.
        seed: seed of the private RNG used for all sampling.

    Returns:
        The perturbed clone. If the input has no edges, the clone is returned
        without touching ``edge_index``.

    NOTE(review): only ``edge_index`` is rebuilt; any per-edge attribute stored
    on ``data`` is left as-is and would no longer line up with the new edges —
    confirm no consumer reads edge attributes from the perturbed graphs.
    """
    rng = np.random.RandomState(seed)
    d2 = data.clone()
    if d2.edge_index is None or d2.edge_index.numel() == 0:
        return d2

    ei = to_undirected(d2.edge_index)
    u = ei[0].cpu().numpy()
    v = ei[1].cpu().numpy()

    # unique undirected pairs, stored as (smaller, larger); self-loops skipped
    pairs = set()
    for a, b in zip(u, v):
        if a == b:
            continue
        x, y = (a, b) if a < b else (b, a)
        pairs.add((x, y))
    pairs = list(pairs)

    m = len(pairs)
    n = int(d2.num_nodes)

    # remove
    k_remove = int(np.floor(remove_pct * m))
    if k_remove > 0 and m > 0:
        rem_idx = rng.choice(np.arange(m), size=min(k_remove, m), replace=False)
        rem_set = set([pairs[i] for i in rem_idx])
        pairs = [e for e in pairs if e not in rem_set]

    # add (rejection sampling over random node pairs)
    k_add = int(np.floor(add_pct * max(1, m)))
    if k_add > 0 and n > 1:
        existing = set(pairs)

        # Never ask for more new edges than there are free node pairs. Without
        # this cap a complete (or nearly complete) graph can never satisfy
        # k_add and the loop below spins through all 50,000 tries for nothing.
        occupied = sum(1 for p, q in existing if p >= 0 and q < n)
        free_pairs = n * (n - 1) // 2 - occupied
        k_add = min(k_add, free_pairs)

        tries = 0
        while k_add > 0 and tries < 50_000:
            a = rng.randint(0, n)
            b = rng.randint(0, n)
            if a == b:
                tries += 1
                continue
            x, y = (a, b) if a < b else (b, a)
            if (x, y) not in existing:
                existing.add((x, y))
                pairs.append((x, y))
                k_add -= 1
            tries += 1

    # rebuild edge_index with both directions
    if len(pairs) == 0:
        d2.edge_index = torch.empty((2, 0), dtype=torch.long)
        return d2

    src, dst = [], []
    for a, b in pairs:
        src += [a, b]
        dst += [b, a]
    d2.edge_index = torch.tensor([src, dst], dtype=torch.long)
    return d2

def mean_cosine_similarity(A: np.ndarray, B: np.ndarray, eps=1e-12) -> float:
    """
    Mean of the row-wise cosine similarities between ``A`` and ``B``.

    Both inputs are cast to float32 and each row is divided by its L2 norm
    plus ``eps``; row ``i`` of ``A`` is compared with row ``i`` of ``B``.
    """
    def _unit_rows(matrix):
        as32 = matrix.astype(np.float32)
        return as32 / (np.linalg.norm(as32, axis=1, keepdims=True) + eps)

    a_unit = _unit_rows(A)
    b_unit = _unit_rows(B)
    per_row = (a_unit * b_unit).sum(axis=1)
    return float(per_row.mean())

In [10]:
class GIN(torch.nn.Module):
    """
    Graph classifier: a stack of GINConv layers, sum pooling and a 2-layer head.

    Each GINConv wraps a ``Linear -> ReLU -> Linear`` MLP; the first layer maps
    ``num_features`` to ``hidden_dim`` and the rest map ``hidden_dim`` to
    ``hidden_dim``. Every conv output goes through ReLU and then its own
    BatchNorm1d. Submodule names (``convs``, ``bns``, ``fc1``, ``fc2``) are
    what the saved state dicts are keyed on.
    """

    def __init__(self, num_features, hidden_dim, num_classes, num_layers=5, dropout=0.5):
        super().__init__()

        def _mlp(input_dim):
            return Sequential(
                Linear(input_dim, hidden_dim),
                ReLU(),
                Linear(hidden_dim, hidden_dim),
            )

        # Only the first layer sees the raw feature width.
        input_dims = [num_features if depth == 0 else hidden_dim for depth in range(num_layers)]

        self.convs = torch.nn.ModuleList([GINConv(_mlp(width)) for width in input_dims])
        self.bns = torch.nn.ModuleList([BatchNorm1d(hidden_dim) for _ in range(num_layers)])
        self.fc1 = Linear(hidden_dim, hidden_dim)
        self.fc2 = Linear(hidden_dim, num_classes)
        self.dropout = dropout

    def forward(self, x, edge_index, batch):
        """Return per-graph class logits for a batch of graphs."""
        h = x
        for conv, bn in zip(self.convs, self.bns):
            h = bn(F.relu(conv(h, edge_index)))
        graph_repr = global_add_pool(h, batch)
        hidden = F.relu(self.fc1(graph_repr))
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return self.fc2(hidden)

def evaluate(model, loader, criterion):
    """
    Evaluate ``model`` on every batch of ``loader`` without gradients.

    Returns:
        (acc, f1, auc, avg_loss): accuracy, weighted F1, ROC-AUC and the mean
        of the per-batch losses. AUC uses the class-1 probability when exactly
        two distinct labels are present, otherwise one-vs-rest over the full
        softmax output; it is ``nan`` if scikit-learn raises ``ValueError``.
    """
    model.eval()
    y_pred, y_true, y_prob = [], [], []
    running_loss = 0.0
    batches_seen = 0

    with torch.no_grad():
        for batch in loader:
            batch = batch.to(DEVICE)
            logits = model(batch.x, batch.edge_index, batch.batch)

            running_loss += criterion(logits, batch.y).item()
            batches_seen += 1

            y_pred.extend(logits.argmax(dim=1).cpu().numpy())
            y_true.extend(batch.y.cpu().numpy())
            y_prob.extend(F.softmax(logits, dim=1).cpu().numpy())

    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    # max(1, ...) keeps an empty loader from dividing by zero.
    avg_loss = running_loss / max(1, batches_seen)

    try:
        is_binary = len(np.unique(y_true)) == 2
        if is_binary:
            auc = roc_auc_score(y_true, np.array(y_prob)[:, 1])
        else:
            auc = roc_auc_score(y_true, y_prob, multi_class='ovr')
    except ValueError:
        auc = np.nan

    return acc, f1, auc, avg_loss

def get_gin_embeddings(model, loader):
    """
    Extract graph-level embeddings: the sum-pooled output of the conv stack.

    Runs ``model.convs`` / ``model.bns`` directly (ReLU after each conv, then
    batch norm) and pools with ``global_add_pool``; the classifier head is not
    applied.

    Returns:
        (embeddings, labels): a 2-D NumPy array with one row per graph and an
        integer label array. An empty loader yields a ``(0, 1)`` float32 array.
    """
    model.eval()
    pooled_chunks = []
    label_values = []

    with torch.no_grad():
        for batch in loader:
            batch = batch.to(DEVICE)
            h = batch.x
            for conv, bn in zip(model.convs, model.bns):
                h = bn(F.relu(conv(h, batch.edge_index)))

            pooled = global_add_pool(h, batch.batch)
            pooled_chunks.append(pooled.detach().cpu().numpy())
            label_values.extend(batch.y.detach().cpu().numpy())

    if pooled_chunks:
        embeddings = np.concatenate(pooled_chunks, axis=0)
    else:
        embeddings = np.zeros((0, 1), dtype=np.float32)
    return embeddings, np.array(label_values, dtype=int)

In [11]:
def embed_graph2vec(graphs, embedding_dim=128, epochs=200, seed=0):
    """
    Fit Graph2Vec (2 WL iterations) on ``graphs`` and return sanitized embeddings.

    NOTE(review): ``seed`` is applied only through ``set_all_seeds``; it is not
    passed to the ``Graph2Vec`` constructor, and training uses
    ``os.cpu_count()`` workers — confirm whether the model reseeds internally
    and whether multi-worker training is reproducible before relying on
    ``seed`` to control the result.
    """
    set_all_seeds(seed)
    model = Graph2Vec(
        dimensions=embedding_dim,
        wl_iterations=2,
        epochs=epochs,
        workers=os.cpu_count(),
    )
    model.fit(graphs)
    raw_embedding = model.get_embedding()
    return sanitize_embeddings(raw_embedding)

def embed_netlsd(graphs, seed=0):
    """
    Fit NetLSD with its default settings on ``graphs`` and return sanitized embeddings.

    ``seed`` is applied to the global RNGs only, so that this function has the
    same calling convention as the other embedders.
    """
    set_all_seeds(seed)
    descriptor = NetLSD()
    descriptor.fit(graphs)
    raw_embedding = descriptor.get_embedding()
    return sanitize_embeddings(raw_embedding)

def train_svm_on_embeddings(X_train, y_train, C=1.0, gamma="scale", seed=0):
    """Fit and return a ``StandardScaler -> RBF SVC`` pipeline on the training embeddings."""
    scaler = StandardScaler()
    svm = SVC(kernel="rbf", probability=False, C=C, gamma=gamma, random_state=seed)
    pipeline = make_pipeline(scaler, svm)
    pipeline.fit(X_train, y_train)
    return pipeline

def eval_accuracy(clf, X_test, y_test):
    """Return the accuracy of ``clf`` on ``(X_test, y_test)`` as a plain float."""
    return float(accuracy_score(y_test, clf.predict(X_test)))

In [12]:
def find_gin_ckpt(dataset_name, seed):
    """
    Locate the saved GIN checkpoint for ``dataset_name`` and ``seed``.

    The canonical path ``{MODELS_DIR}/GIN_{dataset_name}_{seed}.pth`` is tried
    first. Otherwise ``MODELS_DIR`` is scanned for a file that starts with
    ``GIN_{dataset_name}_``, ends with ``.pth`` and contains ``_{seed}.pth``.

    Returns:
        Path of the matching checkpoint file.

    Raises:
        FileNotFoundError: if ``MODELS_DIR`` does not exist or no file matches.
    """
    exact_path = f"{MODELS_DIR}/GIN_{dataset_name}_{seed}.pth"
    if os.path.exists(exact_path):
        return exact_path

    # fallback: search in directory
    if not os.path.exists(MODELS_DIR):
        raise FileNotFoundError(f"MODELS_DIR not found: {MODELS_DIR}")

    name_prefix = f"GIN_{dataset_name}_"
    seed_marker = f"_{seed}.pth"
    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # reproducible when more than one file matches.
    for fn in sorted(os.listdir(MODELS_DIR)):
        if fn.startswith(name_prefix) and fn.endswith(".pth") and seed_marker in fn:
            return os.path.join(MODELS_DIR, fn)

    raise FileNotFoundError(
        f"Could not find GIN checkpoint for dataset={dataset_name}, seed={seed} in {MODELS_DIR}"
    )

In [13]:
def run_stability_unsupervised(
    method_name: str,             # "Graph2Vec" | "NetLSD"
    dataset_name: str,
    seed: int,
    remove_pct=0.05,
    add_pct=0.05,
    g2v_dim=128,
    g2v_epochs=200,
    svm_C=1.0,
    svm_gamma="scale",
    test_size=0.2,
):
    """
    Measure embedding stability and accuracy drop for an unsupervised embedder.

    The graphs are arranged train-first, test-last, embedded once as-is and
    once after per-graph edge perturbation (the second embedding run uses
    ``seed + 999``). Stability is the mean row-wise cosine similarity between
    the two embedding matrices. An SVM trained on the original train
    embeddings is scored on the original and on the perturbed test embeddings.

    Returns:
        A dict (one result row) with the configuration, the stability score,
        both accuracies, their difference and a timestamp.

    Raises:
        ValueError: if ``method_name`` is neither "Graph2Vec" nor "NetLSD".
    """
    def _embed(graph_list, run_seed):
        if method_name == "Graph2Vec":
            return embed_graph2vec(graph_list, embedding_dim=g2v_dim, epochs=g2v_epochs, seed=run_seed)
        if method_name == "NetLSD":
            return embed_netlsd(graph_list, seed=run_seed)
        raise ValueError("Unknown unsupervised method")

    graphs, labels, ds_indices, train_pos, test_pos = recreate_split_positions_for_unsup(
        dataset_name, seed, test_size=test_size
    )

    # Train block first, then test block, so slicing at n_train separates them.
    ordering = np.concatenate([train_pos, test_pos])
    graphs_ordered = [graphs[pos] for pos in ordering]
    labels_ordered = labels[ordering]

    emb_original = _embed(graphs_ordered, seed)

    # Each graph gets its own perturbation seed derived from (seed, position).
    graphs_perturbed = []
    for idx, graph in enumerate(graphs_ordered):
        graphs_perturbed.append(
            perturb_edges_nx(graph, remove_pct=remove_pct, add_pct=add_pct, seed=seed*10_000 + idx)
        )

    emb_perturbed = _embed(graphs_perturbed, seed+999)

    stability = mean_cosine_similarity(emb_original, emb_perturbed)

    # Classifier is fitted on ORIGINAL train embeddings only.
    n_train = len(train_pos)
    y_train = labels_ordered[:n_train]
    y_test = labels_ordered[n_train:]

    clf = train_svm_on_embeddings(emb_original[:n_train], y_train, C=svm_C, gamma=svm_gamma, seed=seed)
    acc_original = eval_accuracy(clf, emb_original[n_train:], y_test)
    acc_perturbed = eval_accuracy(clf, emb_perturbed[n_train:], y_test)

    return {
        "part": "c",
        "method": method_name,
        "dataset": dataset_name,
        "seed": seed,
        "perturb_remove_pct": remove_pct,
        "perturb_add_pct": add_pct,
        "attr_shuffle": False,
        "stability_mean_cosine": float(stability),
        "acc_original_test": float(acc_original),
        "acc_perturbed_test": float(acc_perturbed),
        "acc_drop": float(acc_original - acc_perturbed),
        "n_graphs": int(len(labels_ordered)),
        "n_train": int(n_train),
        "timestamp": time.time(),
    }


def run_stability_gin(
    dataset_name: str,
    seed: int,
    remove_pct=0.05,
    add_pct=0.05,
    shuffle_attr=True,
    batch_size=32,
):
    """
    Measure embedding stability and accuracy drop for a saved GIN checkpoint.

    The checkpoint for ``(dataset_name, seed)`` is loaded onto ``DEVICE``.
    Pooled embeddings are computed for the whole dataset before and after
    per-graph edge perturbation (optionally followed by node-feature
    shuffling); stability is their mean row-wise cosine similarity. Accuracy
    is evaluated on the last 20% of the deterministically ordered dataset,
    original versus perturbed.

    Returns:
        A dict (one result row) with the configuration, the stability score,
        both accuracies, their difference, a timestamp and the checkpoint path.
    """
    set_all_seeds(seed)

    dataset, _ = load_pyg_dataset_deterministic(dataset_name, seed)

    ckpt_path = find_gin_ckpt(dataset_name, seed)
    ckpt = torch.load(ckpt_path, map_location="cpu")

    # The checkpoint stores the architecture hyper-parameters next to the weights.
    arch = {key: ckpt[key] for key in ("hidden_dim", "num_classes", "num_layers", "dropout")}
    model = GIN(num_features=ckpt["num_node_features"], **arch).to(DEVICE)
    model.load_state_dict(ckpt["state_dict"])
    model.eval()

    def _ordered_loader(items):
        return DataLoader(items, batch_size=batch_size, shuffle=False)

    def _perturb(position, graph):
        changed = perturb_edges_pyg(graph, remove_pct=remove_pct, add_pct=add_pct, seed=seed*10_000 + position)
        if shuffle_attr:
            changed = shuffle_node_attributes_pyg(changed, seed=seed*20_000 + position)
        return changed

    # Embeddings over the whole dataset, original then perturbed.
    emb_original, _ = get_gin_embeddings(model, _ordered_loader(dataset))

    pert_list = [_perturb(position, graph) for position, graph in enumerate(dataset)]
    emb_perturbed, _ = get_gin_embeddings(model, _ordered_loader(pert_list))

    stability = mean_cosine_similarity(emb_original, emb_perturbed)

    # Test split = everything after the first 80% of the ordered dataset.
    n = len(dataset)
    n_train = int(0.8 * n)

    crit = torch.nn.CrossEntropyLoss()
    acc_original = evaluate(model, _ordered_loader(dataset[n_train:]), crit)[0]
    acc_perturbed = evaluate(model, _ordered_loader(pert_list[n_train:]), crit)[0]

    return {
        "part": "c",
        "method": "GIN",
        "dataset": dataset_name,
        "seed": seed,
        "perturb_remove_pct": remove_pct,
        "perturb_add_pct": add_pct,
        "attr_shuffle": bool(shuffle_attr),
        "stability_mean_cosine": float(stability),
        "acc_original_test": float(acc_original),
        "acc_perturbed_test": float(acc_perturbed),
        "acc_drop": float(acc_original - acc_perturbed),
        "n_graphs": int(n),
        "n_train": int(n_train),
        "timestamp": time.time(),
        "gin_ckpt_path": ckpt_path,
    }

In [14]:
def _run_stability_condition(ds, seed, rp, ap, perturb_mode, g2v_dim, g2v_epochs):
    """Run Graph2Vec, NetLSD and GIN (no-shuffle, then shuffle) for one condition; return the four tagged rows in that order."""
    condition_rows = []

    # Graph2Vec
    r = run_stability_unsupervised(
        method_name="Graph2Vec",
        dataset_name=ds,
        seed=seed,
        remove_pct=rp,
        add_pct=ap,
        g2v_dim=g2v_dim,
        g2v_epochs=g2v_epochs,
    )
    r["perturb_mode"] = perturb_mode
    condition_rows.append(r)
    print("  Graph2Vec done")

    # NetLSD (the Graph2Vec-specific knobs are left at their defaults)
    r = run_stability_unsupervised(
        method_name="NetLSD",
        dataset_name=ds,
        seed=seed,
        remove_pct=rp,
        add_pct=ap,
    )
    r["perturb_mode"] = perturb_mode
    condition_rows.append(r)
    print("  NetLSD done")

    # GIN: no-shuffle + shuffle
    for shuffle in (False, True):
        r = run_stability_gin(
            dataset_name=ds,
            seed=seed,
            remove_pct=rp,
            add_pct=ap,
            shuffle_attr=shuffle,
            batch_size=32
        )
        r["perturb_mode"] = perturb_mode
        condition_rows.append(r)
        print(f"  GIN done (shuffle_attr={shuffle})")

    return condition_rows


def run_stability_experiments(
    datasets=("MUTAG", "ENZYMES", "IMDB-MULTI"),
    seeds=(42, 43, 44),
    remove_pcts=(0.0, 0.05, 0.1, 0.2),
    add_pcts=(0.0, 0.05, 0.1, 0.2),
    g2v_dim=128,
    g2v_epochs=200,
    out_csv_name="stability_results.csv"
):
    """
    Runs TWO separate experiment sweeps, in this order:

    A) removal-only: vary remove_pct, set add_pct=0
    B) addition-only: vary add_pct, set remove_pct=0

    Within each sweep the loop order is dataset -> seed -> percentage, and each
    condition produces four rows: Graph2Vec, NetLSD, GIN without attribute
    shuffling, GIN with attribute shuffling. Every row gets a 'perturb_mode'
    column ("removal_only" / "addition_only").

    If 0.0 appears in both ``remove_pcts`` and ``add_pcts``, the unperturbed
    condition is run once per sweep, so it shows up under both modes.

    The rows of this call are written to ``STABILITY_DIR/out_csv_name``. If
    that file already exists the rows are appended WITHOUT a header, so the
    existing file is assumed to have the same columns in the same order.

    Returns:
        DataFrame holding only the rows produced by this call.
    """
    rows = []
    start = time.time()

    # (banner shown in the log, value of 'perturb_mode', [(remove_pct, add_pct), ...])
    sweeps = (
        ("REMOVAL-ONLY", "removal_only", [(rp, 0.0) for rp in remove_pcts]),
        ("ADDITION-ONLY", "addition_only", [(0.0, ap) for ap in add_pcts]),
    )

    for banner, perturb_mode, conditions in sweeps:
        for ds in datasets:
            for seed in seeds:
                for rp, ap in conditions:
                    print(f"\n--- [{banner}] {ds} seed={seed} remove={rp} add={ap} ---")
                    rows.extend(
                        _run_stability_condition(ds, seed, rp, ap, perturb_mode, g2v_dim, g2v_epochs)
                    )

    df = pd.DataFrame(rows)

    out_csv = os.path.join(STABILITY_DIR, out_csv_name)
    if os.path.exists(out_csv):
        df.to_csv(out_csv, mode="a", index=False, header=False)
    else:
        df.to_csv(out_csv, index=False)

    elapsed = time.time() - start
    print("\nSaved:", out_csv)
    print("Elapsed (s):", round(elapsed, 2))
    return df

In [15]:
# Full sweep: 3 datasets x 3 seeds x 4 percentages, once removal-only and once
# addition-only. This is a long-running cell.
# NOTE(review): results are appended to an existing stability_results.csv, so
# re-running this cell adds a second copy of every row to the file on Drive.
df_stab = run_stability_experiments(
    datasets=("MUTAG", "ENZYMES", "IMDB-MULTI"),
    seeds=(42, 43, 44),
    remove_pcts=(0.0, 0.05, 0.1, 0.2),
    add_pcts=(0.0, 0.05, 0.1, 0.2),
    g2v_dim=128,
    g2v_epochs=200,
    out_csv_name="stability_results.csv"
)

# Display the rows produced by this run (not the accumulated CSV).
df_stab


--- [REMOVAL-ONLY] MUTAG seed=42 remove=0.0 add=0.0 ---


Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip
Processing...
Done!


  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] MUTAG seed=42 remove=0.05 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] MUTAG seed=42 remove=0.1 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] MUTAG seed=42 remove=0.2 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] MUTAG seed=43 remove=0.0 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] MUTAG seed=43 remove=0.05 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] MUTAG seed=43 remove=0.1 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True

Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip
Processing...
Done!


  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] ENZYMES seed=42 remove=0.05 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] ENZYMES seed=42 remove=0.1 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] ENZYMES seed=42 remove=0.2 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] ENZYMES seed=43 remove=0.0 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] ENZYMES seed=43 remove=0.05 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] ENZYMES seed=43 remove=0.1 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuff

Downloading https://www.chrsmrrs.com/graphkerneldatasets/IMDB-MULTI.zip
Processing...
Done!


  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] IMDB-MULTI seed=42 remove=0.05 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] IMDB-MULTI seed=42 remove=0.1 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] IMDB-MULTI seed=42 remove=0.2 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] IMDB-MULTI seed=43 remove=0.0 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] IMDB-MULTI seed=43 remove=0.05 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)
  GIN done (shuffle_attr=True)

--- [REMOVAL-ONLY] IMDB-MULTI seed=43 remove=0.1 add=0.0 ---
  Graph2Vec done
  NetLSD done
  GIN done (shuffle_attr=False)

Unnamed: 0,part,method,dataset,seed,perturb_remove_pct,perturb_add_pct,attr_shuffle,stability_mean_cosine,acc_original_test,acc_perturbed_test,acc_drop,n_graphs,n_train,timestamp,perturb_mode,gin_ckpt_path
0,c,Graph2Vec,MUTAG,42,0.00,0.0,False,0.998024,0.763158,0.763158,0.000000,188,150,1.769552e+09,removal_only,
1,c,NetLSD,MUTAG,42,0.00,0.0,False,1.000000,0.815789,0.815789,0.000000,188,150,1.769552e+09,removal_only,
2,c,GIN,MUTAG,42,0.00,0.0,False,1.000000,0.868421,0.868421,0.000000,188,150,1.769552e+09,removal_only,/content/drive/MyDrive/InformationSystems/Clas...
3,c,GIN,MUTAG,42,0.00,0.0,True,0.580944,0.868421,0.473684,0.394737,188,150,1.769552e+09,removal_only,/content/drive/MyDrive/InformationSystems/Clas...
4,c,Graph2Vec,MUTAG,42,0.05,0.0,False,0.765581,0.763158,0.684211,0.078947,188,150,1.769552e+09,removal_only,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,c,GIN,IMDB-MULTI,44,0.00,0.1,True,0.803556,0.363333,0.366667,-0.003333,1500,1200,1.769558e+09,addition_only,/content/drive/MyDrive/InformationSystems/Clas...
284,c,Graph2Vec,IMDB-MULTI,44,0.00,0.2,False,0.496331,0.483333,0.460000,0.023333,1500,1200,1.769558e+09,addition_only,
285,c,NetLSD,IMDB-MULTI,44,0.00,0.2,False,0.999984,0.470000,0.473333,-0.003333,1500,1200,1.769558e+09,addition_only,
286,c,GIN,IMDB-MULTI,44,0.00,0.2,False,0.936243,0.363333,0.376667,-0.013333,1500,1200,1.769559e+09,addition_only,/content/drive/MyDrive/InformationSystems/Clas...


In [16]:
def _pm(mean, std, decimals=4):
    """Format mean ± std nicely."""
    if mean is None or (isinstance(mean, float) and np.isnan(mean)):
        return None
    if std is None or (isinstance(std, float) and np.isnan(std)):
        std = 0.0
    return f"{mean:.{decimals}f} ± {std:.{decimals}f}"

def summarize_stability(df: pd.DataFrame, decimals=4):
    """
    Aggregate per-run stability rows into one row per experimental condition.

    Rows are grouped by method, dataset, remove/add percentage and
    ``attr_shuffle`` (so the two GIN variants stay separate). For stability,
    accuracy drop, original accuracy and perturbed accuracy the result holds
    numeric ``*_mean`` / ``*_std`` columns plus a formatted ``*_mean±std``
    string; ``n_runs`` counts the rows in each group. A ``method_label``
    column distinguishes "GIN (shuffle)" from "GIN (no-shuffle)".

    The result is sorted by dataset, remove pct, add pct and method label.
    """
    keys = ["method", "dataset", "perturb_remove_pct", "perturb_add_pct", "attr_shuffle"]

    # output prefix -> source column in the raw results
    metrics = {
        "stability": "stability_mean_cosine",
        "acc_drop": "acc_drop",
        "acc_original": "acc_original_test",
        "acc_perturbed": "acc_perturbed_test",
    }

    named_aggs = {}
    for prefix, source_col in metrics.items():
        named_aggs[f"{prefix}_mean"] = (source_col, "mean")
        named_aggs[f"{prefix}_std"] = (source_col, "std")
    named_aggs["n_runs"] = ("seed", "count")

    table = df.groupby(keys, dropna=False).agg(**named_aggs).reset_index()

    # A single-run group has an undefined std; show it as 0.
    for prefix in metrics:
        std_col = f"{prefix}_std"
        table[std_col] = table[std_col].fillna(0.0)

    # Human-readable "mean ± std" columns, in the same metric order.
    for prefix in metrics:
        means = table[f"{prefix}_mean"]
        stds = table[f"{prefix}_std"]
        table[f"{prefix}_mean±std"] = [_pm(m, s, decimals) for m, s in zip(means, stds)]

    table["method_label"] = table.apply(
        lambda row: (
            ("GIN (shuffle)" if bool(row["attr_shuffle"]) else "GIN (no-shuffle)")
            if row["method"] == "GIN"
            else row["method"]
        ),
        axis=1,
    )

    sort_keys = ["dataset", "perturb_remove_pct", "perturb_add_pct", "method_label"]
    return table.sort_values(sort_keys).reset_index(drop=True)

# Aggregate the per-seed rows of this run into mean/std per condition.
# NOTE(review): grouping ignores 'perturb_mode', so the unperturbed (0.0, 0.0)
# condition pools rows from both sweeps — the displayed table shows n_runs=6
# for it versus 3 for the other conditions.
summary = summarize_stability(df_stab, decimals=4)
summary

Unnamed: 0,method,dataset,perturb_remove_pct,perturb_add_pct,attr_shuffle,stability_mean,stability_std,acc_drop_mean,acc_drop_std,acc_original_mean,acc_original_std,acc_perturbed_mean,acc_perturbed_std,n_runs,stability_mean±std,acc_drop_mean±std,acc_original_mean±std,acc_perturbed_mean±std,method_label
0,GIN,ENZYMES,0.0,0.00,False,1.000000,0.000000,0.000000,0.000000,0.413889,0.056437,0.413889,0.056437,6,1.0000 ± 0.0000,0.0000 ± 0.0000,0.4139 ± 0.0564,0.4139 ± 0.0564,GIN (no-shuffle)
1,GIN,ENZYMES,0.0,0.00,True,0.482710,0.067548,0.177778,0.078292,0.413889,0.056437,0.236111,0.035224,6,0.4827 ± 0.0675,0.1778 ± 0.0783,0.4139 ± 0.0564,0.2361 ± 0.0352,GIN (shuffle)
2,Graph2Vec,ENZYMES,0.0,0.00,False,0.937519,0.009736,0.006944,0.020694,0.375000,0.042817,0.368056,0.038520,6,0.9375 ± 0.0097,0.0069 ± 0.0207,0.3750 ± 0.0428,0.3681 ± 0.0385,Graph2Vec
3,NetLSD,ENZYMES,0.0,0.00,False,1.000000,0.000000,0.000000,0.000000,0.286111,0.004303,0.286111,0.004303,6,1.0000 ± 0.0000,0.0000 ± 0.0000,0.2861 ± 0.0043,0.2861 ± 0.0043,NetLSD
4,GIN,ENZYMES,0.0,0.05,False,0.931003,0.015424,0.030556,0.012729,0.413889,0.063099,0.383333,0.065085,3,0.9310 ± 0.0154,0.0306 ± 0.0127,0.4139 ± 0.0631,0.3833 ± 0.0651,GIN (no-shuffle)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,NetLSD,MUTAG,0.1,0.00,False,0.999648,0.000014,0.087719,0.030387,0.833333,0.030387,0.745614,0.030387,3,0.9996 ± 0.0000,0.0877 ± 0.0304,0.8333 ± 0.0304,0.7456 ± 0.0304,NetLSD
80,GIN,MUTAG,0.2,0.00,False,0.439274,0.190058,0.394737,0.069625,0.868421,0.026316,0.473684,0.094883,3,0.4393 ± 0.1901,0.3947 ± 0.0696,0.8684 ± 0.0263,0.4737 ± 0.0949,GIN (no-shuffle)
81,GIN,MUTAG,0.2,0.00,True,0.237696,0.236457,0.517544,0.109561,0.868421,0.026316,0.350877,0.092418,3,0.2377 ± 0.2365,0.5175 ± 0.1096,0.8684 ± 0.0263,0.3509 ± 0.0924,GIN (shuffle)
82,Graph2Vec,MUTAG,0.2,0.00,False,0.610271,0.005551,0.131579,0.026316,0.789474,0.026316,0.657895,0.000000,3,0.6103 ± 0.0056,0.1316 ± 0.0263,0.7895 ± 0.0263,0.6579 ± 0.0000,Graph2Vec


In [17]:
def save_acc_drop_plots(
    summary_df: pd.DataFrame,
    out_dir: str,
    x_mode: str = "pair",  # "pair" or "sum"
    title_prefix: str = "Accuracy drop vs perturbation",
    dpi: int = 180
):
    """
    Save one accuracy-drop figure per dataset, comparing methods.

    For every distinct value in the "dataset" column a PNG named
    ``acc_drop_<dataset>_<x_mode>.png`` is written to ``out_dir``. Each
    distinct "method_label" becomes one error-bar series
    (y = acc_drop_mean, error bars = acc_drop_std).

    Parameters
    ----------
    summary_df : pd.DataFrame
        Summarized results. Columns read here: "dataset", "method_label",
        "perturb_remove_pct", "perturb_add_pct", "acc_drop_mean",
        "acc_drop_std", and (in "sum" mode only) "attr_shuffle".
    out_dir : str
        Output directory; created if it does not exist.
    x_mode : str
        - "pair": x labels are "r=<rp>, a=<ap>" (categorical), ordered by
          (remove, add).
        - "sum": x = rp + ap (numeric); pairs with the same sum are averaged
          per method.
    title_prefix : str
        Text placed before " — <dataset>" in the figure title.
    dpi : int
        Resolution passed to ``savefig``.

    Raises
    ------
    ValueError
        If ``x_mode`` is neither "pair" nor "sum". Raised before any
        directory or figure is created.
    """
    # Validate first so a bad mode never leaves an empty directory or an
    # unclosed figure behind, and is reported even for an empty frame.
    if x_mode not in ("pair", "sum"):
        raise ValueError("x_mode must be 'pair' or 'sum'")

    os.makedirs(out_dir, exist_ok=True)

    for ds in sorted(summary_df["dataset"].unique()):
        sdf = summary_df[summary_df["dataset"] == ds].copy()

        # method label includes GIN shuffle/no-shuffle
        sdf["method_label"] = sdf["method_label"].astype(str)

        fig, ax = plt.subplots(figsize=(9, 5))
        try:
            if x_mode == "pair":
                sdf["x_label"] = sdf.apply(
                    lambda r: f"r={r['perturb_remove_pct']}, a={r['perturb_add_pct']}", axis=1
                )
                # keep a stable order for x
                x_order = (
                    sdf[["perturb_remove_pct", "perturb_add_pct", "x_label"]]
                    .drop_duplicates()
                    .sort_values(["perturb_remove_pct", "perturb_add_pct"])
                )
                x_labels = x_order["x_label"].tolist()

                for m in sorted(sdf["method_label"].unique()):
                    # Align every method to the full label list; settings a
                    # method lacks become NaN (gaps) instead of shifting points.
                    ms = (
                        sdf[sdf["method_label"] == m]
                        .set_index("x_label")
                        .reindex(x_labels)
                        .reset_index()
                    )
                    ax.errorbar(
                        x_labels,
                        ms["acc_drop_mean"].values,
                        yerr=ms["acc_drop_std"].values,
                        marker="o", linewidth=2, capsize=3, label=m,
                    )

                plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
                ax.set_xlabel("Perturbation (remove, add)")

            else:  # x_mode == "sum"
                # Round the float sum: without it, e.g. 0.05 + 0.10 gives
                # 0.15000000000000002 and would not be grouped with 0.15 + 0.0.
                sdf["x_sum"] = (
                    sdf["perturb_remove_pct"] + sdf["perturb_add_pct"]
                ).round(6)
                # if there are duplicates (different pairs same sum), average them
                # but keep method separation
                sdf2 = (sdf
                        .groupby(["dataset", "method_label", "attr_shuffle", "x_sum"], dropna=False)
                        .agg(acc_drop_mean=("acc_drop_mean", "mean"),
                             acc_drop_std=("acc_drop_std", "mean"))  # average stds roughly
                        .reset_index())

                for m in sorted(sdf2["method_label"].unique()):
                    ms = sdf2[sdf2["method_label"] == m].sort_values("x_sum")
                    ax.errorbar(
                        ms["x_sum"].values,
                        ms["acc_drop_mean"].values,
                        yerr=ms["acc_drop_std"].values,
                        marker="o", linewidth=2, capsize=3, label=m,
                    )

                ax.set_xlabel("Perturbation strength (remove + add)")

            ax.set_ylabel("Accuracy drop (mean)")
            ax.set_title(f"{title_prefix} — {ds}")
            ax.grid(True, alpha=0.3)
            ax.legend()
            fig.tight_layout()

            fpath = os.path.join(out_dir, f"acc_drop_{ds}_{x_mode}.png")
            fig.savefig(fpath, dpi=dpi)
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)

        print("Saved plot:", fpath)

In [18]:
# Summarize df_stab, then write the accuracy-drop figures into the "plots"
# subfolder of STABILITY_DIR.
summary = summarize_stability(df_stab)
PLOTS_DIR = os.path.join(STABILITY_DIR, "plots")

# "pair": categorical (remove, add) pairs on the x-axis
# "sum":  scalar strength remove + add on the x-axis
for plot_mode in ("pair", "sum"):
    save_acc_drop_plots(summary, out_dir=PLOTS_DIR, x_mode=plot_mode)

Saved plot: /content/drive/MyDrive/InformationSystems/Stability_NEW/results/plots/acc_drop_ENZYMES_pair.png
Saved plot: /content/drive/MyDrive/InformationSystems/Stability_NEW/results/plots/acc_drop_IMDB-MULTI_pair.png
Saved plot: /content/drive/MyDrive/InformationSystems/Stability_NEW/results/plots/acc_drop_MUTAG_pair.png
Saved plot: /content/drive/MyDrive/InformationSystems/Stability_NEW/results/plots/acc_drop_ENZYMES_sum.png
Saved plot: /content/drive/MyDrive/InformationSystems/Stability_NEW/results/plots/acc_drop_IMDB-MULTI_sum.png
Saved plot: /content/drive/MyDrive/InformationSystems/Stability_NEW/results/plots/acc_drop_MUTAG_sum.png
