# Map the sweep IDs to names using the wandb API

In [None]:
import wandb
import pandas as pd

# --- Configuration ---
# W&B entity (team or username) and project that own the sweeps.
ENTITY = "matteotolloso"
PROJECT = "graph-uncertainty"
# Exported runs table to read, and where the rewritten copy is stored.
INPUT_CSV = "/vast/m.tolloso/graph-uncertainty/wandb_export_2025-08-22T12_12_21.994+02_00.csv"
OUTPUT_CSV = "/vast/m.tolloso/graph-uncertainty/wandb_export_final.csv"

# --- Read the export ---
df = pd.read_csv(INPUT_CSV)

# Distinct, non-missing sweep IDs appearing in the "Sweep" column.
sweep_ids = df["Sweep"].dropna().unique()

# --- Resolve every sweep ID through the W&B public API ---
api = wandb.Api()
id_to_name = {}

for sid in sweep_ids:
    try:
        sweep = api.sweep(f"{ENTITY}/{PROJECT}/{sid}")
        # A sweep without a name keeps its ID as the label.
        resolved = sweep.name or sid
    except Exception as e:
        # Best effort: report the failure and keep the raw ID.
        print(f"⚠️ Could not fetch sweep {sid}: {e}")
        resolved = sid
    id_to_name[sid] = resolved

# --- Swap the IDs in the table for their resolved names ---
sweep_column = df["Sweep"]
df["Sweep"] = sweep_column.map(id_to_name)

# --- Write the result (without the index column) ---
df.to_csv(OUTPUT_CSV, index=False)

print(f"✅ Updated CSV saved as {OUTPUT_CSV}")


In [4]:
from dataset_loader.dataset_loader import dataset_loader



# Names of the datasets to pass through the loader, in this order.
datasets = [
    'coauthor',
    'chameleon',
    'squirrel',
    'reddit2',
    'arxiv',
    'patents',
]

# Load each dataset in turn with an empty config dict; `data` ends up
# holding whatever the loader returned for the last entry.
for dataset in datasets:
    data = dataset_loader(dataset, {})



--- Coauthor CS Dataset with OOD Validation ---
Nodes for training (ID only): 8813
Nodes for validation (ID+OOD): 3666 -> 2907 ID, 759 OOD
Nodes for testing (ID+OOD): 3668 -> 2964 ID, 704 OOD
Using DataLoader for full-batch training.
--- Chameleon Dataset with OOD Validation ---
Nodes for training (ID only): 647
Nodes for validation (ID+OOD): 729 -> 438 ID, 291 OOD
Nodes for testing (ID+OOD): 456 -> 276 ID, 180 OOD
--- Squirrel Dataset with OOD Validation ---
Nodes for training (ID only): 1515
Nodes for validation (ID+OOD): 1664 -> 982 ID, 682 OOD
Nodes for testing (ID+OOD): 1041 -> 622 ID, 419 OOD
--- Reddit2 Dataset with OOD Validation ---
Nodes for training (ID only): 109517
Nodes for validation (ID+OOD): 23699 -> 17343 ID, 6356 OOD
Nodes for testing (ID+OOD): 55334 -> 40601 ID, 14733 OOD
Using DataLoader for full-batch training.
Class Label Intervals:
Class 0: [-inf, 2014.0)]
Class 1: [2014.0, 2016.0)]
Class 2: [2016.0, 2018.0)]
Class 3: [2018.0, 2019.0)]
Class 4: [2019.0, inf)]
--

  loaded_dict = torch.load(pre_processed_file_path)


Nodes for validation (ID+OOD): 33868 -> 23415 ID, 10453 OOD
Nodes for testing (ID+OOD): 33870 -> 23245 ID, 10625 OOD
--- SNAP-Patents (Year) Dataset with OOD Validation ---
Nodes for training (ID only): 1053055
Nodes for validation (ID+OOD): 584784 -> 351259 ID, 233525 OOD
Nodes for testing (ID+OOD): 584785 -> 350730 ID, 234055 OOD
Using DataLoader for full-batch training.
