In [1]:
!pip install torch-scatter -f https://data.pyg.org/whl/torch-2.2.0+cu121.html
!pip install torch-sparse  -f https://data.pyg.org/whl/torch-2.2.0+cu121.html
!pip install torch-geometric

!pip install --use-deprecated=legacy-resolver karateclub networkx numpy pandas matplotlib scikit-learn

!pip install torch torchvision torchaudio
!pip install torch-geometric \
    -f https://data.pyg.org/whl/torch-$(python -c "import torch; print(torch.__version__)").html


!pip install optuna
!pip install karateclub

Looking in links: https://data.pyg.org/whl/torch-2.2.0+cu121.html
Looking in links: https://data.pyg.org/whl/torch-2.2.0+cu121.html
Looking in links: https://data.pyg.org/whl/torch-2.9.0+cu126.html
Collecting numpy<1.23.0 (from karateclub)
  Using cached numpy-1.22.4.zip (11.5 MB)
  Installing build dependencies ... [?25l[?25hdone
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Getting requirements to build wheel ... [?25l[?25herror
[1;31merror[0m: [1msubprocess-exited-with-error[0m

[31m×[0m [32mGetting requirements to build wheel[0m did not run successfully.
[31m│[0m exit code: [1;36m1[0m
[31m╰─>[0m See above for output.

[1;35mnote[0m: This error originates from a subprocess, and is likely not a

In [2]:
# get statistics

import numpy as np
import torch
from torch_geometric.datasets import TUDataset
from torch_geometric.utils import is_undirected

def dataset_stats(name, root="/content/data"):
    ds = TUDataset(root=root, name=name)

    num_graphs = len(ds)
    num_classes = ds.num_classes

    node_counts = []
    edge_counts_undirected = []
    edge_counts_raw = []
    undirected_flags = []

    for g in ds:
        n = int(g.num_nodes) if g.num_nodes is not None else 0
        e = int(g.num_edges) if g.edge_index is not None else 0  # raw (directed) count in PyG
        node_counts.append(n)
        edge_counts_raw.append(e)

        # Check undirected and convert to "unique undirected edges"
        if g.edge_index is not None and g.edge_index.numel() > 0:
            und = bool(is_undirected(g.edge_index))
        else:
            und = True
        undirected_flags.append(und)

        # If undirected, PyG usually stores both directions => divide by 2
        e_und = e // 2 if und else e
        edge_counts_undirected.append(e_und)

    stats = {
        "Dataset": name,
        "#Graphs": num_graphs,
        "#Classes": num_classes,
        "Avg #Nodes": float(np.mean(node_counts)),
        "Avg #Edges (undirected)": float(np.mean(edge_counts_undirected)),
        "Pct undirected graphs": 100.0 * float(np.mean(undirected_flags)),
    }
    return stats

DATASETS = ["MUTAG", "ENZYMES", "IMDB-MULTI"]
all_stats = [dataset_stats(d) for d in DATASETS]
all_stats

  import torch_geometric.typing
  import torch_geometric.typing


[{'Dataset': 'MUTAG',
  '#Graphs': 188,
  '#Classes': 2,
  'Avg #Nodes': 17.930851063829788,
  'Avg #Edges (undirected)': 19.79255319148936,
  'Pct undirected graphs': 100.0},
 {'Dataset': 'ENZYMES',
  '#Graphs': 600,
  '#Classes': 6,
  'Avg #Nodes': 32.63333333333333,
  'Avg #Edges (undirected)': 62.13666666666666,
  'Pct undirected graphs': 100.0},
 {'Dataset': 'IMDB-MULTI',
  '#Graphs': 1500,
  '#Classes': 3,
  'Avg #Nodes': 13.001333333333333,
  'Avg #Edges (undirected)': 65.93533333333333,
  'Pct undirected graphs': 100.0}]

In [3]:
# Imports
import joblib
import torch
import torch.nn.functional as F
from torch.nn import Linear, Sequential, ReLU, BatchNorm1d
from torch_geometric.nn import GINConv, global_add_pool
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from sklearn.metrics import accuracy_score, f1_score
from torch_geometric.transforms import OneHotDegree
import optuna
import pandas as pd
import time, os, psutil
from sklearn.metrics import roc_auc_score
import numpy as np
from karateclub import NetLSD, Graph2Vec
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score
from torch_geometric.utils import to_networkx
import networkx as nx
import warnings
warnings.filterwarnings("ignore")


In [4]:
# Connect to google drive
from google.colab import drive
drive.mount('/content/drive')
BASE_DIR = "/content/drive/MyDrive/InformationSystems/Classification"
RESULTS_DIR = f"{BASE_DIR}/results"
MODELS_DIR = f"{BASE_DIR}/models"
EMBEDDINGS_DIR = f"{BASE_DIR}/embeddings"
CLASSIF_RESULTS_DIR = f"{BASE_DIR}/results/classification"

os.makedirs(EMBEDDINGS_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(MODELS_DIR, exist_ok=True)
os.makedirs(CLASSIF_RESULTS_DIR, exist_ok=True)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
def make_deterministic_perm(n: int, seed: int) -> np.ndarray:
    """Deterministic permutation for dataset ordering (Fix A).

    We use this permutation everywhere we need a stable, reproducible graph order
    so that saved embeddings/labels can be aligned later (e.g., in stability.py).
    """
    return np.random.RandomState(seed).permutation(n)


In [6]:
def sanitize_embeddings(embeddings: np.ndarray) -> np.ndarray:
    """
    Replace NaN/Inf values in embeddings and ensure a clean float32 array.
    This is useful for karateclub embeddings that may occasionally produce
    unstable values on some graphs.
    """
    emb = np.asarray(embeddings, dtype=np.float32)
    # Replace NaN and +/- Inf with 0.0
    emb = np.nan_to_num(emb, nan=0.0, posinf=0.0, neginf=0.0)
    return emb

In [7]:
def filter_enzymes_graphs(graphs, labels, min_nodes: int = 3):
    """
    Special handling for ENZYMES: remove very small graphs that can cause
    numerical issues for NetLSD / Graph2Vec.
    Returns filtered (graphs, labels) and prints how many were removed.
    """
    if len(graphs) == 0:
        return graphs, labels

    mask = [g.number_of_nodes() >= min_nodes for g in graphs]
    if not any(mask):
        print("WARNING: All ENZYMES graphs would be filtered out. Skipping filtering.")
        return graphs, labels

    filtered_graphs = [g for g, keep in zip(graphs, mask) if keep]
    if isinstance(labels, np.ndarray):
       filtered_labels = labels[np.array(mask)]
    else:
        filtered_labels = [y for y, keep in zip(labels, mask) if keep]

    removed = len(graphs) - len(filtered_graphs)
    print(f"ENZYMES filtering: removed {removed} graphs with < {min_nodes} nodes, kept {len(filtered_graphs)} graphs.")
    return filtered_graphs, filtered_labels

In [8]:
# GIN Model Definition

class GIN(torch.nn.Module):
    def __init__(self, num_features, hidden_dim, num_classes, num_layers=5, dropout=0.5):
        super(GIN, self).__init__()
        layers = []
        in_dim = num_features
        for _ in range(num_layers):
            nn = Sequential(Linear(in_dim, hidden_dim), ReLU(), Linear(hidden_dim, hidden_dim))
            layers.append(GINConv(nn))
            in_dim = hidden_dim
        self.convs = torch.nn.ModuleList(layers)
        self.bns = torch.nn.ModuleList([BatchNorm1d(hidden_dim) for _ in range(num_layers)])
        self.fc1 = Linear(hidden_dim, hidden_dim)
        self.fc2 = Linear(hidden_dim, num_classes)
        self.dropout = dropout

    def forward(self, x, edge_index, batch):
        for conv, bn in zip(self.convs, self.bns):
            x = F.relu(conv(x, edge_index))
            x = bn(x)
        x = global_add_pool(x, batch)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc2(x)
        return x

In [9]:
# Training

def train(model, loader, optimizer, criterion):
    model.train()
    total_loss = 0
    for data in loader:
        data = data.to(DEVICE)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Evaluation

def evaluate(model, loader, criterion):
    model.eval()
    preds, labels, probs = [], [], []
    total_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for data in loader:
            data = data.to(DEVICE)
            out = model(data.x, data.edge_index, data.batch)

            loss = criterion(out, data.y)
            total_loss += loss.item()
            num_batches += 1

            pred = out.argmax(dim=1)
            preds.extend(pred.cpu().numpy())
            labels.extend(data.y.cpu().numpy())
            probs.extend(F.softmax(out, dim=1).cpu().numpy())  # probabilities for AUC

    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average='weighted')
    avg_loss = total_loss / max(1, num_batches)

    try:
        if len(np.unique(labels)) == 2:
            auc = roc_auc_score(labels, np.array(probs)[:, 1])
        else:
            auc = roc_auc_score(labels, probs, multi_class='ovr')
    except ValueError:
        auc = np.nan  # if there are not enough samples for AUC

    return acc, f1, auc, avg_loss

In [10]:
def get_gin_embeddings(model, loader):
    """Return graph-level embeddings (after global_add_pool) and labels."""
    model.eval()
    all_emb = []
    all_labels = []
    with torch.no_grad():
        for data in loader:
            data = data.to(DEVICE)
            x, edge_index, batch = data.x, data.edge_index, data.batch
            # forward μέχρι το pooling
            for conv, bn in zip(model.convs, model.bns):
                x = F.relu(conv(x, edge_index))
                x = bn(x)
            x = global_add_pool(x, batch)
            all_emb.append(x.cpu().numpy())
            all_labels.extend(data.y.cpu().numpy())
    embeddings = np.concatenate(all_emb, axis=0)
    labels = np.array(all_labels)
    return embeddings, labels

In [11]:
def run_gin_pipeline(
    dataset_name,
    seed,
    use_optuna,
    w_acc,
    w_f1,
    w_auc,
    hidden_dim,
    epochs,
    batch_size=32,
    n_trials=15,
):

    # Experiment ID (used in logs and embeddings path)
    experiment_id = str(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Load dataset (Fix A: deterministic order, no implicit shuffle)
    dataset_raw = TUDataset(root='data/TUDataset', name=dataset_name)
    perm = make_deterministic_perm(len(dataset_raw), seed)
    dataset = dataset_raw[perm.tolist()]

    # If no node features use one-hot degree features
    if dataset.num_features == 0 or dataset[0].x is None:
        print("Dataset has no node features. Applying OneHotDegree transform...")

        # Find maximum degree across all graphs
        max_degree = 0
        for data in dataset:
            deg = torch.bincount(data.edge_index[0], minlength=data.num_nodes)
            max_degree = max(max_degree, int(deg.max()))

        # Apply transform
        oh_transform = OneHotDegree(max_degree=max_degree)
        dataset_raw = TUDataset(
            root='data/TUDataset',
            name=dataset_name,
            transform=oh_transform
        )

        # Apply the SAME deterministic permutation so order matches the non-feature case
        perm = make_deterministic_perm(len(dataset_raw), seed)
        dataset = dataset_raw[perm.tolist()]

        num_node_features = max_degree + 1
    else:
        num_node_features = dataset.num_features

    # Train/test split
    train_dataset = dataset[:int(0.8 * len(dataset))]
    test_dataset = dataset[int(0.8 * len(dataset)):]
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    print(f"Loaded dataset {dataset_name}: {len(dataset)} graphs, {num_node_features} node features, {dataset.num_classes} classes")

    def objective(trial):
        num_layers = trial.suggest_int("num_layers", 3, 6)
        dropout = trial.suggest_float("dropout", 0.0, 0.6)
        lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
        weight_decay = trial.suggest_loguniform("weight_decay", 1e-6, 1e-3)

        model = GIN(num_node_features, hidden_dim, dataset.num_classes, num_layers, dropout).to(DEVICE)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        criterion = torch.nn.CrossEntropyLoss()

        for epoch in range(5):  # fewer epochs for fast tuning
            train(model, train_loader, optimizer, criterion)

        acc, f1, auc, _ = evaluate(model, test_loader, criterion)
        score = (w_acc * acc) + (w_f1 * f1) + (w_auc * (0 if np.isnan(auc) else auc))
        return score

    start_generation = time.time()
    if use_optuna:
        print("Running Optuna for hyperparameter tuning...")
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=n_trials)
        best_params = study.best_params
        print(f"Best hyperparameters: {best_params}")
    else:
      best_params = { "num_layers": 5, "dropout": 0.5, "lr": 0.001, "weight_decay": 1e-4}
      print(f"Using default hyperparameters: {best_params}")

    generation_time = time.time() - start_generation

    # Final Training with best parameters

    print("\nRunning final training GIN...")
    print(best_params)

    model = GIN(num_node_features, hidden_dim, dataset.num_classes,
                num_layers=best_params["num_layers"], dropout=best_params["dropout"]).to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=best_params["lr"], weight_decay=best_params["weight_decay"])
    criterion = torch.nn.CrossEntropyLoss()

    history = []
    start_time = time.time()
    eval_acc, eval_f1, eval_auc, eval_loss, eval_epoch = 0, 0, 0, 1e9, 0
    best_loss_for_best_epoch = 1e9
    for epoch in range(1, epochs + 1):
        loss = train(model, train_loader, optimizer, criterion)
        acc, f1, auc, e_loss = evaluate(model, test_loader, criterion)
        if acc > eval_acc:
          #edo mipos to allakso na einai kai edo sindiasmos me weights poy eixe kai sto optuna
          eval_acc, eval_f1, eval_auc, eval_loss, eval_epoch = acc, f1, auc, e_loss, epoch
          best_loss_for_best_epoch = e_loss

        elapsed = time.time() - start_time
        print(f"Epoch {epoch:03d} | Loss={loss:.4f} | TestAcc={acc:.3f} | F1={f1:.3f} | AUC={auc:.3f} | Time={elapsed:.2f}s")
        history.append([epoch, loss, acc, f1, auc, elapsed])

    training_time = time.time() - start_time
    process = psutil.Process(os.getpid())
    memory_usage = process.memory_info().rss / (1024 ** 2)  # in MB


    # Save GIN embeddings for the whole dataset

    full_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    gin_embeddings, gin_labels = get_gin_embeddings(model, full_loader)

    gin_exp_dir = os.path.join(EMBEDDINGS_DIR, "GIN", dataset_name, experiment_id)
    os.makedirs(gin_exp_dir, exist_ok=True)
    np.save(os.path.join(gin_exp_dir, "embeddings.npy"), gin_embeddings)
    np.save(os.path.join(gin_exp_dir, "labels.npy"), gin_labels)
    # Fix: persist the exact graph order used to produce embeddings/labels
    np.save(os.path.join(gin_exp_dir, "order.npy"), perm)

    # Log file save

    summary_path = f"{CLASSIF_RESULTS_DIR}/gin.csv"
    os.makedirs("results", exist_ok=True)


    summary_data = {
        "method": "GIN",
        "seed": seed,
        "dataset": dataset_name,
        "optimization_enabled": "yes" if use_optuna else "no",
        "embedding_dimension": hidden_dim,
        "objective_weights": f"({w_acc},{w_f1},{w_auc})",
        "num_layers": best_params["num_layers"],
        "dropout": best_params["dropout"],
        "lr": best_params["lr"],
        "weight_decay": best_params["weight_decay"],
        "epochs": epochs,
        "best_epoch": eval_epoch,
        "best_loss": round(float(best_loss_for_best_epoch), 4),
        "eval_loss": round(float(eval_loss), 4),
        "eval_acc": round(eval_acc, 4),
        "eval_f1": round(eval_f1, 4),
        "eval_auc": round(eval_auc, 4),
        "training_time (s)": round(training_time, 2),
        "generation_time (s)": round(generation_time, 2),
        "memory_usage (MB)": round(memory_usage, 2)
    }

    df = pd.DataFrame([summary_data])

    # Append mode (keep all trainings)
    if os.path.exists(summary_path):
        df.to_csv(summary_path, mode='a', index=False, header=False)
    else:
        df.to_csv(summary_path, index=False)

    print(f"\nTraining summary stored in : {summary_path}")
    print(df)

    # Save best model (weights + metadata)
    gin_ckpt = {
        "state_dict": model.state_dict(),
        "num_node_features": num_node_features,
        "hidden_dim": hidden_dim,
        "num_classes": dataset.num_classes,
        "num_layers": best_params["num_layers"],
        "dropout": best_params["dropout"],
    }
    torch.save(gin_ckpt, f"{MODELS_DIR}/GIN_{dataset_name}_{experiment_id}.pth")
    print(f"Saved model: {MODELS_DIR}/GIN_{dataset_name}_{experiment_id}.pth")


In [12]:
def run_graph2vec_pipeline(
    dataset_name,
    seed,
    w_acc=0.5,
    w_f1=0.3,
    w_auc=0.2,
    embedding_dim=128,
    epochs=50,
    test_size=0.2,
    use_optuna=True,
    n_trials=20,
):
    """
    Pipeline for graph classification using Graph2Vec embeddings + SVM,
    with optional Optuna-based hyperparameter tuning and special handling
    for ENZYMES + embedding sanitization.
    """
    # Experiment ID
    experiment_id = str(seed)
    np.random.seed(seed)

    # Load dataset
    # Fix : deterministic dataset order
    dataset_raw = TUDataset(root='data/TUDataset', name=dataset_name)
    perm = make_deterministic_perm(len(dataset_raw), seed)
    dataset = dataset_raw[perm.tolist()]
    print(f"Loaded dataset {dataset_name} for Graph2Vec: {len(dataset)} graphs, {dataset.num_classes} classes")

    # ds_indices[k] is the index in the original (unshuffled) dataset for dataset[k]
    ds_indices = perm.copy()


    # Convert PyG graphs to NetworkX graphs
    graphs = []
    labels = []
    for data in dataset:
        g = to_networkx(data, to_undirected=True)
        graphs.append(g)
        labels.append(int(data.y.item()))

    labels = np.array(labels)
    # Positions in the *current* graphs list (used for splitting)
    pos = np.arange(len(labels))

    # Special handling for ENZYMES (filter very small graphs)
    if dataset_name.upper() == "ENZYMES":
        # Keep original dataset indices aligned with graphs/labels after filtering
        mask = np.array([g.number_of_nodes() >= 3 for g in graphs], dtype=bool)
        graphs = [g for g, keep in zip(graphs, mask) if keep]
        labels = labels[mask]
        ds_indices = ds_indices[mask]
        pos = np.arange(len(labels))
        removed = int(np.sum(~mask))
        print(f"ENZYMES filtering: removed {removed} graphs with < 3 nodes, kept {len(graphs)} graphs.")

    # Outer train/test split on graphs
    train_pos, test_pos, y_train, y_test = train_test_split(
        pos,
        labels,
        test_size=test_size,
        random_state=seed,
        stratify=labels,
    )
    train_graphs = [graphs[i] for i in train_pos]
    test_graphs = [graphs[i] for i in test_pos]

    opt_time = 0.0

    def objective(trial):
        # Hyperparameters for the SVM classifier
        C = trial.suggest_loguniform("C", 1e-2, 1e2)
        gamma = trial.suggest_loguniform("gamma", 1e-4, 1e1)

        # Inner train/validation split on graphs
        inner_tr_idx, inner_val_idx, y_tr, y_val = train_test_split(
            np.arange(len(train_graphs)),
            y_train,
            test_size=0.2,
            random_state=seed,
            stratify=y_train,
        )

        inner_tr_graphs = [train_graphs[i] for i in inner_tr_idx]
        inner_val_graphs = [train_graphs[i] for i in inner_val_idx]


        all_graphs = inner_tr_graphs + inner_val_graphs

        # Fit Graph2Vec on all (transductive setting) and slice embeddings
        g2v = Graph2Vec(dimensions=embedding_dim, wl_iterations=2, epochs=epochs, workers=os.cpu_count())
        g2v.fit(all_graphs)
        emb_all = sanitize_embeddings(g2v.get_embedding())

        X_tr = emb_all[:len(inner_tr_graphs)]
        X_val = emb_all[len(inner_tr_graphs):]

        clf = SVC(kernel="rbf", probability=True, C=C, gamma=gamma, random_state=seed)
        clf.fit(X_tr, y_tr)
        y_pred = clf.predict(X_val)
        y_prob = clf.predict_proba(X_val)

        acc = accuracy_score(y_val, y_pred)
        f1 = f1_score(y_val, y_pred, average="weighted")
        try:
            if len(np.unique(y_val)) == 2:
                auc = roc_auc_score(y_val, y_prob[:, 1])
            else:
                auc = roc_auc_score(y_val, y_prob, multi_class="ovr")
        except ValueError:
            auc = np.nan

        score = (w_acc * acc) + (w_f1 * f1) + (w_auc * (0 if np.isnan(auc) else auc))
        return score

    if use_optuna:
        print("Running Optuna for Graph2Vec+SVM hyperparameter tuning...")
        start_opt = time.time()
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=n_trials)
        best_params = study.best_params
        opt_time = time.time() - start_opt
        print(f"Best hyperparameters (Graph2Vec+SVM): {best_params}")
    else:
        best_params = {"C": 1.0, "gamma": "scale"}
        print(f"Using default SVM hyperparameters: {best_params}")

    # Final embedding + training using best hyperparameters
    print("Running final Graph2Vec embedding on train+test graphs...")
    all_graphs_final = train_graphs + test_graphs
    order_pos = np.concatenate([train_pos, test_pos])
    start_embed = time.time()
    g2v = Graph2Vec(dimensions=embedding_dim, wl_iterations=2, epochs=epochs, workers=os.cpu_count())
    g2v.fit(all_graphs_final)
    emb_all = sanitize_embeddings(g2v.get_embedding())
    embed_time = time.time() - start_embed

    X_train = emb_all[:len(train_graphs)]
    X_test = emb_all[len(train_graphs):]

    print("Training final SVM on Graph2Vec embeddings...")
    start_train = time.time()
    clf = SVC(kernel="rbf", probability=True, C=best_params["C"], gamma=best_params["gamma"], random_state=seed)
    clf.fit(X_train, y_train)
    train_time = time.time() - start_train

    # Evaluation on held-out test graphs
    y_pred = clf.predict(X_test)
    y_prob = clf.predict_proba(X_test)

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")
    try:
        if len(np.unique(y_test)) == 2:
            auc = roc_auc_score(y_test, y_prob[:, 1])
        else:
            auc = roc_auc_score(y_test, y_prob, multi_class="ovr")
    except ValueError:
        auc = np.nan

    score = (w_acc * acc) + (w_f1 * f1) + (w_auc * (0 if np.isnan(auc) else auc))


    # Save Graph2Vec embeddings (for all graphs) + labels

    g2v_exp_dir = os.path.join(EMBEDDINGS_DIR, "Graph2Vec", dataset_name, experiment_id)
    os.makedirs(g2v_exp_dir, exist_ok=True)
    np.save(os.path.join(g2v_exp_dir, "embeddings.npy"), emb_all)
    # Save labels in the SAME order as embeddings (train_pos + test_pos)
    np.save(os.path.join(g2v_exp_dir, "labels.npy"), labels[order_pos])
    # Fix: persist ORIGINAL dataset indices (so stability can rebuild graphs correctly)
    np.save(os.path.join(g2v_exp_dir, "order.npy"), ds_indices[order_pos])

    # Save fitted Graph2Vec model
    g2v_model_path = f"{MODELS_DIR}/Graph2Vec_{dataset_name}_{experiment_id}.joblib"
    joblib.dump(g2v, g2v_model_path)

    # Save trained SVM classifier
    svm_model_path = f"{MODELS_DIR}/Graph2Vec_SVM_{dataset_name}_{experiment_id}.joblib"
    joblib.dump(clf, svm_model_path)


    process = psutil.Process(os.getpid())
    memory_usage = process.memory_info().rss / (1024 ** 2)  # in MB

    print(f"Graph2Vec Results on {dataset_name} -> Acc: {acc:.3f}, F1: {f1:.3f}, AUC: {auc:.3f}, Score: {score:.3f}")
    print(f"Embedding time: {embed_time:.2f}s | SVM training time: {train_time:.2f}s | Optuna time: {opt_time:.2f}s | Memory usage: {memory_usage:.2f} MB")

    # Log summary to CSV
    summary_path = f"{CLASSIF_RESULTS_DIR}/g2v.csv"

    summary_data = {
        "method": "Graph2Vec",
        "seed": seed,
        "dataset": dataset_name,
        "embedding_dimension": embedding_dim,
        "optuna_enabled": "yes" if use_optuna else "no",
        "C": best_params["C"],
        "gamma": best_params["gamma"],
        "acc": round(float(acc), 4),
        "f1": round(float(f1), 4),
        "auc": round(float(auc) if not np.isnan(auc) else -1, 4),
        "score": round(float(score), 4),
        "embedding_time (s)": round(embed_time, 2),
        "svm_training_time (s)": round(train_time, 2),
        "optuna_time (s)": round(opt_time, 2),
        "memory_usage (MB)": round(memory_usage, 2),
    }

    df = pd.DataFrame([summary_data])
    if os.path.exists(summary_path):
        df.to_csv(summary_path, mode='a', index=False, header=False)
    else:
        df.to_csv(summary_path, index=False)

    print(f"Graph2Vec summary stored in: {summary_path}")


In [13]:
def run_netlsd_pipeline(
    dataset_name,
    seed,
    w_acc=0.5,
    w_f1=0.3,
    w_auc=0.2,
    test_size=0.2,
    use_optuna=True,
    n_trials=20,
):
    """
    Pipeline for graph classification using NetLSD embeddings + SVM,
    with optional Optuna-based hyperparameter tuning and ENZYMES filtering.
    """
    # Experiment ID
    experiment_id = str(seed)
    np.random.seed(seed)
    # Fix: deterministic dataset order
    dataset_raw = TUDataset(root='data/TUDataset', name=dataset_name)
    perm = make_deterministic_perm(len(dataset_raw), seed)
    dataset = dataset_raw[perm.tolist()]
    print(f"Loaded dataset {dataset_name} for NetLSD: {len(dataset)} graphs, {dataset.num_classes} classes")

    # ds_indices[k] is the index in the original (unshuffled) dataset for dataset[k]
    ds_indices = perm.copy()

    # Convert PyG graphs to NetworkX graphs
    graphs = []
    labels = []
    for data in dataset:
        g = to_networkx(data, to_undirected=True)
        graphs.append(g)
        labels.append(int(data.y.item()))

    labels = np.array(labels)
    pos = np.arange(len(labels))

    # Special handling for ENZYMES
    if dataset_name.upper() == "ENZYMES":
        # Keep original dataset indices aligned with graphs/labels after filtering
        mask = np.array([g.number_of_nodes() >= 3 for g in graphs], dtype=bool)
        graphs = [g for g, keep in zip(graphs, mask) if keep]
        labels = labels[mask]
        ds_indices = ds_indices[mask]
        pos = np.arange(len(labels))
        removed = int(np.sum(~mask))
        print(f"ENZYMES filtering: removed {removed} graphs with < 3 nodes, kept {len(graphs)} graphs.")

    # Outer train/test split on graphs
    train_pos, test_pos, y_train, y_test = train_test_split(
        pos,
        labels,
        test_size=test_size,
        random_state=seed,
        stratify=labels,
    )
    train_graphs = [graphs[i] for i in train_pos]
    test_graphs = [graphs[i] for i in test_pos]

    opt_time = 0.0

    def objective(trial):
        # Hyperparameters for the SVM classifier
        C = trial.suggest_loguniform("C", 1e-2, 1e2)
        gamma = trial.suggest_loguniform("gamma", 1e-4, 1e1)

        # Inner train/validation split on graphs
        inner_tr_idx, inner_val_idx, y_tr, y_val = train_test_split(
            np.arange(len(train_graphs)),
            y_train,
            test_size=0.2,
            random_state=seed,
            stratify=y_train,
        )

        inner_tr_graphs = [train_graphs[i] for i in inner_tr_idx]
        inner_val_graphs = [train_graphs[i] for i in inner_val_idx]


        all_graphs = inner_tr_graphs + inner_val_graphs

        # Fit NetLSD on all and slice embeddings
        netlsd = NetLSD()
        netlsd.fit(all_graphs)
        emb_all = sanitize_embeddings(netlsd.get_embedding())

        X_tr = emb_all[:len(inner_tr_graphs)]
        X_val = emb_all[len(inner_tr_graphs):]

        clf = SVC(kernel="rbf", probability=True, C=C, gamma=gamma, random_state=seed)
        clf.fit(X_tr, y_tr)
        y_pred = clf.predict(X_val)
        y_prob = clf.predict_proba(X_val)

        acc = accuracy_score(y_val, y_pred)
        f1 = f1_score(y_val, y_pred, average="weighted")
        try:
            if len(np.unique(y_val)) == 2:
                auc = roc_auc_score(y_val, y_prob[:, 1])
            else:
                auc = roc_auc_score(y_val, y_prob, multi_class="ovr")
        except ValueError:
            auc = np.nan

        score = (w_acc * acc) + (w_f1 * f1) + (w_auc * (0 if np.isnan(auc) else auc))
        return score

    if use_optuna:
        print("Running Optuna for NetLSD+SVM hyperparameter tuning...")
        start_opt = time.time()
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=n_trials)
        best_params = study.best_params
        opt_time = time.time() - start_opt
        print(f"Best hyperparameters (NetLSD+SVM): {best_params}")
    else:
        best_params = {"C": 1.0, "gamma": "scale"}
        print(f"Using default SVM hyperparameters: {best_params}")

    # Final embedding + training using best hyperparameters
    print("Running final NetLSD embedding on train+test graphs...")
    all_graphs_final = train_graphs + test_graphs
    order_pos = np.concatenate([train_pos, test_pos])
    start_embed = time.time()
    netlsd = NetLSD()
    netlsd.fit(all_graphs_final)
    emb_all = sanitize_embeddings(netlsd.get_embedding())
    embed_time = time.time() - start_embed

    X_train = emb_all[:len(train_graphs)]
    X_test = emb_all[len(train_graphs):]

    print("Training final SVM on NetLSD embeddings...")
    start_train = time.time()
    clf = SVC(kernel="rbf", probability=True, C=best_params["C"], gamma=best_params["gamma"], random_state=seed)
    clf.fit(X_train, y_train)
    train_time = time.time() - start_train

    # Evaluation on held-out test graphs
    y_pred = clf.predict(X_test)
    y_prob = clf.predict_proba(X_test)

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")
    try:
        if len(np.unique(y_test)) == 2:
            auc = roc_auc_score(y_test, y_prob[:, 1])
        else:
            auc = roc_auc_score(y_test, y_prob, multi_class="ovr")
    except ValueError:
        auc = np.nan

    score = (w_acc * acc) + (w_f1 * f1) + (w_auc * (0 if np.isnan(auc) else auc))


    # Save NetLSD embeddings (all graphs) + labels

    netlsd_exp_dir = os.path.join(EMBEDDINGS_DIR, "NetLSD",dataset_name, experiment_id)
    os.makedirs(netlsd_exp_dir, exist_ok=True)
    np.save(os.path.join(netlsd_exp_dir, "embeddings.npy"), emb_all)
    # Save labels in the SAME order as embeddings (train_pos + test_pos)
    np.save(os.path.join(netlsd_exp_dir, "labels.npy"), labels[order_pos])
    # Fix: persist ORIGINAL dataset indices (so stability can rebuild graphs correctly)
    np.save(os.path.join(netlsd_exp_dir, "order.npy"), ds_indices[order_pos])

    # Save fitted NetLSD model
    netlsd_model_path = f"{MODELS_DIR}/NetLSD_{dataset_name}_{experiment_id}.joblib"
    joblib.dump(netlsd, netlsd_model_path)

    # Save trained SVM classifier
    svm_model_path = f"{MODELS_DIR}/NetLSD_SVM_{dataset_name}_{experiment_id}.joblib"
    joblib.dump(clf, svm_model_path)


    process = psutil.Process(os.getpid())
    memory_usage = process.memory_info().rss / (1024 ** 2)  # in MB

    print(f"NetLSD Results on {dataset_name} -> Acc: {acc:.3f}, F1: {f1:.3f}, AUC: {auc:.3f}, Score: {score:.3f}")
    print(f"Embedding time: {embed_time:.2f}s | SVM training time: {train_time:.2f}s | Optuna time: {opt_time:.2f}s | Memory usage: {memory_usage:.2f} MB")

    # Log summary to CSV
    summary_path = f"{CLASSIF_RESULTS_DIR}/netlsd.csv"

    summary_data = {
        "method": "NetLSD",
        "seed": seed,
        "optuna_enabled": "yes" if use_optuna else "no",
        "C": best_params["C"],
        "gamma": best_params["gamma"],
        "acc": round(float(acc), 4),
        "f1": round(float(f1), 4),
        "auc": round(float(auc) if not np.isnan(auc) else -1, 4),
        "score": round(float(score), 4),
        "embedding_time (s)": round(embed_time, 2),
        "svm_training_time (s)": round(train_time, 2),
        "optuna_time (s)": round(opt_time, 2),
        "memory_usage (MB)": round(memory_usage, 2),
    }

    df = pd.DataFrame([summary_data])
    if os.path.exists(summary_path):
        df.to_csv(summary_path, mode='a', index=False, header=False)
    else:
        df.to_csv(summary_path, index=False)

    print(f"NetLSD summary stored in: {summary_path}")


In [14]:
for dataset_name in ["IMDB-MULTI", "ENZYMES", "MUTAG"]:
  for seed in [42, 43, 44]:
      run_gin_pipeline(
          dataset_name=dataset_name,
          seed=seed,
          use_optuna=True,
          w_acc=0.5,
          w_f1=0.3,
          w_auc=0.2,
          hidden_dim=64,
          epochs=200,
          batch_size=32,
          n_trials=10,
      )

Dataset has no node features. Applying OneHotDegree transform...


[I 2026-01-27 17:35:32,568] A new study created in memory with name: no-name-8916767e-3b86-4f14-b1ca-cd4d2b0ed20f


Loaded dataset IMDB-MULTI: 1500 graphs, 89 node features, 3 classes
Running Optuna for hyperparameter tuning...


[I 2026-01-27 17:35:39,782] Trial 0 finished with value: 0.44587087521311447 and parameters: {'num_layers': 4, 'dropout': 0.1789115145273103, 'lr': 0.0003149013649881986, 'weight_decay': 8.035259098640091e-05}. Best is trial 0 with value: 0.44587087521311447.
[I 2026-01-27 17:35:44,512] Trial 1 finished with value: 0.4909933557499653 and parameters: {'num_layers': 3, 'dropout': 0.410648645730567, 'lr': 0.0003144417340315641, 'weight_decay': 4.316958953756027e-05}. Best is trial 1 with value: 0.4909933557499653.
[I 2026-01-27 17:35:49,423] Trial 2 finished with value: 0.5029612026679156 and parameters: {'num_layers': 3, 'dropout': 0.594114263810321, 'lr': 0.0021316908204075885, 'weight_decay': 3.663522497433119e-05}. Best is trial 2 with value: 0.5029612026679156.
[I 2026-01-27 17:35:53,164] Trial 3 finished with value: 0.4055543721413451 and parameters: {'num_layers': 4, 'dropout': 0.168177813512488, 'lr': 0.00017266278603552933, 'weight_decay': 0.0001980061603105917}. Best is trial 2 

Best hyperparameters: {'num_layers': 3, 'dropout': 0.594114263810321, 'lr': 0.0021316908204075885, 'weight_decay': 3.663522497433119e-05}

Running final training GIN...
{'num_layers': 3, 'dropout': 0.594114263810321, 'lr': 0.0021316908204075885, 'weight_decay': 3.663522497433119e-05}
Epoch 001 | Loss=2.3996 | TestAcc=0.477 | F1=0.477 | AUC=0.651 | Time=0.35s
Epoch 002 | Loss=1.7556 | TestAcc=0.400 | F1=0.348 | AUC=0.611 | Time=0.70s
Epoch 003 | Loss=1.4624 | TestAcc=0.420 | F1=0.387 | AUC=0.606 | Time=1.05s
Epoch 004 | Loss=1.2173 | TestAcc=0.467 | F1=0.433 | AUC=0.661 | Time=1.40s
Epoch 005 | Loss=1.0912 | TestAcc=0.470 | F1=0.461 | AUC=0.654 | Time=1.77s
Epoch 006 | Loss=1.0943 | TestAcc=0.460 | F1=0.428 | AUC=0.653 | Time=2.12s
Epoch 007 | Loss=1.0525 | TestAcc=0.467 | F1=0.426 | AUC=0.680 | Time=2.46s
Epoch 008 | Loss=1.0355 | TestAcc=0.480 | F1=0.449 | AUC=0.671 | Time=2.82s
Epoch 009 | Loss=1.0368 | TestAcc=0.487 | F1=0.485 | AUC=0.666 | Time=3.17s
Epoch 010 | Loss=1.0314 | TestA

[I 2026-01-27 17:37:23,648] A new study created in memory with name: no-name-949a25ca-1005-4894-a4ae-f8df6a54014b


Loaded dataset IMDB-MULTI: 1500 graphs, 89 node features, 3 classes
Running Optuna for hyperparameter tuning...


[I 2026-01-27 17:37:25,714] Trial 0 finished with value: 0.43206978756071074 and parameters: {'num_layers': 4, 'dropout': 0.048554786682629976, 'lr': 0.000821332771808136, 'weight_decay': 4.832262236849303e-06}. Best is trial 0 with value: 0.43206978756071074.
[I 2026-01-27 17:37:27,825] Trial 1 finished with value: 0.3783728866217536 and parameters: {'num_layers': 5, 'dropout': 0.3874049602254113, 'lr': 0.00012492159714514201, 'weight_decay': 1.3679448601761318e-05}. Best is trial 0 with value: 0.43206978756071074.
[I 2026-01-27 17:37:29,598] Trial 2 finished with value: 0.5342208278192756 and parameters: {'num_layers': 3, 'dropout': 0.05643184068150653, 'lr': 0.008682711243659175, 'weight_decay': 0.00037285376000681814}. Best is trial 2 with value: 0.5342208278192756.
[I 2026-01-27 17:37:31,787] Trial 3 finished with value: 0.4724744971595624 and parameters: {'num_layers': 5, 'dropout': 0.17050826567546176, 'lr': 0.0071286997532850555, 'weight_decay': 0.0008886845293093101}. Best is 

Best hyperparameters: {'num_layers': 3, 'dropout': 0.05643184068150653, 'lr': 0.008682711243659175, 'weight_decay': 0.00037285376000681814}

Running final training GIN...
{'num_layers': 3, 'dropout': 0.05643184068150653, 'lr': 0.008682711243659175, 'weight_decay': 0.00037285376000681814}
Epoch 001 | Loss=1.9572 | TestAcc=0.387 | F1=0.359 | AUC=0.536 | Time=0.35s
Epoch 002 | Loss=1.2050 | TestAcc=0.430 | F1=0.397 | AUC=0.673 | Time=0.71s
Epoch 003 | Loss=1.0640 | TestAcc=0.490 | F1=0.483 | AUC=0.667 | Time=1.20s
Epoch 004 | Loss=1.0338 | TestAcc=0.443 | F1=0.425 | AUC=0.653 | Time=1.66s
Epoch 005 | Loss=1.0372 | TestAcc=0.457 | F1=0.393 | AUC=0.661 | Time=2.16s
Epoch 006 | Loss=1.0126 | TestAcc=0.443 | F1=0.408 | AUC=0.675 | Time=2.62s
Epoch 007 | Loss=1.0088 | TestAcc=0.507 | F1=0.436 | AUC=0.667 | Time=3.08s
Epoch 008 | Loss=1.0285 | TestAcc=0.457 | F1=0.454 | AUC=0.642 | Time=3.58s
Epoch 009 | Loss=1.0154 | TestAcc=0.433 | F1=0.385 | AUC=0.654 | Time=4.04s
Epoch 010 | Loss=0.9963 | T

[I 2026-01-27 17:39:03,281] A new study created in memory with name: no-name-0d9f1599-b179-460a-86c2-6fa7c391153e


Loaded dataset IMDB-MULTI: 1500 graphs, 89 node features, 3 classes
Running Optuna for hyperparameter tuning...


[I 2026-01-27 17:39:06,004] Trial 0 finished with value: 0.4306182605538637 and parameters: {'num_layers': 5, 'dropout': 0.4763631702042135, 'lr': 0.0002269960719596284, 'weight_decay': 2.3180445433799735e-06}. Best is trial 0 with value: 0.4306182605538637.
[I 2026-01-27 17:39:08,439] Trial 1 finished with value: 0.5381627622386819 and parameters: {'num_layers': 3, 'dropout': 0.5295522006031206, 'lr': 0.001402970913782123, 'weight_decay': 1.6657985859816478e-05}. Best is trial 1 with value: 0.5381627622386819.
[I 2026-01-27 17:39:10,402] Trial 2 finished with value: 0.4835180044966748 and parameters: {'num_layers': 4, 'dropout': 0.07496566305421304, 'lr': 0.008380475116089586, 'weight_decay': 1.7625175119050873e-06}. Best is trial 1 with value: 0.5381627622386819.
[I 2026-01-27 17:39:12,542] Trial 3 finished with value: 0.4494693455867373 and parameters: {'num_layers': 5, 'dropout': 0.48627301764635655, 'lr': 0.0001690178788740342, 'weight_decay': 0.00039599025739748026}. Best is tria

Best hyperparameters: {'num_layers': 3, 'dropout': 0.5295522006031206, 'lr': 0.001402970913782123, 'weight_decay': 1.6657985859816478e-05}

Running final training GIN...
{'num_layers': 3, 'dropout': 0.5295522006031206, 'lr': 0.001402970913782123, 'weight_decay': 1.6657985859816478e-05}
Epoch 001 | Loss=2.1836 | TestAcc=0.413 | F1=0.323 | AUC=0.683 | Time=0.36s
Epoch 002 | Loss=1.6860 | TestAcc=0.477 | F1=0.467 | AUC=0.647 | Time=0.70s
Epoch 003 | Loss=1.4860 | TestAcc=0.503 | F1=0.503 | AUC=0.686 | Time=1.07s
Epoch 004 | Loss=1.2259 | TestAcc=0.473 | F1=0.450 | AUC=0.670 | Time=1.43s
Epoch 005 | Loss=1.1583 | TestAcc=0.500 | F1=0.489 | AUC=0.691 | Time=1.77s
Epoch 006 | Loss=1.1131 | TestAcc=0.483 | F1=0.471 | AUC=0.667 | Time=2.13s
Epoch 007 | Loss=1.0683 | TestAcc=0.487 | F1=0.481 | AUC=0.689 | Time=2.48s
Epoch 008 | Loss=1.0422 | TestAcc=0.480 | F1=0.447 | AUC=0.673 | Time=2.84s
Epoch 009 | Loss=1.0608 | TestAcc=0.477 | F1=0.429 | AUC=0.676 | Time=3.21s
Epoch 010 | Loss=0.9960 | Tes

[I 2026-01-27 17:40:43,316] A new study created in memory with name: no-name-cb13f198-5b92-4ab5-a0c2-df56d64ffe11



Training summary stored in : /content/drive/MyDrive/InformationSystems/Classification/results/classification/gin.csv
  method  seed     dataset optimization_enabled  embedding_dimension  \
0    GIN    44  IMDB-MULTI                  yes                   64   

  objective_weights  num_layers   dropout        lr  weight_decay  epochs  \
0     (0.5,0.3,0.2)           3  0.529552  0.001403      0.000017     200   

   best_epoch  best_loss  eval_loss  eval_acc  eval_f1  eval_auc  \
0          43     0.9866     0.9866    0.5167   0.5053    0.6976   

   training_time (s)  generation_time (s)  memory_usage (MB)  
0              77.13                22.25            1439.54  
Saved model: /content/drive/MyDrive/InformationSystems/Classification/models/GIN_IMDB-MULTI_44.pth
Loaded dataset ENZYMES: 600 graphs, 3 node features, 6 classes
Running Optuna for hyperparameter tuning...


[I 2026-01-27 17:40:43,898] Trial 0 finished with value: 0.4199344509413244 and parameters: {'num_layers': 3, 'dropout': 0.4563099640429195, 'lr': 0.0002369663745664236, 'weight_decay': 1.743115535469026e-06}. Best is trial 0 with value: 0.4199344509413244.
[I 2026-01-27 17:40:44,655] Trial 1 finished with value: 0.2960375728009135 and parameters: {'num_layers': 6, 'dropout': 0.03402302916868094, 'lr': 0.00040852628267223657, 'weight_decay': 3.6067791832750497e-06}. Best is trial 0 with value: 0.4199344509413244.
[I 2026-01-27 17:40:45,420] Trial 2 finished with value: 0.3253178476619162 and parameters: {'num_layers': 6, 'dropout': 0.47528987901045194, 'lr': 0.0017429046280163054, 'weight_decay': 0.0002496555959941857}. Best is trial 0 with value: 0.4199344509413244.
[I 2026-01-27 17:40:46,098] Trial 3 finished with value: 0.31223354436626766 and parameters: {'num_layers': 5, 'dropout': 0.4472487973925237, 'lr': 0.009734329664710882, 'weight_decay': 7.344675573906255e-06}. Best is tria

Best hyperparameters: {'num_layers': 3, 'dropout': 0.4563099640429195, 'lr': 0.0002369663745664236, 'weight_decay': 1.743115535469026e-06}

Running final training GIN...
{'num_layers': 3, 'dropout': 0.4563099640429195, 'lr': 0.0002369663745664236, 'weight_decay': 1.743115535469026e-06}
Epoch 001 | Loss=6.2745 | TestAcc=0.200 | F1=0.081 | AUC=0.442 | Time=0.13s
Epoch 002 | Loss=4.5059 | TestAcc=0.208 | F1=0.120 | AUC=0.489 | Time=0.27s
Epoch 003 | Loss=3.7881 | TestAcc=0.242 | F1=0.188 | AUC=0.541 | Time=0.39s
Epoch 004 | Loss=3.8170 | TestAcc=0.250 | F1=0.220 | AUC=0.589 | Time=0.52s
Epoch 005 | Loss=3.1320 | TestAcc=0.267 | F1=0.265 | AUC=0.637 | Time=0.63s
Epoch 006 | Loss=3.2751 | TestAcc=0.308 | F1=0.290 | AUC=0.636 | Time=0.75s
Epoch 007 | Loss=3.0374 | TestAcc=0.300 | F1=0.277 | AUC=0.667 | Time=0.87s
Epoch 008 | Loss=2.9440 | TestAcc=0.325 | F1=0.313 | AUC=0.665 | Time=0.99s
Epoch 009 | Loss=2.7353 | TestAcc=0.317 | F1=0.307 | AUC=0.659 | Time=1.15s
Epoch 010 | Loss=2.5742 | Tes

[I 2026-01-27 17:41:15,415] A new study created in memory with name: no-name-3959f14b-f743-47e8-b602-764ce9a16daa


Epoch 200 | Loss=0.7432 | TestAcc=0.458 | F1=0.448 | AUC=0.765 | Time=25.53s

Training summary stored in : /content/drive/MyDrive/InformationSystems/Classification/results/classification/gin.csv
  method  seed  dataset optimization_enabled  embedding_dimension  \
0    GIN    42  ENZYMES                  yes                   64   

  objective_weights  num_layers  dropout        lr  weight_decay  epochs  \
0     (0.5,0.3,0.2)           3  0.45631  0.000237      0.000002     200   

   best_epoch  best_loss  eval_loss  eval_acc  eval_f1  eval_auc  \
0         168     1.9766     1.9766       0.5   0.4931    0.7544   

   training_time (s)  generation_time (s)  memory_usage (MB)  
0              25.53                 6.46            1439.98  
Saved model: /content/drive/MyDrive/InformationSystems/Classification/models/GIN_ENZYMES_42.pth
Loaded dataset ENZYMES: 600 graphs, 3 node features, 6 classes
Running Optuna for hyperparameter tuning...


[I 2026-01-27 17:41:16,032] Trial 0 finished with value: 0.35443880888484125 and parameters: {'num_layers': 4, 'dropout': 0.5604097484488613, 'lr': 0.0066450224044829476, 'weight_decay': 1.3707562001964826e-06}. Best is trial 0 with value: 0.35443880888484125.
[I 2026-01-27 17:41:16,542] Trial 1 finished with value: 0.43284084853988736 and parameters: {'num_layers': 3, 'dropout': 0.4354013924885559, 'lr': 0.007466931275764762, 'weight_decay': 1.5011746425889494e-05}. Best is trial 1 with value: 0.43284084853988736.
[I 2026-01-27 17:41:17,118] Trial 2 finished with value: 0.31680439285312945 and parameters: {'num_layers': 4, 'dropout': 0.35022985310079247, 'lr': 0.0009386814273596195, 'weight_decay': 4.729975358157199e-05}. Best is trial 1 with value: 0.43284084853988736.
[I 2026-01-27 17:41:17,774] Trial 3 finished with value: 0.4059906716489484 and parameters: {'num_layers': 3, 'dropout': 0.023967277965709587, 'lr': 0.0008901192405249095, 'weight_decay': 0.0007434705363991491}. Best i

Best hyperparameters: {'num_layers': 3, 'dropout': 0.4354013924885559, 'lr': 0.007466931275764762, 'weight_decay': 1.5011746425889494e-05}

Running final training GIN...
{'num_layers': 3, 'dropout': 0.4354013924885559, 'lr': 0.007466931275764762, 'weight_decay': 1.5011746425889494e-05}
Epoch 001 | Loss=5.7944 | TestAcc=0.250 | F1=0.166 | AUC=0.621 | Time=0.13s
Epoch 002 | Loss=3.2729 | TestAcc=0.200 | F1=0.173 | AUC=0.661 | Time=0.24s
Epoch 003 | Loss=2.3372 | TestAcc=0.267 | F1=0.230 | AUC=0.652 | Time=0.36s
Epoch 004 | Loss=1.9782 | TestAcc=0.242 | F1=0.241 | AUC=0.618 | Time=0.48s
Epoch 005 | Loss=1.7695 | TestAcc=0.300 | F1=0.307 | AUC=0.654 | Time=0.59s
Epoch 006 | Loss=1.6871 | TestAcc=0.317 | F1=0.310 | AUC=0.665 | Time=0.70s
Epoch 007 | Loss=1.6441 | TestAcc=0.283 | F1=0.265 | AUC=0.657 | Time=0.82s
Epoch 008 | Loss=1.6105 | TestAcc=0.367 | F1=0.351 | AUC=0.732 | Time=0.93s
Epoch 009 | Loss=1.5885 | TestAcc=0.317 | F1=0.294 | AUC=0.670 | Time=1.04s
Epoch 010 | Loss=1.5864 | Tes

[I 2026-01-27 17:41:47,681] A new study created in memory with name: no-name-f31d47bc-a3af-4c99-a512-a0a016940ce2


Epoch 200 | Loss=1.3775 | TestAcc=0.342 | F1=0.332 | AUC=0.683 | Time=25.26s

Training summary stored in : /content/drive/MyDrive/InformationSystems/Classification/results/classification/gin.csv
  method  seed  dataset optimization_enabled  embedding_dimension  \
0    GIN    43  ENZYMES                  yes                   64   

  objective_weights  num_layers   dropout        lr  weight_decay  epochs  \
0     (0.5,0.3,0.2)           3  0.435401  0.007467      0.000015     200   

   best_epoch  best_loss  eval_loss  eval_acc  eval_f1  eval_auc  \
0          15     1.6486     1.6486    0.4167   0.4231    0.7309   

   training_time (s)  generation_time (s)  memory_usage (MB)  
0              25.26                 6.88             1440.0  
Saved model: /content/drive/MyDrive/InformationSystems/Classification/models/GIN_ENZYMES_43.pth
Loaded dataset ENZYMES: 600 graphs, 3 node features, 6 classes
Running Optuna for hyperparameter tuning...


[I 2026-01-27 17:41:48,354] Trial 0 finished with value: 0.3294524007720977 and parameters: {'num_layers': 5, 'dropout': 0.4554538945747439, 'lr': 0.0007963138616745652, 'weight_decay': 1.851762791900625e-05}. Best is trial 0 with value: 0.3294524007720977.
[I 2026-01-27 17:41:49,090] Trial 1 finished with value: 0.3220928548864769 and parameters: {'num_layers': 6, 'dropout': 0.15581551532426824, 'lr': 0.006596351407364286, 'weight_decay': 0.0002439696658539506}. Best is trial 0 with value: 0.3294524007720977.
[I 2026-01-27 17:41:49,600] Trial 2 finished with value: 0.34687654745122665 and parameters: {'num_layers': 3, 'dropout': 0.4036149238858609, 'lr': 0.0001319614490565822, 'weight_decay': 1.0417698860690217e-05}. Best is trial 2 with value: 0.34687654745122665.
[I 2026-01-27 17:41:50,232] Trial 3 finished with value: 0.21823031945072816 and parameters: {'num_layers': 5, 'dropout': 0.5602691042108918, 'lr': 0.00017023221111445692, 'weight_decay': 0.00012131277145411777}. Best is tr

Best hyperparameters: {'num_layers': 4, 'dropout': 0.1695997123032732, 'lr': 0.003016042378704602, 'weight_decay': 0.0005793735616898713}

Running final training GIN...
{'num_layers': 4, 'dropout': 0.1695997123032732, 'lr': 0.003016042378704602, 'weight_decay': 0.0005793735616898713}
Epoch 001 | Loss=3.9202 | TestAcc=0.167 | F1=0.161 | AUC=0.505 | Time=0.14s
Epoch 002 | Loss=2.7112 | TestAcc=0.233 | F1=0.215 | AUC=0.550 | Time=0.28s
Epoch 003 | Loss=2.6291 | TestAcc=0.258 | F1=0.266 | AUC=0.600 | Time=0.41s
Epoch 004 | Loss=2.2990 | TestAcc=0.250 | F1=0.235 | AUC=0.627 | Time=0.54s
Epoch 005 | Loss=2.1031 | TestAcc=0.283 | F1=0.259 | AUC=0.669 | Time=0.67s
Epoch 006 | Loss=1.9967 | TestAcc=0.333 | F1=0.304 | AUC=0.676 | Time=0.80s
Epoch 007 | Loss=1.7386 | TestAcc=0.358 | F1=0.341 | AUC=0.698 | Time=0.94s
Epoch 008 | Loss=1.6493 | TestAcc=0.275 | F1=0.271 | AUC=0.625 | Time=1.08s
Epoch 009 | Loss=1.6442 | TestAcc=0.367 | F1=0.342 | AUC=0.726 | Time=1.21s
Epoch 010 | Loss=1.5948 | TestA

[I 2026-01-27 17:42:22,304] A new study created in memory with name: no-name-d9dbcd8e-3a44-4dde-969c-7eb4a7fda476


Epoch 200 | Loss=0.5922 | TestAcc=0.442 | F1=0.445 | AUC=0.755 | Time=28.39s

Training summary stored in : /content/drive/MyDrive/InformationSystems/Classification/results/classification/gin.csv
  method  seed  dataset optimization_enabled  embedding_dimension  \
0    GIN    44  ENZYMES                  yes                   64   

  objective_weights  num_layers  dropout        lr  weight_decay  epochs  \
0     (0.5,0.3,0.2)           4   0.1696  0.003016      0.000579     200   

   best_epoch  best_loss  eval_loss  eval_acc  eval_f1  eval_auc  \
0         185     2.3739     2.3739    0.5083   0.5084    0.7528   

   training_time (s)  generation_time (s)  memory_usage (MB)  
0              28.39                 6.11            1440.02  
Saved model: /content/drive/MyDrive/InformationSystems/Classification/models/GIN_ENZYMES_44.pth
Loaded dataset MUTAG: 188 graphs, 7 node features, 2 classes
Running Optuna for hyperparameter tuning...


[I 2026-01-27 17:42:22,543] Trial 0 finished with value: 0.7618562753036437 and parameters: {'num_layers': 5, 'dropout': 0.2480007873539216, 'lr': 0.005519380439870522, 'weight_decay': 7.749636253915167e-05}. Best is trial 0 with value: 0.7618562753036437.
[I 2026-01-27 17:42:22,805] Trial 1 finished with value: 0.652973587815693 and parameters: {'num_layers': 6, 'dropout': 0.49973103881484837, 'lr': 0.00044192139892283315, 'weight_decay': 1.8130482056423905e-06}. Best is trial 0 with value: 0.7618562753036437.
[I 2026-01-27 17:42:23,053] Trial 2 finished with value: 0.68189666473877 and parameters: {'num_layers': 6, 'dropout': 0.3526095357156409, 'lr': 0.0002935798515351089, 'weight_decay': 6.147739358123621e-06}. Best is trial 0 with value: 0.7618562753036437.
[I 2026-01-27 17:42:23,388] Trial 3 finished with value: 0.3815284591569421 and parameters: {'num_layers': 6, 'dropout': 0.0047523083087558145, 'lr': 0.0010536782948178676, 'weight_decay': 1.245252942445855e-06}. Best is trial 

Best hyperparameters: {'num_layers': 5, 'dropout': 0.2480007873539216, 'lr': 0.005519380439870522, 'weight_decay': 7.749636253915167e-05}

Running final training GIN...
{'num_layers': 5, 'dropout': 0.2480007873539216, 'lr': 0.005519380439870522, 'weight_decay': 7.749636253915167e-05}
Epoch 001 | Loss=0.8902 | TestAcc=0.658 | F1=0.522 | AUC=0.938 | Time=0.07s
Epoch 002 | Loss=0.8633 | TestAcc=0.684 | F1=0.612 | AUC=0.865 | Time=0.14s
Epoch 003 | Loss=0.5682 | TestAcc=0.816 | F1=0.820 | AUC=0.865 | Time=0.21s
Epoch 004 | Loss=0.4030 | TestAcc=0.842 | F1=0.846 | AUC=0.849 | Time=0.27s
Epoch 005 | Loss=0.3590 | TestAcc=0.711 | F1=0.713 | AUC=0.874 | Time=0.34s
Epoch 006 | Loss=0.4203 | TestAcc=0.684 | F1=0.684 | AUC=0.883 | Time=0.41s
Epoch 007 | Loss=0.3379 | TestAcc=0.789 | F1=0.794 | AUC=0.892 | Time=0.48s
Epoch 008 | Loss=0.3390 | TestAcc=0.868 | F1=0.872 | AUC=0.923 | Time=0.56s
Epoch 009 | Loss=0.2970 | TestAcc=0.868 | F1=0.867 | AUC=0.935 | Time=0.64s
Epoch 010 | Loss=0.2906 | TestA

[I 2026-01-27 17:42:36,329] A new study created in memory with name: no-name-3c06e9a3-652b-4649-b197-046a8b02d269


Saved model: /content/drive/MyDrive/InformationSystems/Classification/models/GIN_MUTAG_42.pth
Loaded dataset MUTAG: 188 graphs, 7 node features, 2 classes
Running Optuna for hyperparameter tuning...


[I 2026-01-27 17:42:36,685] Trial 0 finished with value: 0.2483736397270232 and parameters: {'num_layers': 5, 'dropout': 0.27082717210844004, 'lr': 0.0004444005127989369, 'weight_decay': 0.00020202073440608157}. Best is trial 0 with value: 0.2483736397270232.
[I 2026-01-27 17:42:37,024] Trial 1 finished with value: 0.8906928393244182 and parameters: {'num_layers': 6, 'dropout': 0.3112402384426334, 'lr': 0.005149715217118113, 'weight_decay': 4.818880269263866e-06}. Best is trial 1 with value: 0.8906928393244182.
[I 2026-01-27 17:42:37,303] Trial 2 finished with value: 0.6551062743013207 and parameters: {'num_layers': 5, 'dropout': 0.33177020602750074, 'lr': 0.00040355005040478736, 'weight_decay': 3.777527412467734e-05}. Best is trial 1 with value: 0.8906928393244182.
[I 2026-01-27 17:42:37,638] Trial 3 finished with value: 0.19248138383476732 and parameters: {'num_layers': 5, 'dropout': 0.06266289006142074, 'lr': 0.00011021042885174218, 'weight_decay': 0.00024368172266079423}. Best is t

Best hyperparameters: {'num_layers': 6, 'dropout': 0.3112402384426334, 'lr': 0.005149715217118113, 'weight_decay': 4.818880269263866e-06}

Running final training GIN...
{'num_layers': 6, 'dropout': 0.3112402384426334, 'lr': 0.005149715217118113, 'weight_decay': 4.818880269263866e-06}
Epoch 001 | Loss=1.1756 | TestAcc=0.711 | F1=0.590 | AUC=0.946 | Time=0.09s
Epoch 002 | Loss=1.4412 | TestAcc=0.711 | F1=0.590 | AUC=0.323 | Time=0.17s
Epoch 003 | Loss=0.8649 | TestAcc=0.237 | F1=0.149 | AUC=0.155 | Time=0.23s
Epoch 004 | Loss=0.6916 | TestAcc=0.289 | F1=0.130 | AUC=0.283 | Time=0.29s
Epoch 005 | Loss=0.4023 | TestAcc=0.132 | F1=0.097 | AUC=0.030 | Time=0.35s
Epoch 006 | Loss=0.3244 | TestAcc=0.263 | F1=0.161 | AUC=0.047 | Time=0.41s
Epoch 007 | Loss=0.3382 | TestAcc=0.263 | F1=0.161 | AUC=0.273 | Time=0.47s
Epoch 008 | Loss=0.4263 | TestAcc=0.316 | F1=0.252 | AUC=0.458 | Time=0.53s
Epoch 009 | Loss=0.4223 | TestAcc=0.842 | F1=0.848 | AUC=0.889 | Time=0.59s
Epoch 010 | Loss=0.4245 | TestA

[I 2026-01-27 17:42:51,989] A new study created in memory with name: no-name-1f1c1a71-ca54-4224-a38f-1d588513eec0


Epoch 200 | Loss=0.2939 | TestAcc=0.842 | F1=0.820 | AUC=0.966 | Time=12.54s

Training summary stored in : /content/drive/MyDrive/InformationSystems/Classification/results/classification/gin.csv
  method  seed dataset optimization_enabled  embedding_dimension  \
0    GIN    43   MUTAG                  yes                   64   

  objective_weights  num_layers  dropout       lr  weight_decay  epochs  \
0     (0.5,0.3,0.2)           6  0.31124  0.00515      0.000005     200   

   best_epoch  best_loss  eval_loss  eval_acc  eval_f1  eval_auc  \
0          23     0.3237     0.3237    0.9211    0.922    0.9596   

   training_time (s)  generation_time (s)  memory_usage (MB)  
0              12.54                 3.01            1440.09  
Saved model: /content/drive/MyDrive/InformationSystems/Classification/models/GIN_MUTAG_43.pth
Loaded dataset MUTAG: 188 graphs, 7 node features, 2 classes
Running Optuna for hyperparameter tuning...


[I 2026-01-27 17:42:52,385] Trial 0 finished with value: 0.7887775998481668 and parameters: {'num_layers': 5, 'dropout': 0.23846767413995934, 'lr': 0.004767743741387555, 'weight_decay': 0.00010172329037934279}. Best is trial 0 with value: 0.7887775998481668.
[I 2026-01-27 17:42:52,731] Trial 1 finished with value: 0.25808779820616007 and parameters: {'num_layers': 6, 'dropout': 0.16272197158301505, 'lr': 0.0005489954961812735, 'weight_decay': 1.7990479666192114e-05}. Best is trial 0 with value: 0.7887775998481668.
[I 2026-01-27 17:42:52,904] Trial 2 finished with value: 0.8552833952697836 and parameters: {'num_layers': 3, 'dropout': 0.4892944312110003, 'lr': 0.002267340128662578, 'weight_decay': 0.00035419932725050293}. Best is trial 2 with value: 0.8552833952697836.
[I 2026-01-27 17:42:53,165] Trial 3 finished with value: 0.7621476706004159 and parameters: {'num_layers': 6, 'dropout': 0.13912176774264445, 'lr': 0.0014417392795328524, 'weight_decay': 0.00016318300583254082}. Best is tr

Best hyperparameters: {'num_layers': 5, 'dropout': 0.5645653497395093, 'lr': 0.005290060438946103, 'weight_decay': 4.172358923310851e-06}

Running final training GIN...
{'num_layers': 5, 'dropout': 0.5645653497395093, 'lr': 0.005290060438946103, 'weight_decay': 4.172358923310851e-06}
Epoch 001 | Loss=1.2848 | TestAcc=0.763 | F1=0.661 | AUC=0.904 | Time=0.05s
Epoch 002 | Loss=0.8336 | TestAcc=0.763 | F1=0.661 | AUC=0.897 | Time=0.10s
Epoch 003 | Loss=1.0150 | TestAcc=0.789 | F1=0.718 | AUC=0.881 | Time=0.15s
Epoch 004 | Loss=0.7314 | TestAcc=0.816 | F1=0.767 | AUC=0.866 | Time=0.20s
Epoch 005 | Loss=0.5198 | TestAcc=0.842 | F1=0.810 | AUC=0.904 | Time=0.25s
Epoch 006 | Loss=0.4884 | TestAcc=0.816 | F1=0.788 | AUC=0.920 | Time=0.31s
Epoch 007 | Loss=0.4197 | TestAcc=0.842 | F1=0.825 | AUC=0.935 | Time=0.36s
Epoch 008 | Loss=0.3670 | TestAcc=0.895 | F1=0.890 | AUC=0.935 | Time=0.41s
Epoch 009 | Loss=0.2968 | TestAcc=0.842 | F1=0.825 | AUC=0.931 | Time=0.47s
Epoch 010 | Loss=0.3331 | TestA

In [15]:
for dataset_name in ["IMDB-MULTI", "ENZYMES", "MUTAG"]:
  for seed in [42, 43, 44]:
      run_graph2vec_pipeline(
          dataset_name=dataset_name,
          seed=seed,
          w_acc=0.5, w_f1=0.3, w_auc=0.2,
          embedding_dim=128,
          epochs=200,
          test_size=0.2,
          use_optuna=True,
          n_trials=10,
      )

Loaded dataset IMDB-MULTI for Graph2Vec: 1500 graphs, 3 classes


[I 2026-01-27 17:43:06,910] A new study created in memory with name: no-name-0658c69d-f2b8-4c8f-940c-48e0a2878913


Running Optuna for Graph2Vec+SVM hyperparameter tuning...


[I 2026-01-27 17:43:16,298] Trial 0 finished with value: 0.47770594407313155 and parameters: {'C': 10.867463624186174, 'gamma': 4.779141986996383}. Best is trial 0 with value: 0.47770594407313155.
[I 2026-01-27 17:43:25,952] Trial 1 finished with value: 0.446353745579119 and parameters: {'C': 65.94454809217339, 'gamma': 3.9232357221947645}. Best is trial 0 with value: 0.47770594407313155.
[I 2026-01-27 17:43:35,237] Trial 2 finished with value: 0.5674344070994178 and parameters: {'C': 0.3155516521751666, 'gamma': 0.0077339853580041485}. Best is trial 2 with value: 0.5674344070994178.
[I 2026-01-27 17:43:44,689] Trial 3 finished with value: 0.4914423534729372 and parameters: {'C': 0.20700711333234584, 'gamma': 2.8270055433898396}. Best is trial 2 with value: 0.5674344070994178.
[I 2026-01-27 17:43:54,009] Trial 4 finished with value: 0.5405306657072796 and parameters: {'C': 3.6792611145200764, 'gamma': 0.000633127285473922}. Best is trial 2 with value: 0.5674344070994178.
[I 2026-01-27 

Best hyperparameters (Graph2Vec+SVM): {'C': 0.3155516521751666, 'gamma': 0.0077339853580041485}
Running final Graph2Vec embedding on train+test graphs...
Training final SVM on Graph2Vec embeddings...
Graph2Vec Results on IMDB-MULTI -> Acc: 0.450, F1: 0.421, AUC: 0.664, Score: 0.484
Embedding time: 11.34s | SVM training time: 1.02s | Optuna time: 95.06s | Memory usage: 1460.57 MB
Graph2Vec summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/g2v.csv
Loaded dataset IMDB-MULTI for Graph2Vec: 1500 graphs, 3 classes


[I 2026-01-27 17:44:55,009] A new study created in memory with name: no-name-a98c1600-5c8a-4184-ba16-31af84f2f7c1


Running Optuna for Graph2Vec+SVM hyperparameter tuning...


[I 2026-01-27 17:45:04,963] Trial 0 finished with value: 0.43945293859286033 and parameters: {'C': 0.03597802938158339, 'gamma': 0.002134146183770069}. Best is trial 0 with value: 0.43945293859286033.
[I 2026-01-27 17:45:14,562] Trial 1 finished with value: 0.5170263463314534 and parameters: {'C': 3.4746481988363778, 'gamma': 7.106851736533086}. Best is trial 1 with value: 0.5170263463314534.
[I 2026-01-27 17:45:23,900] Trial 2 finished with value: 0.51500885427439 and parameters: {'C': 28.833320595019316, 'gamma': 0.005690848803869117}. Best is trial 1 with value: 0.5170263463314534.
[I 2026-01-27 17:45:33,500] Trial 3 finished with value: 0.42188040935130694 and parameters: {'C': 0.20733486197823367, 'gamma': 0.0007820423070887074}. Best is trial 1 with value: 0.5170263463314534.
[I 2026-01-27 17:45:42,909] Trial 4 finished with value: 0.43290168601933093 and parameters: {'C': 0.029279026841699796, 'gamma': 0.0002803309324004197}. Best is trial 1 with value: 0.5170263463314534.
[I 20

Best hyperparameters (Graph2Vec+SVM): {'C': 3.4746481988363778, 'gamma': 7.106851736533086}
Running final Graph2Vec embedding on train+test graphs...
Training final SVM on Graph2Vec embeddings...
Graph2Vec Results on IMDB-MULTI -> Acc: 0.433, F1: 0.430, AUC: 0.590, Score: 0.464
Embedding time: 11.15s | SVM training time: 1.24s | Optuna time: 94.60s | Memory usage: 1471.80 MB
Graph2Vec summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/g2v.csv
Loaded dataset IMDB-MULTI for Graph2Vec: 1500 graphs, 3 classes


[I 2026-01-27 17:46:42,886] A new study created in memory with name: no-name-38dfd258-df72-4ad9-864c-7fce5f9a4545


Running Optuna for Graph2Vec+SVM hyperparameter tuning...


[I 2026-01-27 17:46:51,837] Trial 0 finished with value: 0.43986437485622265 and parameters: {'C': 0.05698841520046094, 'gamma': 0.4180560740347483}. Best is trial 0 with value: 0.43986437485622265.
[I 2026-01-27 17:47:01,066] Trial 1 finished with value: 0.5535732139570255 and parameters: {'C': 40.41673026445329, 'gamma': 1.4814663179322798}. Best is trial 1 with value: 0.5535732139570255.
[I 2026-01-27 17:47:10,452] Trial 2 finished with value: 0.5269673669127395 and parameters: {'C': 2.249545695833285, 'gamma': 0.09297721593632988}. Best is trial 1 with value: 0.5535732139570255.
[I 2026-01-27 17:47:19,219] Trial 3 finished with value: 0.3723607718030543 and parameters: {'C': 0.08744337672473487, 'gamma': 9.70680506005835}. Best is trial 1 with value: 0.5535732139570255.
[I 2026-01-27 17:47:28,293] Trial 4 finished with value: 0.5271770610754986 and parameters: {'C': 0.8581359778803447, 'gamma': 1.2530857788540548}. Best is trial 1 with value: 0.5535732139570255.
[I 2026-01-27 17:47

Best hyperparameters (Graph2Vec+SVM): {'C': 40.41673026445329, 'gamma': 1.4814663179322798}
Running final Graph2Vec embedding on train+test graphs...
Training final SVM on Graph2Vec embeddings...
Graph2Vec Results on IMDB-MULTI -> Acc: 0.417, F1: 0.415, AUC: 0.591, Score: 0.451
Embedding time: 11.21s | SVM training time: 0.79s | Optuna time: 90.18s | Memory usage: 1473.54 MB
Graph2Vec summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/g2v.csv
Loaded dataset ENZYMES for Graph2Vec: 600 graphs, 6 classes


[I 2026-01-27 17:48:25,728] A new study created in memory with name: no-name-4469dab1-0869-4126-abf1-0763399d33d9


ENZYMES filtering: removed 1 graphs with < 3 nodes, kept 599 graphs.
Running Optuna for Graph2Vec+SVM hyperparameter tuning...


[I 2026-01-27 17:48:31,031] Trial 0 finished with value: 0.3175005307948252 and parameters: {'C': 0.028639419138244963, 'gamma': 0.25291636755107744}. Best is trial 0 with value: 0.3175005307948252.
[I 2026-01-27 17:48:36,125] Trial 1 finished with value: 0.2995560241599486 and parameters: {'C': 1.7706868441888195, 'gamma': 0.0017355386814643249}. Best is trial 0 with value: 0.3175005307948252.
[I 2026-01-27 17:48:41,163] Trial 2 finished with value: 0.15437288851351352 and parameters: {'C': 0.012698233446550343, 'gamma': 4.3356765972054845}. Best is trial 0 with value: 0.3175005307948252.
[I 2026-01-27 17:48:46,423] Trial 3 finished with value: 0.15129261363636365 and parameters: {'C': 0.015777265435590104, 'gamma': 1.9935324495981528}. Best is trial 0 with value: 0.3175005307948252.
[I 2026-01-27 17:48:51,336] Trial 4 finished with value: 0.2754628754287423 and parameters: {'C': 0.01060338812839618, 'gamma': 0.0005919846446764682}. Best is trial 0 with value: 0.3175005307948252.
[I 2

Best hyperparameters (Graph2Vec+SVM): {'C': 1.8635933862071399, 'gamma': 0.18780724178389432}
Running final Graph2Vec embedding on train+test graphs...
Training final SVM on Graph2Vec embeddings...
Graph2Vec Results on ENZYMES -> Acc: 0.392, F1: 0.393, AUC: 0.730, Score: 0.460
Embedding time: 6.86s | SVM training time: 0.14s | Optuna time: 51.68s | Memory usage: 1473.60 MB
Graph2Vec summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/g2v.csv
Loaded dataset ENZYMES for Graph2Vec: 600 graphs, 6 classes


[I 2026-01-27 17:49:24,746] A new study created in memory with name: no-name-dbd509f9-3f94-42bf-ac8a-1bd424a2020a


ENZYMES filtering: removed 1 graphs with < 3 nodes, kept 599 graphs.
Running Optuna for Graph2Vec+SVM hyperparameter tuning...


[I 2026-01-27 17:49:29,728] Trial 0 finished with value: 0.37599187271062273 and parameters: {'C': 95.46059495937493, 'gamma': 0.00031646576010734733}. Best is trial 0 with value: 0.37599187271062273.
[I 2026-01-27 17:49:34,848] Trial 1 finished with value: 0.2427298078043947 and parameters: {'C': 1.6112514329880547, 'gamma': 0.0013706351008168876}. Best is trial 0 with value: 0.37599187271062273.
[I 2026-01-27 17:49:40,153] Trial 2 finished with value: 0.39019362469898233 and parameters: {'C': 0.7720645332446774, 'gamma': 0.20183208626826546}. Best is trial 2 with value: 0.39019362469898233.
[I 2026-01-27 17:49:45,110] Trial 3 finished with value: 0.4450483660438298 and parameters: {'C': 4.004333647847854, 'gamma': 0.2595466630399477}. Best is trial 3 with value: 0.4450483660438298.
[I 2026-01-27 17:49:50,474] Trial 4 finished with value: 0.4609305223285487 and parameters: {'C': 1.5094348948602458, 'gamma': 0.1895419485981245}. Best is trial 4 with value: 0.4609305223285487.
[I 2026-0

Best hyperparameters (Graph2Vec+SVM): {'C': 1.5094348948602458, 'gamma': 0.1895419485981245}
Running final Graph2Vec embedding on train+test graphs...
Training final SVM on Graph2Vec embeddings...
Graph2Vec Results on ENZYMES -> Acc: 0.400, F1: 0.397, AUC: 0.733, Score: 0.466
Embedding time: 6.70s | SVM training time: 0.14s | Optuna time: 52.22s | Memory usage: 1473.60 MB
Graph2Vec summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/g2v.csv
Loaded dataset ENZYMES for Graph2Vec: 600 graphs, 6 classes


[I 2026-01-27 17:50:24,126] A new study created in memory with name: no-name-2ecbd273-3057-4316-90ed-dbe80120633c


ENZYMES filtering: removed 1 graphs with < 3 nodes, kept 599 graphs.
Running Optuna for Graph2Vec+SVM hyperparameter tuning...


[I 2026-01-27 17:50:29,342] Trial 0 finished with value: 0.3363530497299789 and parameters: {'C': 13.622655632171941, 'gamma': 0.007018551119933589}. Best is trial 0 with value: 0.3363530497299789.
[I 2026-01-27 17:50:34,766] Trial 1 finished with value: 0.2932746676632546 and parameters: {'C': 0.010092705024610049, 'gamma': 0.24798919625796587}. Best is trial 0 with value: 0.3363530497299789.
[I 2026-01-27 17:50:39,961] Trial 2 finished with value: 0.3365954573443798 and parameters: {'C': 80.92347210741383, 'gamma': 0.00016108893199756868}. Best is trial 2 with value: 0.3365954573443798.
[I 2026-01-27 17:50:45,297] Trial 3 finished with value: 0.25217453276857305 and parameters: {'C': 2.939513872715674, 'gamma': 0.00015600826633336648}. Best is trial 2 with value: 0.3365954573443798.
[I 2026-01-27 17:50:50,187] Trial 4 finished with value: 0.30944733037077055 and parameters: {'C': 8.535788877502526, 'gamma': 0.8780950318836974}. Best is trial 2 with value: 0.3365954573443798.
[I 2026-

Best hyperparameters (Graph2Vec+SVM): {'C': 37.43424113468544, 'gamma': 0.05742978471279611}
Running final Graph2Vec embedding on train+test graphs...
Training final SVM on Graph2Vec embeddings...


[I 2026-01-27 17:51:22,821] A new study created in memory with name: no-name-1287f607-07fc-406d-8fe4-d867001c8ae9


Graph2Vec Results on ENZYMES -> Acc: 0.358, F1: 0.346, AUC: 0.724, Score: 0.428
Embedding time: 6.43s | SVM training time: 0.24s | Optuna time: 51.80s | Memory usage: 1473.60 MB
Graph2Vec summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/g2v.csv
Loaded dataset MUTAG for Graph2Vec: 188 graphs, 2 classes
Running Optuna for Graph2Vec+SVM hyperparameter tuning...


[I 2026-01-27 17:51:23,981] Trial 0 finished with value: 0.8072272727272728 and parameters: {'C': 20.81817611857203, 'gamma': 0.061802908291554735}. Best is trial 0 with value: 0.8072272727272728.
[I 2026-01-27 17:51:25,065] Trial 1 finished with value: 0.6453333333333333 and parameters: {'C': 0.01778794288245111, 'gamma': 0.028802524142384277}. Best is trial 0 with value: 0.8072272727272728.
[I 2026-01-27 17:51:25,803] Trial 2 finished with value: 0.6823333333333332 and parameters: {'C': 0.0222603478621279, 'gamma': 4.97720981889199}. Best is trial 0 with value: 0.8072272727272728.
[I 2026-01-27 17:51:26,595] Trial 3 finished with value: 0.840111111111111 and parameters: {'C': 1.4893453547469757, 'gamma': 3.3492319656503327}. Best is trial 3 with value: 0.840111111111111.
[I 2026-01-27 17:51:27,351] Trial 4 finished with value: 0.6423333333333333 and parameters: {'C': 2.666325237654527, 'gamma': 0.011296903585200087}. Best is trial 3 with value: 0.840111111111111.
[I 2026-01-27 17:51:

Best hyperparameters (Graph2Vec+SVM): {'C': 2.164886681667186, 'gamma': 1.5616813009677528}
Running final Graph2Vec embedding on train+test graphs...


[I 2026-01-27 17:51:32,110] A new study created in memory with name: no-name-c1438e64-5d8f-4bae-9ce0-a68daa6ee086


Training final SVM on Graph2Vec embeddings...
Graph2Vec Results on MUTAG -> Acc: 0.789, F1: 0.778, AUC: 0.868, Score: 0.802
Embedding time: 0.90s | SVM training time: 0.01s | Optuna time: 8.28s | Memory usage: 1473.60 MB
Graph2Vec summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/g2v.csv
Loaded dataset MUTAG for Graph2Vec: 188 graphs, 2 classes
Running Optuna for Graph2Vec+SVM hyperparameter tuning...


[I 2026-01-27 17:51:32,912] Trial 0 finished with value: 0.6423333333333333 and parameters: {'C': 0.08082738610154913, 'gamma': 0.02913298242273298}. Best is trial 0 with value: 0.6423333333333333.
[I 2026-01-27 17:51:33,667] Trial 1 finished with value: 0.6423333333333333 and parameters: {'C': 26.174238043768984, 'gamma': 0.0009840534837662574}. Best is trial 0 with value: 0.6423333333333333.
[I 2026-01-27 17:51:34,440] Trial 2 finished with value: 0.6423333333333333 and parameters: {'C': 2.004236946854984, 'gamma': 0.002166521980937205}. Best is trial 0 with value: 0.6423333333333333.
[I 2026-01-27 17:51:35,322] Trial 3 finished with value: 0.8671515151515152 and parameters: {'C': 27.157199159073894, 'gamma': 0.17646958746853852}. Best is trial 3 with value: 0.8671515151515152.
[I 2026-01-27 17:51:36,459] Trial 4 finished with value: 0.6423333333333333 and parameters: {'C': 0.7645115495374762, 'gamma': 0.03445426250161034}. Best is trial 3 with value: 0.8671515151515152.
[I 2026-01-2

Best hyperparameters (Graph2Vec+SVM): {'C': 27.157199159073894, 'gamma': 0.17646958746853852}
Running final Graph2Vec embedding on train+test graphs...


[I 2026-01-27 17:51:42,079] A new study created in memory with name: no-name-2dbfd664-bf16-40cb-9a41-dfd3d9034780


Training final SVM on Graph2Vec embeddings...
Graph2Vec Results on MUTAG -> Acc: 0.789, F1: 0.789, AUC: 0.880, Score: 0.808
Embedding time: 0.93s | SVM training time: 0.01s | Optuna time: 8.94s | Memory usage: 1470.65 MB
Graph2Vec summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/g2v.csv
Loaded dataset MUTAG for Graph2Vec: 188 graphs, 2 classes
Running Optuna for Graph2Vec+SVM hyperparameter tuning...


[I 2026-01-27 17:51:42,827] Trial 0 finished with value: 0.793344886606761 and parameters: {'C': 3.415747094029785, 'gamma': 1.556084003590642}. Best is trial 0 with value: 0.793344886606761.
[I 2026-01-27 17:51:43,582] Trial 1 finished with value: 0.7945987232102144 and parameters: {'C': 0.7875087762740153, 'gamma': 3.900645092385029}. Best is trial 1 with value: 0.7945987232102144.
[I 2026-01-27 17:51:44,331] Trial 2 finished with value: 0.6923333333333332 and parameters: {'C': 0.4109401878386158, 'gamma': 0.011465912050758722}. Best is trial 1 with value: 0.7945987232102144.
[I 2026-01-27 17:51:45,090] Trial 3 finished with value: 0.6893333333333334 and parameters: {'C': 1.8958977592663582, 'gamma': 0.0013999152747740937}. Best is trial 1 with value: 0.7945987232102144.
[I 2026-01-27 17:51:45,839] Trial 4 finished with value: 0.6923333333333332 and parameters: {'C': 0.022890984815543306, 'gamma': 0.32112199535638775}. Best is trial 1 with value: 0.7945987232102144.
[I 2026-01-27 17:

Best hyperparameters (Graph2Vec+SVM): {'C': 0.7875087762740153, 'gamma': 3.900645092385029}
Running final Graph2Vec embedding on train+test graphs...
Training final SVM on Graph2Vec embeddings...
Graph2Vec Results on MUTAG -> Acc: 0.789, F1: 0.789, AUC: 0.862, Score: 0.804
Embedding time: 1.35s | SVM training time: 0.01s | Optuna time: 8.28s | Memory usage: 1470.65 MB
Graph2Vec summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/g2v.csv


In [16]:
for dataset_name in ["IMDB-MULTI", "ENZYMES", "MUTAG"]:
  for seed in [42, 43, 44]:
      run_netlsd_pipeline(
          dataset_name=dataset_name,
          seed=seed,
          w_acc=0.5, w_f1=0.3, w_auc=0.2,
          test_size=0.2,
          use_optuna=True,
          n_trials=10,
      )

Loaded dataset IMDB-MULTI for NetLSD: 1500 graphs, 3 classes


[I 2026-01-27 17:51:52,264] A new study created in memory with name: no-name-d84aa726-f79e-47d5-be99-bea720726354


Running Optuna for NetLSD+SVM hyperparameter tuning...


[I 2026-01-27 17:51:56,644] Trial 0 finished with value: 0.34947103262366186 and parameters: {'C': 0.04033374011187141, 'gamma': 1.54694488280316}. Best is trial 0 with value: 0.34947103262366186.
[I 2026-01-27 17:52:00,935] Trial 1 finished with value: 0.42135489933741077 and parameters: {'C': 0.06192663099172617, 'gamma': 0.00011326495532043095}. Best is trial 1 with value: 0.42135489933741077.
[I 2026-01-27 17:52:06,548] Trial 2 finished with value: 0.4225215660040774 and parameters: {'C': 0.9795645028447804, 'gamma': 0.002972120556553736}. Best is trial 2 with value: 0.4225215660040774.
[I 2026-01-27 17:52:10,804] Trial 3 finished with value: 0.4186047886957813 and parameters: {'C': 0.6116347303349853, 'gamma': 0.040648560150916405}. Best is trial 2 with value: 0.4225215660040774.
[I 2026-01-27 17:52:15,204] Trial 4 finished with value: 0.42261010767074414 and parameters: {'C': 3.460961910612948, 'gamma': 0.0004381137458508494}. Best is trial 4 with value: 0.42261010767074414.
[I 2

Best hyperparameters (NetLSD+SVM): {'C': 9.911160566612544, 'gamma': 2.5794568691187574}
Running final NetLSD embedding on train+test graphs...
Training final SVM on NetLSD embeddings...
NetLSD Results on IMDB-MULTI -> Acc: 0.453, F1: 0.403, AUC: 0.646, Score: 0.477
Embedding time: 5.10s | SVM training time: 1.59s | Optuna time: 46.70s | Memory usage: 1477.66 MB
NetLSD summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/netlsd.csv
Loaded dataset IMDB-MULTI for NetLSD: 1500 graphs, 3 classes


[I 2026-01-27 17:52:46,394] A new study created in memory with name: no-name-cf8c85e6-f8ad-43b9-a9d7-d73a3660406d


Running Optuna for NetLSD+SVM hyperparameter tuning...


[I 2026-01-27 17:52:51,022] Trial 0 finished with value: 0.4711949765008424 and parameters: {'C': 0.586583702636339, 'gamma': 6.008923438150466}. Best is trial 0 with value: 0.4711949765008424.
[I 2026-01-27 17:52:55,481] Trial 1 finished with value: 0.43387215377850386 and parameters: {'C': 0.058652341751429395, 'gamma': 0.0002456751108299386}. Best is trial 0 with value: 0.4711949765008424.
[I 2026-01-27 17:53:00,956] Trial 2 finished with value: 0.43321590377850383 and parameters: {'C': 0.028538291781789196, 'gamma': 0.002404335205074206}. Best is trial 0 with value: 0.4711949765008424.
[I 2026-01-27 17:53:05,226] Trial 3 finished with value: 0.47996932641813833 and parameters: {'C': 1.0316018465099985, 'gamma': 7.176231257413315}. Best is trial 3 with value: 0.47996932641813833.
[I 2026-01-27 17:53:09,985] Trial 4 finished with value: 0.4346013204451705 and parameters: {'C': 0.1278273503748476, 'gamma': 0.001589586501268075}. Best is trial 3 with value: 0.47996932641813833.
[I 2026

Best hyperparameters (NetLSD+SVM): {'C': 5.7754454368047865, 'gamma': 6.530494131456295}
Running final NetLSD embedding on train+test graphs...
Training final SVM on NetLSD embeddings...
NetLSD Results on IMDB-MULTI -> Acc: 0.490, F1: 0.477, AUC: 0.648, Score: 0.518
Embedding time: 5.47s | SVM training time: 1.21s | Optuna time: 46.93s | Memory usage: 1479.67 MB
NetLSD summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/netlsd.csv
Loaded dataset IMDB-MULTI for NetLSD: 1500 graphs, 3 classes


[I 2026-01-27 17:53:40,731] A new study created in memory with name: no-name-55ebc45b-1485-486c-8dc1-a5b426b6f0b7


Running Optuna for NetLSD+SVM hyperparameter tuning...


[I 2026-01-27 17:53:44,963] Trial 0 finished with value: 0.44470096486175115 and parameters: {'C': 44.195333187639235, 'gamma': 0.3447859467818733}. Best is trial 0 with value: 0.44470096486175115.
[I 2026-01-27 17:53:49,597] Trial 1 finished with value: 0.4385924544844073 and parameters: {'C': 35.4873459682906, 'gamma': 0.03953976858210211}. Best is trial 0 with value: 0.44470096486175115.
[I 2026-01-27 17:53:54,824] Trial 2 finished with value: 0.3991232408235394 and parameters: {'C': 1.9759039542081358, 'gamma': 0.0060932196090945495}. Best is trial 0 with value: 0.44470096486175115.
[I 2026-01-27 17:53:59,137] Trial 3 finished with value: 0.3989409491568727 and parameters: {'C': 0.010668871121780919, 'gamma': 0.002067421903653998}. Best is trial 0 with value: 0.44470096486175115.
[I 2026-01-27 17:54:04,216] Trial 4 finished with value: 0.3996336574902061 and parameters: {'C': 0.11681318888259669, 'gamma': 0.0032055386960826455}. Best is trial 0 with value: 0.44470096486175115.
[I 2

Best hyperparameters (NetLSD+SVM): {'C': 44.195333187639235, 'gamma': 0.3447859467818733}
Running final NetLSD embedding on train+test graphs...
Training final SVM on NetLSD embeddings...
NetLSD Results on IMDB-MULTI -> Acc: 0.450, F1: 0.410, AUC: 0.613, Score: 0.471
Embedding time: 5.56s | SVM training time: 1.32s | Optuna time: 46.91s | Memory usage: 1488.64 MB
NetLSD summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/netlsd.csv
Loaded dataset ENZYMES for NetLSD: 600 graphs, 6 classes


[I 2026-01-27 17:54:35,015] A new study created in memory with name: no-name-7e74cd77-5e97-4ea6-b81e-a16df0741419


ENZYMES filtering: removed 1 graphs with < 3 nodes, kept 599 graphs.
Running Optuna for NetLSD+SVM hyperparameter tuning...


[I 2026-01-27 17:54:36,778] Trial 0 finished with value: 0.1935231357887608 and parameters: {'C': 0.04910320349365897, 'gamma': 1.1412298459544579}. Best is trial 0 with value: 0.1935231357887608.
[I 2026-01-27 17:54:38,549] Trial 1 finished with value: 0.2022274652369761 and parameters: {'C': 0.2581647654522988, 'gamma': 0.0009852452299187286}. Best is trial 1 with value: 0.2022274652369761.
[I 2026-01-27 17:54:40,370] Trial 2 finished with value: 0.2026180902369761 and parameters: {'C': 0.05600696906204889, 'gamma': 0.0012147157502696018}. Best is trial 2 with value: 0.2026180902369761.
[I 2026-01-27 17:54:42,193] Trial 3 finished with value: 0.20587329857030945 and parameters: {'C': 0.019716785264861952, 'gamma': 0.0010624371356294415}. Best is trial 3 with value: 0.20587329857030945.
[I 2026-01-27 17:54:44,937] Trial 4 finished with value: 0.2083926102599078 and parameters: {'C': 1.7501349443039305, 'gamma': 0.1163228715527889}. Best is trial 4 with value: 0.2083926102599078.
[I 20

Best hyperparameters (NetLSD+SVM): {'C': 0.21244771545059238, 'gamma': 4.659584665395062}
Running final NetLSD embedding on train+test graphs...
Training final SVM on NetLSD embeddings...
NetLSD Results on ENZYMES -> Acc: 0.225, F1: 0.161, AUC: 0.534, Score: 0.267
Embedding time: 2.36s | SVM training time: 0.35s | Optuna time: 19.20s | Memory usage: 1492.91 MB
NetLSD summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/netlsd.csv
Loaded dataset ENZYMES for NetLSD: 600 graphs, 6 classes


[I 2026-01-27 17:54:57,453] A new study created in memory with name: no-name-7dae94b7-d674-4f77-b48c-117901255247


ENZYMES filtering: removed 1 graphs with < 3 nodes, kept 599 graphs.
Running Optuna for NetLSD+SVM hyperparameter tuning...


[I 2026-01-27 17:54:59,897] Trial 0 finished with value: 0.2767578488792821 and parameters: {'C': 34.062935527717, 'gamma': 0.6723227879566919}. Best is trial 0 with value: 0.2767578488792821.
[I 2026-01-27 17:55:01,714] Trial 1 finished with value: 0.2680466569640677 and parameters: {'C': 66.39001280715284, 'gamma': 1.952056013199414}. Best is trial 0 with value: 0.2767578488792821.
[I 2026-01-27 17:55:03,533] Trial 2 finished with value: 0.19830430506993008 and parameters: {'C': 63.09769142063101, 'gamma': 0.002443812505496432}. Best is trial 0 with value: 0.2767578488792821.
[I 2026-01-27 17:55:05,330] Trial 3 finished with value: 0.22274441459276018 and parameters: {'C': 0.014859713292332677, 'gamma': 0.00036973304635345365}. Best is trial 0 with value: 0.2767578488792821.
[I 2026-01-27 17:55:07,128] Trial 4 finished with value: 0.22183295625942684 and parameters: {'C': 1.5406950753605273, 'gamma': 0.002907485486382648}. Best is trial 0 with value: 0.2767578488792821.
[I 2026-01-27

Best hyperparameters (NetLSD+SVM): {'C': 34.062935527717, 'gamma': 0.6723227879566919}
Running final NetLSD embedding on train+test graphs...
Training final SVM on NetLSD embeddings...
NetLSD Results on ENZYMES -> Acc: 0.308, F1: 0.260, AUC: 0.617, Score: 0.355
Embedding time: 2.08s | SVM training time: 0.21s | Optuna time: 19.92s | Memory usage: 1502.93 MB
NetLSD summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/netlsd.csv
Loaded dataset ENZYMES for NetLSD: 600 graphs, 6 classes


[I 2026-01-27 17:55:20,363] A new study created in memory with name: no-name-67255d27-92f4-4368-9f52-53eeffe1da58


ENZYMES filtering: removed 1 graphs with < 3 nodes, kept 599 graphs.
Running Optuna for NetLSD+SVM hyperparameter tuning...


[I 2026-01-27 17:55:22,169] Trial 0 finished with value: 0.25084218493431854 and parameters: {'C': 1.6361892493710357, 'gamma': 0.3670573924036089}. Best is trial 0 with value: 0.25084218493431854.
[I 2026-01-27 17:55:24,662] Trial 1 finished with value: 0.2571930741360089 and parameters: {'C': 0.053938013285955926, 'gamma': 0.17219480409711083}. Best is trial 1 with value: 0.2571930741360089.
[I 2026-01-27 17:55:26,998] Trial 2 finished with value: 0.32306886068030577 and parameters: {'C': 2.157997468248909, 'gamma': 8.18011717531529}. Best is trial 2 with value: 0.32306886068030577.
[I 2026-01-27 17:55:28,783] Trial 3 finished with value: 0.24863920032670034 and parameters: {'C': 0.015507089297973663, 'gamma': 0.11714515389232447}. Best is trial 2 with value: 0.32306886068030577.
[I 2026-01-27 17:55:30,557] Trial 4 finished with value: 0.25594175279377485 and parameters: {'C': 0.35816047316493466, 'gamma': 0.0858973387435618}. Best is trial 2 with value: 0.32306886068030577.
[I 2026-

Best hyperparameters (NetLSD+SVM): {'C': 2.157997468248909, 'gamma': 8.18011717531529}
Running final NetLSD embedding on train+test graphs...
Training final SVM on NetLSD embeddings...


[I 2026-01-27 17:55:43,127] A new study created in memory with name: no-name-2f20c5fc-740b-4934-89ad-b8181b3de527


NetLSD Results on ENZYMES -> Acc: 0.300, F1: 0.273, AUC: 0.666, Score: 0.365
Embedding time: 2.05s | SVM training time: 0.19s | Optuna time: 20.38s | Memory usage: 1503.74 MB
NetLSD summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/netlsd.csv
Loaded dataset MUTAG for NetLSD: 188 graphs, 2 classes
Running Optuna for NetLSD+SVM hyperparameter tuning...


[I 2026-01-27 17:55:43,572] Trial 0 finished with value: 0.6733333333333333 and parameters: {'C': 0.5736573042035434, 'gamma': 0.37586349764630334}. Best is trial 0 with value: 0.6733333333333333.
[I 2026-01-27 17:55:44,034] Trial 1 finished with value: 0.6733333333333333 and parameters: {'C': 0.24080258028223805, 'gamma': 0.0003393178478442463}. Best is trial 0 with value: 0.6733333333333333.
[I 2026-01-27 17:55:44,473] Trial 2 finished with value: 0.6733333333333333 and parameters: {'C': 52.83047914438532, 'gamma': 0.00040371798888172076}. Best is trial 0 with value: 0.6733333333333333.
[I 2026-01-27 17:55:44,922] Trial 3 finished with value: 0.8983994528043775 and parameters: {'C': 81.09412736504882, 'gamma': 0.023884502032076024}. Best is trial 3 with value: 0.8983994528043775.
[I 2026-01-27 17:55:45,367] Trial 4 finished with value: 0.6733333333333333 and parameters: {'C': 0.15422667327586767, 'gamma': 0.000372296279943823}. Best is trial 3 with value: 0.8983994528043775.
[I 2026-

Best hyperparameters (NetLSD+SVM): {'C': 81.09412736504882, 'gamma': 0.023884502032076024}
Running final NetLSD embedding on train+test graphs...


[I 2026-01-27 17:55:48,274] A new study created in memory with name: no-name-815161c7-8ae8-4c07-81fd-29101a97ad23


Training final SVM on NetLSD embeddings...
NetLSD Results on MUTAG -> Acc: 0.763, F1: 0.754, AUC: 0.843, Score: 0.777
Embedding time: 0.55s | SVM training time: 0.01s | Optuna time: 4.48s | Memory usage: 1503.74 MB
NetLSD summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/netlsd.csv
Loaded dataset MUTAG for NetLSD: 188 graphs, 2 classes
Running Optuna for NetLSD+SVM hyperparameter tuning...


[I 2026-01-27 17:55:48,723] Trial 0 finished with value: 0.6653333333333333 and parameters: {'C': 87.66809693477025, 'gamma': 0.0016097095613621796}. Best is trial 0 with value: 0.6653333333333333.
[I 2026-01-27 17:55:49,233] Trial 1 finished with value: 0.878063492063492 and parameters: {'C': 32.85159638528139, 'gamma': 3.1037329792990342}. Best is trial 1 with value: 0.878063492063492.
[I 2026-01-27 17:55:49,703] Trial 2 finished with value: 0.6653333333333333 and parameters: {'C': 2.829792768458278, 'gamma': 0.001767861620673457}. Best is trial 1 with value: 0.878063492063492.
[I 2026-01-27 17:55:50,423] Trial 3 finished with value: 0.6643333333333333 and parameters: {'C': 0.7029766237901394, 'gamma': 0.002366627973957685}. Best is trial 1 with value: 0.878063492063492.
[I 2026-01-27 17:55:51,133] Trial 4 finished with value: 0.6643333333333333 and parameters: {'C': 0.03438253011082182, 'gamma': 0.8325788095228694}. Best is trial 1 with value: 0.878063492063492.
[I 2026-01-27 17:55:

Best hyperparameters (NetLSD+SVM): {'C': 32.85159638528139, 'gamma': 3.1037329792990342}
Running final NetLSD embedding on train+test graphs...


[I 2026-01-27 17:55:54,764] A new study created in memory with name: no-name-cf686a17-11a9-4346-a0e0-36190f239937


Training final SVM on NetLSD embeddings...
NetLSD Results on MUTAG -> Acc: 0.789, F1: 0.778, AUC: 0.831, Score: 0.794
Embedding time: 0.55s | SVM training time: 0.01s | Optuna time: 5.84s | Memory usage: 1503.74 MB
NetLSD summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/netlsd.csv
Loaded dataset MUTAG for NetLSD: 188 graphs, 2 classes
Running Optuna for NetLSD+SVM hyperparameter tuning...


[I 2026-01-27 17:55:55,211] Trial 0 finished with value: 0.6763333333333332 and parameters: {'C': 2.149651540372503, 'gamma': 0.000773951067036475}. Best is trial 0 with value: 0.6763333333333332.
[I 2026-01-27 17:55:55,672] Trial 1 finished with value: 0.6763333333333332 and parameters: {'C': 0.012819520405551538, 'gamma': 0.04646265411363027}. Best is trial 0 with value: 0.6763333333333332.
[I 2026-01-27 17:55:56,122] Trial 2 finished with value: 0.6763333333333332 and parameters: {'C': 0.0786341955232671, 'gamma': 0.04234625504326544}. Best is trial 0 with value: 0.6763333333333332.
[I 2026-01-27 17:55:56,585] Trial 3 finished with value: 0.6763333333333332 and parameters: {'C': 0.3036874786054862, 'gamma': 0.11250456730688667}. Best is trial 0 with value: 0.6763333333333332.
[I 2026-01-27 17:55:57,034] Trial 4 finished with value: 0.8983994528043775 and parameters: {'C': 20.017512642443723, 'gamma': 0.45360797820022825}. Best is trial 4 with value: 0.8983994528043775.
[I 2026-01-27

Best hyperparameters (NetLSD+SVM): {'C': 9.811077059604782, 'gamma': 1.9642985053025699}
Running final NetLSD embedding on train+test graphs...
Training final SVM on NetLSD embeddings...
NetLSD Results on MUTAG -> Acc: 0.868, F1: 0.870, AUC: 0.862, Score: 0.867
Embedding time: 0.55s | SVM training time: 0.01s | Optuna time: 4.53s | Memory usage: 1503.74 MB
NetLSD summary stored in: /content/drive/MyDrive/InformationSystems/Classification/results/classification/netlsd.csv
