In [None]:
!pip install torch_geometric pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.8.0+cu126.html

In [None]:
import torch
import optuna
import torch.nn as nn
import torch.nn.functional as F
import json

from torch_geometric.loader import DataLoader
from torch_geometric import seed_everything
from torch_geometric.nn import GINConv, global_add_pool

In [None]:
class GIN(nn.Module):
    """Graph Isomorphism Network that produces one logit per graph.

    Node features go through a stack of ``GINConv`` layers, each followed by
    batch normalisation, ReLU and dropout. The node embeddings are then pooled
    per graph with ``global_add_pool`` and fed to a dense layer (with batch
    normalisation and ReLU) and a final single-output linear classifier.

    Args:
        input_dim: Size of each input node feature vector.
        hidden_dim: Width of every hidden representation.
        num_layers: Depth of the network. ``num_layers - 1`` ``GINConv`` layers
            are built, followed by the dense layer ``lin1``. With
            ``num_layers <= 1`` no ``GINConv`` is built and the pooled raw
            node features go straight into ``lin1``.
        dropout: Dropout probability applied after every ``GINConv`` layer.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, dropout):
        super().__init__()

        self.dropout = dropout
        self.convs = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        # Width of the features entering the next layer: the raw node-feature
        # size at first, hidden_dim once at least one GINConv has been added.
        in_dim = input_dim
        for _ in range(num_layers - 1):
            mlp = nn.Sequential(
                nn.Linear(in_dim, 2 * hidden_dim),
                nn.BatchNorm1d(2 * hidden_dim),
                nn.ReLU(),
                nn.Linear(2 * hidden_dim, hidden_dim),
            )
            self.convs.append(GINConv(mlp))
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))
            in_dim = hidden_dim

        # Size the head from in_dim rather than hidden_dim: when no GINConv was
        # built the pooled features still have input_dim columns, and a
        # hidden_dim-wide head would fail with a shape mismatch in forward().
        self.lin1 = nn.Linear(in_dim, hidden_dim)
        self.batch_norm1 = nn.BatchNorm1d(hidden_dim)
        self.classifier = nn.Linear(hidden_dim, 1)

    def forward(self, data):
        """Return a 1-D tensor holding one logit per graph in ``data``.

        Args:
            data: Batch object exposing ``x``, ``edge_index`` and ``batch``.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch
        for conv, batch_norm in zip(self.convs, self.batch_norms):
            x = F.relu(batch_norm(conv(x, edge_index)))
            x = F.dropout(x, self.dropout, training=self.training)
        # Collapse node embeddings into one row per graph.
        x = global_add_pool(x, batch)
        x = F.relu(self.batch_norm1(self.lin1(x)))
        # Flatten (num_graphs, 1) to (num_graphs,).
        return self.classifier(x).view(-1)

In [None]:
# Configuration for the run: one seed and one dataset directory.
SEED = 42
DATA_DIR = '/kaggle/input/graphs-with-automorphisms'

# Seed the random number generators once, before any model is built or any
# loader is shuffled, so that a Restart & Run All is repeatable.
seed_everything(SEED)

# NOTE(security): weights_only=False lets torch.load unpickle arbitrary Python
# objects, which can execute code embedded in the file. Only load dataset
# files that come from a trusted source.
train_dataset = torch.load(f'{DATA_DIR}/train_dataset.pt', weights_only=False)
val_dataset = torch.load(f'{DATA_DIR}/val_dataset.pt', weights_only=False)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/graphs-with-automorphisms/train_dataset.pt'

In [None]:
def objective(trial: optuna.Trial):
    """Optuna objective: train one GIN configuration and score it.

    Samples the hyperparameters, builds the loaders, model, optimizer, loss
    and LR scheduler, then trains for up to 50 epochs with early stopping
    (10 epochs without a new best validation accuracy). The validation
    accuracy is reported to Optuna every epoch so the pruner can stop
    unpromising trials.

    The per-epoch metrics are stored on the trial under the user attribute
    ``'history'``, including when the trial is pruned or stops early.

    Args:
        trial: The Optuna trial used to sample hyperparameters and report
            intermediate values.

    Returns:
        The highest validation accuracy observed during the epochs that ran.

    Raises:
        optuna.TrialPruned: When ``trial.should_prune()`` is true after an
            epoch's report.
    """
    # train() and test() take no model/optimizer arguments and read these
    # names from module scope, so they have to be published as globals.
    global model, optimizer, criterion, scheduler, train_loader, val_loader

    hidden_dim = trial.suggest_categorical('hidden_dim', [64, 128, 256, 512])
    num_layers = trial.suggest_int('num_layers', 3, 8)
    dropout = trial.suggest_float('dropout', 0.0, 0.5)
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)
    batch_size = trial.suggest_categorical('batch_size', [64, 128, 256])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    # NOTE(review): 3 is the node-feature width passed to GIN; presumably it
    # matches the dataset's features -- confirm against the data.
    model = GIN(3, hidden_dim=hidden_dim, num_layers=num_layers, dropout=dropout).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.BCEWithLogitsLoss()
    # mode='max' because the scheduler is stepped with validation accuracy.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=3
    )

    n_epochs = 50
    best_val_acc = 0.0
    patience = 10
    patience_counter = 0

    trial_history = []

    try:
        for epoch in range(1, n_epochs + 1):
            train_loss = train()
            train_acc, train_f1 = test(train_loader)
            val_acc, val_f1 = test(val_loader)

            scheduler.step(val_acc)

            trial_history.append({
                'trial': trial.number,
                'epoch': epoch,
                'train_loss': train_loss,
                'train_acc': train_acc,
                'train_f1': train_f1,
                'val_acc': val_acc,
                'val_f1': val_f1,
                'lr': optimizer.param_groups[0]['lr']
            })

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0
            else:
                patience_counter += 1

            trial.report(val_acc, epoch)

            if trial.should_prune():
                raise optuna.TrialPruned()

            if patience_counter >= patience:
                print(f"Trial {trial.number}: Early stopping at epoch {epoch}")
                break

            if epoch % 10 == 0:
                print(f"Trial {trial.number} | Epoch {epoch:02d} | "
                      f"Train Loss: {train_loss:.4f} | "
                      f"Val Acc: {val_acc:.4f}")
    finally:
        # Persist the curve even when the trial is pruned or fails part-way;
        # previously the history was built every epoch and then discarded.
        trial.set_user_attr('history', trial_history)

    return best_val_acc

In [3]:
def train():
    """Run one training epoch and return the average loss per graph.

    Works on the module-level ``model``, ``train_loader``, ``device``,
    ``criterion`` and ``optimizer``.

    Returns:
        The sum of the batch losses, each weighted by its number of graphs,
        divided by the size of ``train_loader.dataset``.
    """
    model.train()

    weighted_loss_sum = 0
    for batch in train_loader:
        batch = batch.to(device)

        optimizer.zero_grad()
        logits = model(batch)
        batch_loss = criterion(logits, batch.y.float())
        batch_loss.backward()
        optimizer.step()

        # Weight each batch by its graph count so that a smaller final batch
        # does not count as much as a full one.
        weighted_loss_sum += batch_loss.item() * batch.num_graphs

    return weighted_loss_sum / len(train_loader.dataset)


@torch.no_grad()
def test(loader):
    model.eval()
    predictions = []
    labels = []

    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = (out > 0).float()
        predictions.append(pred.cpu())
        labels.append(data.y.cpu())

    accuracy = metrics.accuracy_score(torch.cat(labels), torch.cat(predictions))
    f1 = metrics.f1_score(torch.cat(labels), torch.cat(predictions))

    return accuracy, f1

In [None]:
# Directory that receives the trial summary and the best configuration.
OUTPUT_DIR = "/kaggle/working"

study = optuna.create_study(
    direction="maximize",
    # TPE is Optuna's default sampler; it is passed explicitly only to pin its
    # seed, so the sequence of suggested hyperparameters is repeatable.
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10),
    study_name="GIN for partial automorphism extension problem",
)

study.optimize(objective, n_trials=100)

# One row per trial (state, value, sampled parameters, ...).
trials_df = study.trials_dataframe()
trials_df.to_csv(f"{OUTPUT_DIR}/optuna_trials_summary.csv", index=False)

trial = study.best_trial
print(f"  Value (Val Acc): {trial.value:.4f}")
print("\n  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

# Save the winning hyperparameters so a later run can rebuild the model.
best_params = study.best_params
config_path = f"{OUTPUT_DIR}/best_config.json"
with open(config_path, "w") as f:
    json.dump(best_params, f, indent=4)