In [1]:
%load_ext autoreload
%autoreload 2

In [22]:
import torch
import numpy as np
from src.data import get_graph, split
from exp_ntk_certify_collective_label import run, configure_hardware
from src.models.common import row_normalize, sym_normalize
from src.models import create_model
import src.utils as utils

# Specification

In [86]:
seed = 2

data_params = dict(
    dataset = "karate_club",    
    learning_setting = "transductive", # or "transdructive"
    specification = dict(
        n_per_class = 2,
        fraction_test = 0.1,
        data_dir = "./data",
        balance_test = True,
        seed = 2,
    )
)

model_params = dict(
    label = "SGC",
    model = "GCN",
    normalization = "row_normalization",
    depth = 1,
    regularizer = 0.01,
    pred_method = "svm",
    activation = "linear",
    solver = "qplayer",
    alpha_tol = 1e-4,
    bias = False,
)

model_params_finite = dict(
    model = "GCN",
    normalization = "row_normalization",
    activation = "linear",
    depth = 1,
    n_filter = 8,
)

certificate_params = dict(
    delta = 0.2,
    LogToConsole = 0,
    OutputFlag = 1,
    IntegralityFocus = 1,
    use_tight_big_M = 1
)

verbosity_params = dict(
    debug_lvl = "info"
)  

other_params = dict(
    device = "cpu",
    dtype = torch.float64,
    allow_tf32 = False,
    path_gurobi_license = "/ceph/ssd/staff/gosl/app/gurobi.lic"
)
n_epochs = 100

In [89]:
# Get collective certificate
result = run(data_params, model_params, certificate_params, verbosity_params, other_params, seed)
idx_train, idx_val, = result["idx_train"], result["idx_val"]
idx_labeled, idx_test = result["idx_labeled"], result["idx_test"]
acc_tst_wide = result["accuracy_test"]
print(result["y_is_robust"])
print(result["y_true_cls"])
print(result["y_flip"])
print(result["idx_labeled"])
# reset/configure hardware
other_params["dtype"] = torch.float32
device, dtype = configure_hardware(other_params, seed)
rng = np.random.Generator(np.random.PCG64(seed))
# get graph
X, A, y, _, _, _ = get_graph(data_params, sort=True)
X = torch.tensor(X, dtype=dtype, device=device)
A = torch.tensor(A, dtype=dtype, device=device)
y = torch.tensor(y, dtype=dtype, device=device)
if "normalization" in model_params:
    if model_params["normalization"] == "row_normalization":
        A = row_normalize(A)
    else:
        A = sym_normalize(A)
y_pert = y.clone()
y_flip = torch.tensor(result["y_flip"], dtype=dtype, device=device)
y_pert[idx_labeled] = y_pert[idx_labeled].logical_xor(y_flip).float()
for i, y in enumerate([y, y_pert]):
    # init model
    model_params_finite["n_classes"] = len(np.unique(y))
    model_params_finite["n_features"] = X.shape[1]
    model = create_model(model_params_finite)
    model = model.to(device)
    # train model
    model.train()
    loss_f = torch.nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    for epoch in range(n_epochs):
        optimizer.zero_grad()
        logits = model(X, A).reshape(-1)
        loss = loss_f(logits[idx_labeled], y[idx_labeled])
        loss.backward()
        optimizer.step()
        if epoch % 20 == 0:
            print(f"Epoch {epoch}: loss={loss.item()}")
    pred = model(X, A).reshape(-1)
    y_pred = (pred > 0.5).float()
    acc_trn = (y_pred[idx_labeled] == y[idx_labeled]).float().mean()
    acc_tst = (y_pred[idx_test] == y[idx_test]).float().mean()
    if i == 0:
        print(f"Clean: Acc.Trn.: {acc_trn:.2f} Acc.Tst.: {acc_tst:.2f}")
        y_pred_cln = y_pred
    else:
        print(f"Perturbed: Acc.Trn.: {acc_trn:.2f} Acc.Tst.: {acc_tst:.2f}")
        y_pred_pert = y_pred
y_is_robust = torch.tensor(result["y_is_robust"], dtype=torch.long, device=device)
y_is_robust_emp = (y_pred_cln[idx_test] == y_pred_pert[idx_test]).long()
tst_robust = y_is_robust_emp.float().mean()
tst_robust_c = y_is_robust_emp.sum()
print(f"{tst_robust_c} Robust Empirical ({tst_robust:.2f}%)")
y_is_robust_c = y_is_robust.sum()
y_is_robust_mask = y_is_robust == 1
y_both_robust_c = (y_is_robust_emp[y_is_robust_mask] == y_is_robust[y_is_robust_mask]).sum()
print(f"{y_is_robust_c} Robust in NTK ({y_is_robust.float().mean():.2f}%)")
print(f"of which {y_both_robust_c / y_is_robust[y_is_robust_mask].sum():.2f}% still robust for finite width")
# Robust Empirical but not Robust NTK?
y_not_robust_mask = y_is_robust == 0
y_diff_robust_c = (y_is_robust_emp[y_not_robust_mask] != y_is_robust[y_not_robust_mask]).sum()
print(f"{y_diff_robust_c / len(y_is_robust[y_not_robust_mask]):.2f}% of NTK-non robust is robust for finite width")
print(f"{y_is_robust}")
print(f"{y_is_robust_emp}")


2024-09-06 19:44:15 (INFO): Starting experiment exp_ntk_certify_collective_label with configuration:
2024-09-06 19:44:15 (INFO): data_params: {'dataset': 'karate_club', 'learning_setting': 'transductive', 'specification': {'n_per_class': 2, 'fraction_test': 0.1, 'data_dir': './data', 'balance_test': True, 'seed': 2}}
2024-09-06 19:44:15 (INFO): model_params: {'label': 'SGC', 'model': 'GCN', 'normalization': 'row_normalization', 'depth': 1, 'regularizer': 0.01, 'pred_method': 'svm', 'activation': 'linear', 'solver': 'qplayer', 'alpha_tol': 0.0001, 'bias': False}
2024-09-06 19:44:15 (INFO): certification_params: {'delta': 0.2, 'LogToConsole': 0, 'OutputFlag': 1, 'IntegralityFocus': 1, 'use_tight_big_M': 1}
2024-09-06 19:44:15 (INFO): verbosity_params: {'debug_lvl': 'info'}
2024-09-06 19:44:15 (INFO): other_params: {'device': 'cpu', 'dtype': torch.float32, 'allow_tf32': False, 'path_gurobi_license': '/ceph/ssd/staff/gosl/app/gurobi.lic'}


2024-09-06 19:44:15 (INFO): seed: 2
2024-09-06 19:44:15 (INFO): number of samples
 - labeled: 4 
 - val: 4 
 - test: 4 
 - unlabeled: 22
2024-09-06 19:44:15 (INFO): Delta: 0.2
2024-09-06 19:44:15 (INFO): Test accuracy: 0.9615384340286255
2024-09-06 19:44:15 (INFO): Train accuracy: 1.0
2024-09-06 19:44:15 (INFO): Using tight big-Ms.


8 alphas found: ['0.0100', '0.0100', '0.0100', '0.0100', '0.0100', '0.0100', '0.0100', '0.0100']




Set parameter IntegralityFocus to value 1


2024-09-06 19:44:15 (INFO): Set parameter IntegralityFocus to value 1


Set parameter IntFeasTol to value 0.0001


2024-09-06 19:44:15 (INFO): Set parameter IntFeasTol to value 0.0001
2024-09-06 19:44:15 (INFO): Optimization status: 2
2024-09-06 19:44:15 (INFO): Objective: #sign flips 5.0 out of 26
2024-09-06 19:44:15 (INFO): Percentage of nodes certified 0.8076923076923077
2024-09-06 19:44:15 (INFO): Certified accuracy (poisoning): 0.8076923076923077


Loaded user MIP start with objective -0

[1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0]
[1, 1, 1, 1, 1, 1, -1, 1, 1, 1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1]
[-0.0, 1.0, 1.0, 1.0, -0.0, -0.0, 1.0, 1.0]
[23, 32, 5, 19, 27, 24, 7, 3]
Epoch 0: loss=0.667255163192749
Epoch 20: loss=0.34523576498031616
Epoch 40: loss=0.11127869039773941
Epoch 60: loss=0.03858467563986778
Epoch 80: loss=0.02004377357661724
Clean: Acc.Trn.: 1.00 Acc.Tst.: 0.92
Epoch 0: loss=0.6753969192504883
Epoch 20: loss=0.2727285921573639
Epoch 40: loss=0.08812858909368515
Epoch 60: loss=0.03193012252449989
Epoch 80: loss=0.016871007159352303
Perturbed: Acc.Trn.: 1.00 Acc.Tst.: 0.27
7 Robust Empirical (0.27%)
21 Robust in NTK (0.81%)
of which 0.19% still robust for finite width
0.60% of NTK-non robust is robust for finite width
tensor([1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
        1, 