# NTK classifier for Cora

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from pathlib import Path

import numpy as np
import scipy.sparse as sp
import torch

from exp_ntk_certify import run

In [4]:
def get_str_l(l, precision=2):
    """Format every number in ``l`` as a fixed-point string.

    Args:
        l: Iterable of values that support the ``f`` format specifier.
        precision: Number of digits kept after the decimal point.

    Returns:
        A list with one formatted string per element of ``l``, in order.
    """
    return [f"{value:.{precision}f}" for value in l]
    

In [5]:
# Default configuration shared by all experiment cells in this notebook.
# Later cells override individual entries in place before calling `run` / `run_exp`.
seed = 0

# Dataset selection and split options.
data_params = dict(
    dataset = "cora",
    learning_setting = "inductive", # or "transductive"
    specification = dict(
        n_per_class = 20,
        fraction_test = 0.1,
        data_dir = "./data",
        make_undirected = True,
        binary_attr = False,
        balance_test = True,
    )
)

# Model / kernel options.
model_params = dict(
    label = "GCN",
    model = "GCN",
    normalization = "row_normalization",
    depth = 1,
    # regularizer = 1e-8  (alternative value left in by the author)
    regularizer = 1,
    pred_method = "svm",
)

# Certification options.
certificate_params = dict(
    n_adversarial = 10, # number adversarial nodes
    perturbation_model = "l0",
    delta = 0.01 # l0: local budget = delta * feature_dim
)

verbosity_params = dict(
    debug_lvl = "warning"
)

other_params = dict(
    # GPU index as an int: this is the value the single-run cell sets
    # (other_params["device"] = 0) and the one shown in the logged
    # configuration of the successful run.
    device = 0,
    dtype = torch.float64,
    allow_tf32 = False
)

In [6]:
def run_exp(n_seeds, data_params, model_params, certificate_params,
            verbosity_params, other_params):
    """Run the experiment once per seed and print the collected metrics.

    For every ``seed`` in ``range(n_seeds)`` the seed is written into
    ``data_params["specification"]["seed"]`` (the caller's dict is modified
    in place) and ``run`` is called with all parameter dicts plus the seed.

    Args:
        n_seeds: Number of seeds to run; seeds are ``0 .. n_seeds - 1``.
        data_params: Dict with a ``"specification"`` sub-dict; mutated in place.
        model_params: Passed through to ``run`` unchanged.
        certificate_params: Passed through to ``run`` unchanged.
        verbosity_params: Passed through to ``run`` unchanged.
        other_params: Passed through to ``run`` unchanged.

    Returns:
        None. One line per metric is printed, each listing the per-seed
        values formatted by ``get_str_l``.
    """
    # (key in the result returned by `run`, printed label, extra keyword
    # arguments for get_str_l), listed in the order the lines are printed.
    report_spec = (
        ("accuracy", "Accuracy", {}),
        ("min_ypred", "Min y_pred", {}),
        ("max_ypred", "Max y_pred", {}),
        ("min_ntklabeled", "Min NTK_labeled", {}),
        ("max_ntklabeled", "Max NTK_labeled", {}),
        ("min_ntkunlabeled", "Min NTK_unlabeled", {}),
        ("max_ntkunlabeled", "Max NTK_unlabeled", {}),
        ("cond", "Condition", {"precision": 0}),
    )
    per_seed = {key: [] for key, _, _ in report_spec}

    for seed in range(n_seeds):
        data_params["specification"]["seed"] = seed
        res = run(data_params, model_params, certificate_params,
                  verbosity_params, other_params, seed)
        for key, values in per_seed.items():
            values.append(res[key])

    for key, label, fmt_kwargs in report_spec:
        print(f"{label}: {get_str_l(per_seed[key], **fmt_kwargs)}")

In [20]:
# Single run on "cora_ml" with pred_method "svm", seed 0 and info-level logging.
# The shared parameter dicts are updated in place, so these values persist
# for any cell executed afterwards.
seed = 0

model_params.update(regularizer=0.1, pred_method="svm", cache_size=10000)
data_params["dataset"] = "cora_ml"
data_params["specification"]["seed"] = seed
other_params["device"] = 0
certificate_params.update(n_adversarial=10, delta=0.01)
verbosity_params["debug_lvl"] = "info"

# Last expression of the cell: the result dict returned by `run` is displayed.
run(data_params, model_params, certificate_params, verbosity_params, other_params, seed)

2024-01-10 13:50:15 (INFO): Starting experiment exp_ntk_certify with configuration:
2024-01-10 13:50:15 (INFO): data_params: {'dataset': 'cora_ml', 'learning_setting': 'inductive', 'specification': {'n_per_class': 20, 'fraction_test': 0.1, 'data_dir': './data', 'make_undirected': True, 'binary_attr': False, 'balance_test': True, 'seed': 0}}
2024-01-10 13:50:15 (INFO): model_params: {'label': 'GCN', 'model': 'GCN', 'normalization': 'row_normalization', 'depth': 1, 'regularizer': 0.1, 'pred_method': 'svm', 'cache_size': 10000}
2024-01-10 13:50:15 (INFO): certification_params: {'n_adversarial': 10, 'perturbation_model': 'l0', 'delta': 0.01}
2024-01-10 13:50:15 (INFO): verbosity_params: {'debug_lvl': 'info'}
2024-01-10 13:50:15 (INFO): other_params: {'device': 0, 'dtype': torch.float64, 'allow_tf32': False}
2024-01-10 13:50:15 (INFO): seed: 0
2024-01-10 13:50:15 (INFO): Currently on gpu device cuda:0
2024-01-10 13:50:15 (INFO): number of samples
 - labeled: 140 
 - val: 140 
 - test: 284 


tensor(0., device='cuda:0', dtype=torch.float64)
tensor(1., device='cuda:0', dtype=torch.float64)
tensor(50.6356, device='cuda:0', dtype=torch.float64)
tensor(49., device='cuda:0', dtype=torch.float64)
tensor(2., device='cuda:0', dtype=torch.float64)
tensor(176., device='cuda:0', dtype=torch.float64)
tensor([85., 59., 59.,  ..., 45., 49., 29.], device='cuda:0',
       dtype=torch.float64)


2024-01-10 13:50:21 (INFO): Accuracy 0.8591549396514893


{'accuracy': 0.8591549396514893,
 'accuracy_ub': 0.6654929518699646,
 'accuracy_lb': 0.8697183132171631,
 'accuracy_cert': 0.7922534942626953,
 'min_ypred': -2.007810115814209,
 'max_ypred': 2.0351035594940186,
 'min_ylb': -2.165675163269043,
 'max_ylb': 2.2532241344451904,
 'min_yub': -4.956961631774902,
 'max_yub': 6.662258148193359,
 'min_ntklb': 0.4616764139530176,
 'max_ntklb': 139.69693436430674,
 'min_ntkub': 2.3331038349874444,
 'max_ntkub': 269.95186495266415,
 'min_ntklabeled': 3.518943905429874,
 'max_ntklabeled': 142.08693269172264,
 'min_ntkunlabeled': 2.644019167238364,
 'max_ntkunlabeled': 47.537976680322174,
 'cond': 26316.41952343723}

In [19]:
# Rebuild the sparse adjacency / attribute matrices stored in the Cora-ML archive
# and print the attribute matrix.
# The path is defined here so this cell does not rely on a later cell having
# been executed first.
path_to_file = Path("./data") / "cora_ml_v2.npz"

# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only use it on trusted files.
with np.load(path_to_file, allow_pickle=True) as npz_file:
    # dict(...) reads every array while the archive is still open.
    loader = dict(npz_file)

adj_matrix = sp.csr_matrix(
    (loader['adj_data'], loader['adj_indices'], loader['adj_indptr']),
    shape=loader['adj_shape'])

# The attribute matrix is optional in the archive; keep the name bound either way
# so the print below cannot fail with a NameError (or show a stale value).
attr_matrix = None
if 'attr_data' in loader:
    attr_matrix = sp.csr_matrix(
        (loader['attr_data'], loader['attr_indices'], loader['attr_indptr']),
        shape=loader['attr_shape'])

if attr_matrix is None:
    print(f"No 'attr_data' entry found in {path_to_file}")
else:
    print(attr_matrix)

  (0, 49)	0.105999365
  (0, 66)	0.06255302
  (0, 107)	0.09161823
  (0, 127)	0.091267556
  (0, 184)	0.04573838
  (0, 267)	0.07166049
  (0, 287)	0.038020283
  (0, 390)	0.05367108
  (0, 535)	0.070857726
  (0, 547)	0.092713065
  (0, 620)	0.06185835
  (0, 680)	0.068943776
  (0, 701)	0.091267556
  (0, 751)	0.06703097
  (0, 758)	0.1839517
  (0, 783)	0.09197585
  (0, 789)	0.108523354
  (0, 860)	0.09741358
  (0, 993)	0.07299882
  (0, 1001)	0.084178194
  (0, 1017)	0.084178194
  (0, 1056)	0.11042609
  (0, 1084)	0.07213576
  (0, 1152)	0.06029267
  (0, 1191)	0.09197585
  :	:
  (2994, 952)	0.41955727
  (2994, 1039)	0.13663158
  (2994, 1112)	0.14275049
  (2994, 1132)	0.08866522
  (2994, 1160)	0.075278506
  (2994, 1178)	0.079584196
  (2994, 1201)	0.076155886
  (2994, 1563)	0.09035322
  (2994, 1566)	0.1593216
  (2994, 1578)	0.39355066
  (2994, 1678)	0.070223026
  (2994, 1706)	0.06342342
  (2994, 1711)	0.07493567
  (2994, 1780)	0.12840252
  (2994, 1788)	0.11826994
  (2994, 1789)	0.45228663
  (2994, 1896

In [17]:
from pathlib import Path
import scipy.sparse as sp

# Load the Cora-ML archive into dense arrays: A (adjacency), X (attributes), y (labels).
directory = "./data"
if isinstance(directory, str):
    directory = Path(directory)
path_to_file = directory / ("cora_ml_v2.npz")
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only use it on trusted files.
with np.load(path_to_file, allow_pickle=True) as loader:
    loader = dict(loader)
    del_entries = []
    # Construct sparse matrices
    print(loader)
    for key in loader.keys():
        # Sparse matrices are stored as four arrays sharing a prefix. Some archives
        # name them "<name>.data", "<name>.indices", ...; cora_ml_v2.npz uses
        # "<name>_data", "<name>_indices", ... (e.g. "adj_data"). Accept both.
        if not key.endswith(('.data', '_data')):
            continue
        sep = key[-5]
        matrix_name = key[:-5]
        mat_data = key
        mat_indices = matrix_name + sep + "indices"
        mat_indptr = matrix_name + sep + "indptr"
        mat_shape = matrix_name + sep + "shape"
        # Skip keys that merely end in "data" but are not part of a CSR quadruple.
        if not all(part in loader for part in (mat_indices, mat_indptr, mat_shape)):
            continue
        M = sp.csr_matrix((loader[mat_data], loader[mat_indices],
                           loader[mat_indptr]), shape=loader[mat_shape])
        if matrix_name in ("adj_matrix", "adj"):
            A = M.toarray()
        elif matrix_name in ("attr_matrix", "attr"):
            print(M)
            X = M.toarray()
        else:
            # Explicit error instead of `assert False`, which is stripped under -O
            # and carries no message.
            raise ValueError(f"Unexpected sparse matrix {matrix_name!r} in {path_to_file}")
        del_entries.extend([mat_data, mat_indices, mat_indptr, mat_shape])
    # Delete sparse matrix entries
    for del_entry in del_entries:
        del loader[del_entry]
    y = np.array(loader["labels"])

{'adj_data': array([1., 1., 1., ..., 1., 1., 1.], dtype=float32), 'adj_indices': array([1636, 1638, 2357, ...,  200,  745, 1865], dtype=int32), 'adj_indptr': array([   0,    3,    6, ..., 8413, 8416, 8416], dtype=int32), 'adj_shape': array([2995, 2995]), 'attr_data': array([0.10599937, 0.06255302, 0.09161823, ..., 0.11412541, 0.14126119,
       0.14126119], dtype=float32), 'attr_indices': array([  49,   66,  107, ..., 2327, 2561, 2573], dtype=int32), 'attr_indptr': array([     0,     85,    133, ..., 151103, 151138, 151171], dtype=int32), 'attr_shape': array([2995, 2879]), 'labels': array([0, 1, 1, ..., 4, 6, 3]), 'node_names': array(['129558\n', '95225\n', '1116454\n', ..., '1109203\n', '1113182\n',
       '119952\n'], dtype='<U8'), 'attr_names': array(['000', '10', '100', ..., 'york', 'young', 'zero'], dtype='<U17'), 'class_names': array(['Artificial_Intelligence/Machine_Learning/Case-Based',
       'Artificial_Intelligence/Machine_Learning/Theory',
       'Artificial_Intelligence/Ma

# SVM

In [None]:
# SVM on "cora" with regularizer 0.1, evaluated over 10 seeds.
model_params["regularizer"] = 0.1
data_params["dataset"] = "cora"
model_params["pred_method"] = "svm"
n_seeds = 10
# run_exp takes certificate_params between model_params and verbosity_params;
# omitting it raises a TypeError for the missing `other_params` argument.
run_exp(n_seeds, data_params, model_params, certificate_params,
        verbosity_params, other_params)

In [None]:
# NOTE(review): this cell repeats the settings of the preceding SVM cell
# (regularizer 0.1, "cora", 10 seeds) -- confirm whether a different
# configuration was intended here.
model_params["regularizer"] = 0.1
data_params["dataset"] = "cora"
model_params["pred_method"] = "svm"
n_seeds = 10
# run_exp takes certificate_params between model_params and verbosity_params;
# omitting it raises a TypeError for the missing `other_params` argument.
run_exp(n_seeds, data_params, model_params, certificate_params,
        verbosity_params, other_params)

In [None]:
# pred_method "krr" on "cora" with regularizer 1, evaluated over 2 seeds.
# NOTE(review): this cell sits under the "# SVM" header although it uses "krr".
model_params["regularizer"] = 1
data_params["dataset"] = "cora"
model_params["pred_method"] = "krr"
n_seeds = 2
# run_exp takes certificate_params between model_params and verbosity_params;
# omitting it raises a TypeError for the missing `other_params` argument.
run_exp(n_seeds, data_params, model_params, certificate_params,
        verbosity_params, other_params)