# NTK classifier for citation graphs (Cora, Cora-ML, Citeseer, PubMed)

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. exp_ntk) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import numpy as np
from exp_ntk import run

In [3]:
def get_str_l(l, precision=2):
    """Render each number in ``l`` as a fixed-point string.

    Args:
        l: Iterable of values that support the ``f`` format specifier.
        precision: Number of digits after the decimal point (default 2).

    Returns:
        A list with one formatted string per input element, in input order.
    """
    spec = f".{precision}f"
    return [format(value, spec) for value in l]
    

In [4]:
# Notebook-level default seed; later cells rebind it or loop over several seeds.
seed = 0

# Dataset choice and split specification handed to exp_ntk.run.
data_params = {
    "dataset": "cora",
    "learning_setting": "inductive",  # alternative: "transductive"
    "specification": {
        "n_per_class": 20,
        "fraction_test": 0.1,
        "data_dir": "./data",
        "make_undirected": True,
        "binary_attr": False,
        "balance_test": True,
    },
}

# Model / kernel settings. Experiment cells below overwrite "regularizer"
# and "pred_method" before each run (1e-8 is an alternative regularizer value
# kept for reference).
model_params = {
    "label": "GCN",
    "model": "GCN",
    "normalization": "row_normalization",
    "depth": 1,
    "regularizer": 1,
    "pred_method": "svm",
}

# Logging verbosity.
verbosity_params = {
    "debug_lvl": "warning",
}

# Compute settings; the experiment cells switch "device" to "cpu".
other_params = {
    "device": "0",
    "dtype": torch.float64,
    "allow_tf32": False,
}

In [5]:
def run_exp(n_seeds, data_params, model_params, verbosity_params, other_params, n_seed_l=None):
    """Run the experiment once per seed and print per-seed and summary statistics.

    Args:
        n_seeds: Number of seeds to use; seeds ``0 .. n_seeds - 1`` are run.
            Ignored when ``n_seed_l`` is given.
        data_params: Data configuration with a ``"specification"`` sub-dict.
            It is not modified: each run receives a copy whose
            ``specification["seed"]`` is set to the current seed.
        model_params: Model configuration, forwarded to ``run`` unchanged.
        verbosity_params: Verbosity configuration, forwarded to ``run`` unchanged.
        other_params: Device/dtype configuration, forwarded to ``run`` unchanged.
        n_seed_l: Optional explicit iterable of seeds to use instead of
            ``range(n_seeds)``.

    Returns:
        None. Results are printed: per-seed accuracy, its mean and standard
        deviation, the min/max of the predictions and NTK entries, and the
        ``"cond"`` value reported by ``run``.
    """
    seeds = list(range(n_seeds)) if n_seed_l is None else n_seed_l

    # Keys read from the result dict returned by ``run``.
    stat_keys = (
        "accuracy",
        "min_ypred",
        "max_ypred",
        "min_ntklabeled",
        "max_ntklabeled",
        "min_ntkunlabeled",
        "max_ntkunlabeled",
        "cond",
    )
    stats = {key: [] for key in stat_keys}

    for seed in seeds:
        # Build a per-run copy instead of writing the seed into the caller's
        # dict, so one experiment cannot leak its last seed into later cells.
        seeded_data_params = {
            **data_params,
            "specification": {**data_params["specification"], "seed": seed},
        }
        res = run(seeded_data_params, model_params, verbosity_params, other_params, seed)
        for key in stat_keys:
            stats[key].append(res[key])

    acc = np.array(stats["accuracy"])
    print(f"Accuracy: {get_str_l(stats['accuracy'])}")
    print(f"Accuracy Mean: {acc.mean()}")
    print(f"Accuracy Std: {acc.std()}")
    print(f"Min y_pred: {get_str_l(stats['min_ypred'])}")
    print(f"Max y_pred: {get_str_l(stats['max_ypred'])}")
    print(f"Min NTK_labeled: {get_str_l(stats['min_ntklabeled'])}")
    print(f"Max NTK_labeled: {get_str_l(stats['max_ntklabeled'])}")
    print(f"Min NTK_unlabeled: {get_str_l(stats['min_ntkunlabeled'])}")
    print(f"Max NTK_unlabeled: {get_str_l(stats['max_ntkunlabeled'])}")
    print(f"Condition: {get_str_l(stats['cond'], precision=0)}")

In [8]:
# Single SVM run on Cora with seed 1 on the CPU. The shared parameter dicts
# are modified in place, so these settings persist into later cells.
model_params["regularizer"] = 0.1
model_params["pred_method"] = "svm"
# NOTE(review): "cache_size" is a new key not present in the default
# model_params; its meaning is defined by exp_ntk.run -- confirm there.
model_params["cache_size"] = 1000
data_params["dataset"] = "cora"
other_params["device"] = "cpu"
# Rebinds the notebook-level `seed` and stores it in the data specification.
seed = 1
data_params["specification"]["seed"] = seed
verbosity_params["debug_lvl"] = "warning"
# Last expression of the cell: the returned result dict is shown as output.
run(data_params, model_params, verbosity_params, other_params, seed)

100


{'accuracy': 0.8131868243217468,
 'min_ypred': -1.8052204847335815,
 'max_ypred': 1.5962820053100586,
 'min_ntklabeled': 0.9832799462292519,
 'max_ntklabeled': 44.095950974899985,
 'min_ntkunlabeled': 0.8673833181746758,
 'max_ntkunlabeled': 24.497745429205672,
 'cond': 9383.388048591205}

In [7]:
# Scratch check: draw ten integers from {0, 1} using a PCG64 bit generator
# seeded with 10, then print them.
bit_generator = np.random.PCG64(10)
rng = np.random.Generator(bit_generator)
y = rng.integers(low=0, high=2, size=10)
print(y)

[1 1 0 0 1 1 1 0 1 1]


# SVM

In [14]:
# SVM on Cora with regularizer 0.1 over seeds 0..9 (n_seed_l is None, so
# run_exp uses range(n_seeds)). The device and any other settings not
# assigned here are inherited from whichever cells ran before.
model_params["regularizer"] = 0.1
data_params["dataset"] = "cora"
model_params["pred_method"] = "svm"
n_seed_l = None
verbosity_params["debug_lvl"] = "warning"
n_seeds = 10
run_exp(n_seeds, data_params, model_params, verbosity_params, other_params, n_seed_l)

Accuracy: ['0.85', '0.81', '0.83', '0.82', '0.85', '0.77', '0.82', '0.83', '0.85', '0.81']
Accuracy Mean: 0.8238095343112946
Accuracy Std: 0.023308333072155348
Min y_pred: ['-1.73', '-1.81', '-2.02', '-1.70', '-1.68', '-1.72', '-1.81', '-1.75', '-1.81', '-1.79']
Max y_pred: ['1.76', '1.60', '1.75', '1.52', '1.88', '1.54', '2.20', '1.43', '1.71', '1.52']
Min NTK_labeled: ['1.09', '0.98', '0.77', '1.25', '0.55', '0.77', '0.57', '0.76', '0.57', '1.03']
Max NTK_labeled: ['50.10', '44.10', '52.10', '48.10', '50.10', '48.10', '50.10', '42.10', '46.10', '46.10']
Min NTK_unlabeled: ['0.91', '0.87', '0.63', '1.07', '0.63', '0.65', '0.62', '0.67', '0.53', '0.95']
Max NTK_unlabeled: ['25.00', '24.50', '21.11', '25.00', '25.00', '16.46', '21.11', '21.11', '21.44', '15.77']
Condition: ['9640', '9383', '9556', '9577', '9209', '9500', '7186', '8973', '9490', '9285']


In [35]:
# SVM on Cora-ML with regularizer 0.05 over seeds 0..9.
# NOTE(review): another cell in this notebook has exactly the same source but
# different recorded output; presumably settings not assigned here (inherited
# kernel state) or the exp_ntk code differed between the two runs -- confirm
# which result is current.
model_params["regularizer"] = 0.05
data_params["dataset"] = "cora_ml"
model_params["pred_method"] = "svm"
n_seed_l = None
verbosity_params["debug_lvl"] = "warning"
n_seeds = 10
run_exp(n_seeds, data_params, model_params, verbosity_params, other_params, n_seed_l)

Accuracy: ['0.86', '0.87', '0.85', '0.86', '0.78', '0.79', '0.85', '0.84', '0.82', '0.82']
Accuracy Mean: 0.8334507048130035
Accuracy Std: 0.02933545175833569
Min y_pred: ['-2.01', '-1.80', '-1.93', '-1.87', '-2.13', '-2.22', '-1.89', '-2.01', '-1.89', '-1.98']
Max y_pred: ['2.04', '1.75', '2.08', '2.14', '2.47', '2.29', '2.01', '1.82', '2.00', '2.06']
Min NTK_labeled: ['3.52', '3.30', '2.93', '3.29', '2.95', '3.51', '3.47', '1.89', '2.73', '2.61']
Max NTK_labeled: ['142.09', '168.08', '132.09', '110.09', '72.59', '108.09', '104.09', '324.07', '74.09', '139.80']
Min NTK_unlabeled: ['2.64', '3.17', '2.63', '2.63', '3.35', '2.63', '3.07', '3.23', '2.33', '3.04']
Max NTK_unlabeled: ['47.54', '39.93', '38.96', '41.18', '47.54', '47.08', '33.10', '74.87', '39.56', '43.30']
Condition: ['26316', '13169', '25396', '25245', '25365', '16955', '16290', '13427', '15812', '27280']


In [10]:
# SVM on Cora-ML with regularizer 0.05 over seeds 0..9.
# NOTE(review): this cell's source duplicates another Cora-ML cell in this
# notebook, yet the recorded outputs differ; presumably inherited kernel
# state or the exp_ntk code changed between executions -- confirm.
model_params["regularizer"] = 0.05
data_params["dataset"] = "cora_ml"
model_params["pred_method"] = "svm"
n_seed_l = None
verbosity_params["debug_lvl"] = "warning"
n_seeds = 10
run_exp(n_seeds, data_params, model_params, verbosity_params, other_params, n_seed_l)

Accuracy: ['0.82', '0.83', '0.81', '0.83', '0.86', '0.79', '0.81', '0.86', '0.84', '0.80']
Accuracy Mean: 0.8242957711219787
Accuracy Std: 0.022625805817998605
Min y_pred: ['-1.79', '-1.73', '-2.09', '-1.69', '-1.95', '-1.59', '-1.71', '-1.93', '-1.65', '-1.77']
Max y_pred: ['1.46', '1.94', '1.75', '1.62', '2.20', '1.53', '1.45', '2.11', '1.46', '1.90']
Min NTK_labeled: ['3.37', '2.89', '3.18', '2.50', '2.90', '2.61', '3.29', '3.39', '2.86', '2.66']
Max NTK_labeled: ['160.04', '134.04', '136.04', '196.03', '272.02', '72.54', '114.04', '128.04', '134.04', '100.04']
Min NTK_unlabeled: ['2.68', '2.47', '2.71', '3.21', '2.89', '2.54', '2.97', '3.19', '2.78', '2.70']
Max NTK_unlabeled: ['42.41', '30.25', '36.84', '59.64', '115.95', '43.28', '40.20', '55.51', '30.45', '42.13']
Condition: ['52079', '18747', '52854', '50611', '24097', '49535', '24905', '50818', '14519', '51674']


In [9]:
# SVM on Citeseer with regularizer 0.05 over seeds 0..9. The device and other
# settings not assigned here are inherited from previously executed cells.
model_params["regularizer"] = 0.05
data_params["dataset"] = "citeseer"
model_params["pred_method"] = "svm"
n_seed_l = None
verbosity_params["debug_lvl"] = "warning"
n_seeds = 10
run_exp(n_seeds, data_params, model_params, verbosity_params, other_params, n_seed_l)

Accuracy: ['0.78', '0.72', '0.77', '0.73', '0.75', '0.73', '0.74', '0.71', '0.70', '0.72']
Accuracy Mean: 0.7336448311805726
Accuracy Std: 0.023918660468398557
Min y_pred: ['-1.52', '-1.75', '-1.56', '-1.71', '-1.64', '-1.59', '-1.50', '-1.68', '-1.64', '-1.59']
Max y_pred: ['1.31', '1.17', '1.17', '1.74', '1.51', '1.14', '1.12', '1.44', '1.18', '1.10']
Min NTK_labeled: ['3.00', '2.90', '3.10', '3.17', '2.67', '3.16', '2.86', '3.08', '2.81', '2.93']
Max NTK_labeled: ['78.04', '84.04', '80.04', '86.04', '90.04', '80.04', '66.04', '66.04', '80.04', '84.04']
Min NTK_unlabeled: ['2.85', '2.92', '2.80', '2.58', '2.73', '2.94', '2.74', '2.85', '2.84', '2.61']
Max NTK_unlabeled: ['29.62', '32.04', '34.53', '43.90', '35.96', '35.19', '27.77', '33.19', '28.71', '34.53']
Condition: ['10379', '29110', '29321', '18443', '29388', '29316', '28212', '27603', '12103', '28507']


In [19]:
# SVM on Citeseer with regularizer 0.1 over seeds 0..9, for comparison with
# the regularizer-0.05 Citeseer run. The device and other settings not
# assigned here are inherited from previously executed cells.
model_params["regularizer"] = 0.1
data_params["dataset"] = "citeseer"
model_params["pred_method"] = "svm"
n_seed_l = None
verbosity_params["debug_lvl"] = "warning"
n_seeds = 10
run_exp(n_seeds, data_params, model_params, verbosity_params, other_params, n_seed_l)

Accuracy: ['0.78', '0.69', '0.73', '0.72', '0.73', '0.73', '0.74', '0.68', '0.69', '0.72']
Accuracy Mean: 0.7210280179977417
Accuracy Std: 0.02725144326863752
Min y_pred: ['-1.63', '-2.04', '-1.68', '-1.94', '-1.98', '-1.62', '-1.65', '-2.03', '-1.89', '-1.76']
Max y_pred: ['1.37', '1.49', '1.91', '2.01', '1.75', '1.35', '1.38', '1.83', '1.65', '1.49']
Min NTK_labeled: ['3.00', '2.90', '3.10', '3.17', '2.67', '3.16', '2.86', '3.08', '2.81', '2.93']
Max NTK_labeled: ['78.09', '84.09', '80.09', '86.09', '90.09', '80.09', '66.09', '66.09', '80.09', '84.09']
Min NTK_unlabeled: ['2.85', '2.92', '2.80', '2.58', '2.73', '2.94', '2.74', '2.85', '2.84', '2.61']
Max NTK_unlabeled: ['29.62', '32.04', '34.53', '43.90', '35.96', '35.19', '27.77', '33.19', '28.71', '34.53']
Condition: ['7642', '14556', '14661', '11387', '14695', '14658', '14107', '13802', '8530', '14254']


In [33]:
# SVM on PubMed with regularizer 0.1 over seeds 0..9, explicitly on the CPU.
model_params["regularizer"] = 0.1
data_params["dataset"] = "pubmed"
model_params["pred_method"] = "svm"
n_seed_l = None
n_seeds = 10
other_params["device"] = "cpu"
verbosity_params["debug_lvl"] = "warning"
# n_seed_l is None, so run_exp falls back to range(n_seeds).
run_exp(n_seeds, data_params, model_params, verbosity_params, other_params, n_seed_l=n_seed_l)

Accuracy: ['0.78', '0.76', '0.65', '0.74', '0.72', '0.66', '0.77', '0.73', '0.72', '0.77']
Accuracy Mean: 0.731424230337143
Accuracy Std: 0.04095614945257118
Min y_pred: ['-1.02', '-1.02', '-1.01', '-1.02', '-1.02', '-1.02', '-1.01', '-1.03', '-1.02', '-1.01']
Max y_pred: ['-0.90', '-0.92', '-0.90', '-0.91', '-0.90', '-0.86', '-0.92', '-0.85', '-0.85', '-0.89']
Min NTK_labeled: ['0.02', '0.02', '0.02', '0.02', '0.02', '0.02', '0.02', '0.02', '0.02', '0.02']
Max NTK_labeled: ['0.50', '1.03', '0.66', '0.40', '0.96', '1.13', '0.62', '0.83', '0.59', '0.82']
Min NTK_unlabeled: ['0.02', '0.02', '0.02', '0.02', '0.02', '0.02', '0.02', '0.02', '0.02', '0.02']
Max NTK_unlabeled: ['0.46', '0.33', '0.27', '0.36', '0.27', '0.73', '0.25', '0.31', '0.31', '0.33']
Condition: ['40', '47', '40', '40', '41', '44', '40', '41', '41', '43']


In [37]:
# Cora with pred_method "krr" and regularizer 1, over seeds 0 and 1.
# This cell assigns neither the device nor the debug level, so both come from
# whatever earlier cells left in other_params / verbosity_params.
# NOTE(review): the recorded output lacks the "Accuracy Mean"/"Accuracy Std"
# lines that run_exp prints, so it presumably predates the current run_exp --
# re-run to refresh.
model_params["regularizer"] = 1
data_params["dataset"] = "cora"
model_params["pred_method"] = "krr"
n_seeds = 2
run_exp(n_seeds, data_params, model_params, verbosity_params, other_params)

  X = torch.tensor(X, dtype=other_params["dtype"], device=device)
  A = torch.tensor(A, dtype=other_params["dtype"], device=device)


number of samples
 - labeled: 140 
 - val: 140 
 - test: 273 
 - unlabeled: 2155
number of samples
 - labeled: 140 
 - val: 140 
 - test: 273 
 - unlabeled: 2155
Accuracy: ['0.86', '0.84']
Min y_pred: ['-0.61', '-0.68']
Max y_pred: ['1.54', '2.93']
Min NTK_labeled: ['0.55', '0.78']
Max NTK_labeled: ['41.00', '53.00']
Min NTK_unlabeled: ['0.52', '0.64']
Max NTK_unlabeled: ['28.50', '28.00']
Condition: ['931', '939']
