# NTK classifier for CSBM

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import numpy as np
import src.models
from exp_ntk import run

In [3]:
def get_str_l(l, precision=2):
    l_str = []
    for el in l:
        l_str.append(f"{el:.{precision}f}")
    return l_str
    

In [54]:
seed = 0

data_params = dict(
    dataset = "csbm",
    learning_setting = "inductive", # or "transdructive"
    specification = dict(
        classes = 2,
        n_trn_labeled = 40,
        n_trn_unlabeled = 0,
        n_val = 40,
        n_test = 1000,
        sigma = 1,
        avg_within_class_degree = 1.58 * 2,
        avg_between_class_degree = 0.37 * 2,
        K = 1.5,
        seed = 0 # used to generate the dataset & data split
    )
)

model_params = dict(
    label = "GCN",
    model = "GCN",
    normalization = "row_normalization",
    depth = 1,
    #regularizer = 1e-8
    regularizer = 1,
    pred_method = "svm",
    alpha_tol = 1e-4,
    bias = True,
)

verbosity_params = dict(
    debug_lvl = "warning"
)  

other_params = dict(
    device = "0",
    dtype = torch.float64,
    allow_tf32 = False,
    debug = False
)

In [5]:
def run_exp(n_seeds, data_params, model_params, verbosity_params, other_params):
    acc_l = []
    min_ypred = []
    max_ypred = []
    cond = []
    min_ntklabeled = []
    max_ntklabeled = []
    min_ntkunlabeled = []
    max_ntkunlabeled = []
    for seed in range(n_seeds):
        data_params["specification"]["seed"] = seed
        res = run(data_params, model_params, verbosity_params, other_params, seed)
        acc_l.append(res["accuracy"])
        min_ypred.append(res["min_ypred"])
        max_ypred.append(res["max_ypred"])
        min_ntklabeled.append(res["min_ntklabeled"])
        max_ntklabeled.append(res["max_ntklabeled"])
        min_ntkunlabeled.append(res["min_ntkunlabeled"])
        max_ntkunlabeled.append(res["max_ntkunlabeled"])
        cond.append(res["cond"])
    print(f"Accuracy: {get_str_l(acc_l)}")
    print(f"Min y_pred: {get_str_l(min_ypred)}")
    print(f"Max y_pred: {get_str_l(max_ypred)}")
    print(f"Min NTK_labeled: {get_str_l(min_ntklabeled)}")
    print(f"Max NTK_labeled: {get_str_l(max_ntklabeled)}")
    print(f"Min NTK_unlabeled: {get_str_l(min_ntkunlabeled)}")
    print(f"Max NTK_unlabeled: {get_str_l(max_ntkunlabeled)}")
    print(f"Condition: {get_str_l(cond, precision=0)}")

In [57]:
data_params["learning_setting"] = "inductive"
model_params["regularizer"] = 1
model_params["pred_method"] = "svm"
model_params["solver"] = "cvxopt"
model_params["alpha_tol"] = 1e-4
model_params["bias"] = False
run(data_params, model_params, verbosity_params, other_params, seed)

CSBM mu:
[0.15990053 0.15990053 0.15990053 0.15990053 0.15990053 0.15990053
 0.15990053 0.15990053 0.15990053 0.15990053 0.15990053 0.15990053
 0.15990053 0.15990053 0.15990053 0.15990053 0.15990053 0.15990053
 0.15990053 0.15990053 0.15990053 0.15990053]


{'accuracy': 0.8610000610351562,
 'min_ypred': -4.288663579594357,
 'max_ypred': 4.114407388931128,
 'min_ntklabeled': -2.7836303410421457,
 'max_ntklabeled': 83.08263713314018,
 'min_ntkunlabeled': -0.964955620036918,
 'max_ntkunlabeled': 13.331564131697945,
 'cond': 544.1500695342672}

In [55]:
data_params["learning_setting"] = "inductive"
model_params["regularizer"] = 1
model_params["pred_method"] = "svm"
model_params["solver"] = "sklearn"
run(data_params, model_params, verbosity_params, other_params, seed)

CSBM mu:
[0.15990053 0.15990053 0.15990053 0.15990053 0.15990053 0.15990053
 0.15990053 0.15990053 0.15990053 0.15990053 0.15990053 0.15990053
 0.15990053 0.15990053 0.15990053 0.15990053 0.15990053 0.15990053
 0.15990053 0.15990053 0.15990053 0.15990053]


{'accuracy': 0.8560000658035278,
 'min_ypred': -4.273266330950973,
 'max_ypred': 3.8556140409761834,
 'min_ntklabeled': -2.7836303410421457,
 'max_ntklabeled': 83.08263713314018,
 'min_ntkunlabeled': -0.964955620036918,
 'max_ntkunlabeled': 13.331564131697945,
 'cond': 544.1500695342672}

# SVM

In [41]:
data_params["learning_setting"] = "inductive"
data_params["n_trn_labeled"] = 400
model_params["regularizer"] = 1
model_params["pred_method"] = "svm"
n_seeds = 10
run_exp(n_seeds, data_params, model_params, verbosity_params, other_params)

Accuracy: ['0.83', '0.88', '0.87', '0.92', '0.87', '0.83', '0.89', '0.82', '0.87', '0.84']
Min y_pred: ['0.00', '0.00', '0.00', '0.00', '0.00', '0.00', '0.00', '0.00', '0.00', '0.00']
Max y_pred: ['1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '1.00', '1.00']
Min NTK_labeled: ['-2.33', '-2.18', '-2.36', '-1.55', '-2.12', '-2.03', '-2.79', '-1.78', '-2.06', '-2.21']
Max NTK_labeled: ['66.91', '89.01', '84.26', '75.37', '86.18', '75.19', '79.51', '79.99', '71.35', '71.51']
Min NTK_unlabeled: ['-2.20', '-1.44', '-1.22', '-1.67', '-1.97', '-0.74', '-1.43', '-1.56', '-0.87', '-0.94']
Max NTK_unlabeled: ['26.98', '24.61', '31.31', '24.82', '22.27', '19.04', '20.63', '19.59', '19.51', '15.99']
Condition: ['1739', '1704', '1735', '1659', '1711', '1776', '1664', '1722', '1633', '1631']


# KRR

## Using LU factorization as solver

In [38]:
data_params["learning_setting"] = "inductive"
data_params["n_trn_labeled"] = 400
model_params["regularizer"] = 1
model_params["pred_method"] = "krr"
n_seeds = 10
run_exp(n_seeds, data_params, model_params, verbosity_params, other_params)

Accuracy: ['0.85', '0.85', '0.88', '0.90', '0.90', '0.84', '0.90', '0.84', '0.88', '0.85']
Min y_pred: ['-16.39', '-19.15', '-46.83', '-20.38', '-30.73', '-24.61', '-24.70', '-27.68', '-23.36', '-26.91']
Max y_pred: ['16.31', '33.49', '24.81', '35.88', '30.23', '23.11', '33.83', '23.28', '21.78', '32.14']
Min NTK_labeled: ['-2.33', '-2.18', '-2.36', '-1.55', '-2.12', '-2.03', '-2.79', '-1.78', '-2.06', '-2.21']
Max NTK_labeled: ['66.91', '89.01', '84.26', '75.37', '86.18', '75.19', '79.51', '79.99', '71.35', '71.51']
Min NTK_unlabeled: ['-2.20', '-1.44', '-1.22', '-1.67', '-1.97', '-0.74', '-1.43', '-1.56', '-0.87', '-0.94']
Max NTK_unlabeled: ['26.98', '24.61', '31.31', '24.82', '22.27', '19.04', '20.63', '19.59', '19.51', '15.99']
Condition: ['1739', '1704', '1735', '1659', '1711', '1776', '1664', '1722', '1633', '1631']


## Using QR factorization as solver

In [40]:
data_params["learning_setting"] = "inductive"
data_params["n_trn_labeled"] = 400
model_params["regularizer"] = 1
model_params["solver"] = "QR"
model_params["pred_method"] = "krr"
n_seeds = 10
run_exp(n_seeds, data_params, model_params, verbosity_params, other_params)

Accuracy: ['0.85', '0.85', '0.88', '0.90', '0.90', '0.84', '0.90', '0.84', '0.88', '0.85']
Min y_pred: ['-16.39', '-19.15', '-46.83', '-20.38', '-30.73', '-24.61', '-24.70', '-27.68', '-23.36', '-26.91']
Max y_pred: ['16.31', '33.49', '24.81', '35.88', '30.23', '23.11', '33.83', '23.28', '21.78', '32.14']
Min NTK_labeled: ['-2.33', '-2.18', '-2.36', '-1.55', '-2.12', '-2.03', '-2.79', '-1.78', '-2.06', '-2.21']
Max NTK_labeled: ['66.91', '89.01', '84.26', '75.37', '86.18', '75.19', '79.51', '79.99', '71.35', '71.51']
Min NTK_unlabeled: ['-2.20', '-1.44', '-1.22', '-1.67', '-1.97', '-0.74', '-1.43', '-1.56', '-0.87', '-0.94']
Max NTK_unlabeled: ['26.98', '24.61', '31.31', '24.82', '22.27', '19.04', '20.63', '19.59', '19.51', '15.99']
Condition: ['1739', '1704', '1735', '1659', '1711', '1776', '1664', '1722', '1633', '1631']


In [42]:
import networkx as nx

In [69]:
A = np.array([[0, 0, 1, 1],[0,0,0,0], [1, 0, 0, 0], [1, 0, 0, 0]])
G = nx.from_numpy_array(A)
X = np.array([[0,0],[3, 4], [-3, -5], [100, 200]])
y = [-1, 1, 1, 0]
for n in G:
    G.nodes[n]["features"] = X[n, :]
    G.nodes[n]["label"] = y[n]
print(G.nodes.data())
G = max(nx.connected_components(G), key=len)
print(list(G))

[(0, {'features': array([0, 0]), 'label': -1}), (1, {'features': array([3, 4]), 'label': 1}), (2, {'features': array([-3, -5]), 'label': 1}), (3, {'features': array([100, 200]), 'label': 0})]
[0, 2, 3]
