Import dependencies

In [1]:
from model.layers import GraphSpectralFilterLayer, AnalysisFilter
from model.spectral_filter import Graph
import torch
import torch.nn.functional as F
from torch import nn
from random import seed as rseed
from numpy.random import seed as nseed
from citation import get_dataset, random_planetoid_splits, run
from citation.train_eval import evaluate
import numpy as np


Define hyperparameters

In [2]:
dataset_name = 'Cora'

random_splits = False
runs = 1
alpha = 0.2
seed =729
weight_decay = 0.00012376256876336363
patience=10
hidden=32
lr =0.001
dropout=0.7
heads =12
output_heads =10
normalize_features = True
pre_training = False
cuda = False
chebyshev_order =16
edge_dropout =0
node_feature_dropout =0
filter_name ='analysis'

rseed(seed)
nseed(seed)
torch.manual_seed(seed)

cuda = cuda and torch.cuda.is_available()

if cuda:
    torch.cuda.manual_seed(seed)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    
def get_correctly_predicted_node_idx(net, key, dataset):
    net.eval()
    with torch.no_grad():
        logits  = net(dataset[0])[0]
    mask = dataset[0]['{}_mask'.format(key)]
    pred = logits[mask].max(1)[1]
    return { *pred.eq(dataset[0].y[mask]).nonzero().view(-1).tolist() }

Define model

In [3]:
class Net(torch.nn.Module):
    def __init__(self, dataset):
        super(Net, self).__init__()
        data = dataset.data
        adj = torch.sparse_coo_tensor(data.edge_index, torch.ones(data.num_edges))
        self.G = Graph(adj)
        self.G.estimate_lmax()

        self.analysis = GraphSpectralFilterLayer(self.G, dataset.num_node_features, hidden,
                                                 dropout=dropout, out_channels=heads, filter=filter_name,
                                                 pre_training=False, device='cuda' if cuda else 'cpu',
                                                 alpha=alpha, chebyshev_order=chebyshev_order, concat=True)

        self.synthesis = GraphSpectralFilterLayer(self.G, hidden * heads, dataset.num_classes, filter=filter_name,
                                                  device='cuda' if cuda else 'cpu', dropout=dropout,
                                                  out_channels=1, alpha=alpha, pre_training=False,
                                                  chebyshev_order=chebyshev_order, concat=False)

    def reset_parameters(self):
        self.analysis.reset_parameters()
        self.synthesis.reset_parameters()

    def forward(self, data):
        x = data.x
        x = F.dropout(x, p=dropout, training=self.training)
        x, att1 = self.analysis(x)
        layer_1 = x
        x = F.dropout(x, p=dropout, training=self.training)
        x, att2 = self.synthesis(x)
        layer_2 = x
        x = F.elu(x)
        return F.log_softmax(x, dim=1), layer_1, layer_2

dataset = get_dataset(dataset_name, normalize_features, edge_dropout=edge_dropout,
                                node_feature_dropout=node_feature_dropout)

if cuda:
    dataset[0].to('cuda')


Load trained model and evaluate

In [4]:
model = Net(dataset)
model.load_state_dict(torch.load('./model/best_{}.pkl'.format(dataset_name),  map_location={'cuda:0': 'cpu'}))

# model = SingleNet(dataset)
# model.load_state_dict(torch.load('./model/best_{}_single_layer.pkl'.format(dataset_name)))

# filter_kernel = model.analysis.filter_kernel

# model_correct_indices = get_correctly_predicted_node_idx(model, 'test', dataset)
eval_info = evaluate(model, dataset[0])
print(eval_info)

{'train_loss': 0.1689869910478592, 'train_acc': 0.9602649006622517, 'train_micro_f1': 0.9602649006622517, 'train_macro_f1': 0.9595082349503755, 'val_loss': 0.386290043592453, 'val_acc': 0.88, 'val_micro_f1': 0.88, 'val_macro_f1': 0.8731523860977931, 'test_loss': 0.39568036794662476, 'test_acc': 0.871, 'test_micro_f1': 0.871, 'test_macro_f1': 0.8612771708622864}


In [5]:
student_heads = 12
hidden= 32
dropout=0.4

class StudentNet(torch.nn.Module):
    def __init__(self, dataset):
        super(StudentNet, self).__init__()
        data = dataset.data
        adj = torch.sparse_coo_tensor(data.edge_index, torch.ones(data.num_edges))
        self.G = Graph(adj)
        self.G.estimate_lmax()

        self.analysis = GraphSpectralFilterLayer(self.G, dataset.num_node_features, hidden,
                                                 dropout=dropout, out_channels=student_heads, filter=filter_name,
                                                 pre_training=False, device='cuda' if cuda else 'cpu',
                                                 alpha=alpha, chebyshev_order=chebyshev_order, concat=True)

        self.synthesis = GraphSpectralFilterLayer(self.G, hidden * student_heads, dataset.num_classes, filter=filter_name,
                                                  device='cuda' if cuda else 'cpu', dropout=dropout,
                                                  out_channels=student_heads, alpha=alpha, pre_training=False,
                                                  chebyshev_order=chebyshev_order, concat=False)
    def reset_parameters(self):
        self.analysis.reset_parameters()
        self.synthesis.reset_parameters()

    def forward(self, data):
        x = data.x
        x = F.dropout(x, p=dropout, training=self.training)
        x, att1 = self.analysis(x)
        layer_1 = x
        x = F.dropout(x, p=dropout, training=self.training)
        x, att2 = self.synthesis(x)
        layer_2 = x
        x = F.elu(x)
        return F.log_softmax(x, dim=1), layer_1, layer_2

In [6]:
def train_student(Model, target_1, target_2=None, lr=lr):
    from torch.optim import Adam
    from sklearn.metrics import f1_score
    student = Model(dataset)
    student.reset_parameters()
    optimizer = Adam(student.parameters(), lr=lr, weight_decay=weight_decay)
    data = dataset.data
    epochs =2000

    for epoch in range(1, epochs + 1):
        optimizer.zero_grad()
        logits, out_2, out_1 = student(data)
        loss = F.mse_loss(out_1[dataset[0].train_mask], target_1[dataset[0].train_mask])
        if target_2 is not None:
            loss += F.mse_loss(out_2[dataset[0].train_mask], target_2[dataset[0].train_mask])
        loss.backward()
        optimizer.step()
        student.train()

        # eval_info['epoch'] = epoch
        if epoch % 10 == 0:
            student.eval()
            outs = {}
            outs['loss'] = loss.item()
            for key in ['train', 'val', 'test']:
                mask = data['{}_mask'.format(key)]
                loss = F.nll_loss(logits[mask], data.y[mask]).item()
                pred = logits[mask].max(1)[1]

                outs['{}_loss'.format(key)] = loss

                outs['{}_micro_f1'.format(key)] = f1_score(data.y[mask].cpu(), logits[mask].max(1)[1].cpu(), average='micro')
                outs['{}_macro_f1'.format(key)] = f1_score(data.y[mask].cpu(), logits[mask].max(1)[1].cpu(), average='macro')

            print(outs)
        # if eval_info['val_acc'] > best_val_acc or eval_info['val_loss'] < best_val_loss:
        #     if eval_info['val_acc'] >= best_val_acc and eval_info['val_loss'] <= best_val_loss:
        #         eval_info_early_model = eval_info
        #         # torch.save(model.state_dict(), './best_{}_appnp.pkl'.format(dataset.name))
        #     best_val_acc = np.max((best_val_acc, eval_info['val_acc']))
        #     best_val_loss = np.min((best_val_loss, eval_info['val_loss']))
        #     bad_counter = 0
        # else:
        #     bad_counter += 1
        #     if bad_counter == patience:
        #         break

In [None]:
with torch.no_grad():
    _, target_1, target_2 = model(dataset[0])

train_student(StudentNet, target_2)

{'loss': 3.919919967651367, 'train_loss': 1.7399733066558838, 'train_micro_f1': 0.3029801324503311, 'train_macro_f1': 0.13771887088613244, 'val_loss': 1.727547287940979, 'val_micro_f1': 0.32, 'val_macro_f1': 0.13664554334615403, 'test_loss': 1.7215667963027954, 'test_micro_f1': 0.338, 'test_macro_f1': 0.15621878316250465}
{'loss': 3.0983753204345703, 'train_loss': 1.420284628868103, 'train_micro_f1': 0.35513245033112584, 'train_macro_f1': 0.22148689214979386, 'val_loss': 1.424119472503662, 'val_micro_f1': 0.372, 'val_macro_f1': 0.19926486057613757, 'test_loss': 1.4157347679138184, 'test_micro_f1': 0.382, 'test_macro_f1': 0.2249923558005067}
{'loss': 2.2616498470306396, 'train_loss': 1.049644947052002, 'train_micro_f1': 0.6539735099337748, 'train_macro_f1': 0.6072161378797253, 'val_loss': 1.0941120386123657, 'val_micro_f1': 0.626, 'val_macro_f1': 0.5615257773927025, 'test_loss': 1.0828052759170532, 'test_micro_f1': 0.639, 'test_macro_f1': 0.5807568224060097}
{'loss': 1.5315266847610474,

In [None]:
from torch_geometric.nn import GATConv
class GAT(torch.nn.Module):
    def __init__(self, dataset):
        super(GAT, self).__init__()
        self.conv1 = GATConv(
            dataset.num_features,
            8,
            heads=8,
            dropout=0.6)
        self.conv2 = GATConv(
            64,
            dataset.num_classes,
            heads=1,
            concat=False,
            dropout=0.6)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.dropout(x, p=0.6, training=self.training)
        x, (edge_index_1, att_val_1) = self.conv1(x, edge_index, return_attention_weights=True)
        layer_1 = x
        x = F.elu(x)
        x = F.dropout(x, p=0.6, training=self.training)
        x, (edge_index_2, att_val_2) = self.conv2(x, edge_index, return_attention_weights=True)
        layer_2 = x
        return F.log_softmax(x, dim=1), layer_1, layer_2

gat = GAT(dataset)
gat.load_state_dict(torch.load('./model/best_{}_public_gat.pkl'.format(dataset_name)))
evaluate(gat, dataset[0])

In [None]:
with torch.no_grad():
    _, gat_target_1, gat_target_2 = gat(dataset[0])

train_student(StudentNet, gat_target_2, gat_target_1)

In [None]:
# class MLP(nn.Module):
#     def __init__(self, out_channel):
#         super(MLP, self).__init__()
#         self.out_channel = out_channel
#         self.layers = nn.Sequential(nn.Linear(dataset.num_node_features, 128),
#                                     nn.ReLU(inplace=True),
#                                     nn.Linear(128, 64),
#                                     nn.ReLU(inplace=True),
#                                     nn.Linear(64, 32),
#                                     nn.ReLU(inplace=True),
#                                     nn.Linear(32, dataset.num_classes),
#                                     nn.ReLU(inplace=True))
#
#     def reset_parameters(self):
#         for layer in self.layers:
#             if hasattr(layer, 'reset_parameters'):
#                 layer.reset_parameters()
#
#     def forward(self, data):
#         x = self.layers(data.x)
#         return F.log_softmax(x, dim=1), x, None
#
#

In [None]:
# train_student(MLP, gat_soft_target)
#

In [None]:
# from torch_geometric.nn import SGConv
#
# class SGC(torch.nn.Module):
#     def __init__(self, dataset):
#         super(SGC, self).__init__()
#         self.conv1 = SGConv(
#             dataset.num_features, dataset.num_classes, K=2, cached=True)
#
#     def reset_parameters(self):
#         self.conv1.reset_parameters()
#
#     def forward(self, data):
#         x, edge_index = data.x, data.edge_index
#         x = self.conv1(x, edge_index)
#         return F.log_softmax(x, dim=1), x
#
# dataset_unnormalized = get_dataset(dataset_name, False, edge_dropout=edge_dropout,
#                                 node_feature_dropout=node_feature_dropout)
# sgc = SGC(dataset_unnormalized)
# sgc.load_state_dict(torch.load('./model/best_{}_sgc.pkl'.format(dataset_name)))
# print(evaluate(sgc, dataset_unnormalized[0]))
#
# with torch.no_grad():
#     sgc_soft_target = sgc(dataset_unnormalized[0])[1]
#

In [None]:
# train_student(StudentNet, sgc_soft_target)
#