Import dependencies

In [1]:
from model.layers import GraphSpectralFilterLayer, AnalysisFilter
from model.spectral_filter import Graph
import torch
import torch.nn.functional as F
from torch import nn
from random import seed as rseed
from numpy.random import seed as nseed
from citation import get_dataset, random_planetoid_splits, run
from citation.train_eval import evaluate
import numpy as np


Define hyperparameters

In [2]:
dataset_name = 'CiteSeer'

random_splits = False
runs = 1
epochs =400
alpha = 0.7709619178612326
seed =729
lr =0.00022455151763075903
weight_decay = 7.530100210192558e-05
patience=100
hidden=32
heads =14
dropout=0.6174883141474811
normalize_features =True
pre_training = False
cuda = False
order =15
edge_dropout =0
node_feature_dropout =0
filter_name ='analysis'

rseed(seed)
nseed(seed)
torch.manual_seed(seed)

cuda = cuda and torch.cuda.is_available()

if cuda:
    torch.cuda.manual_seed(seed)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

def get_correctly_predicted_node_idx(net, key, dataset):
    net.eval()
    with torch.no_grad():
        logits  = net(dataset[0])[0]
    mask = dataset[0]['{}_mask'.format(key)]
    pred = logits[mask].max(1)[1]
    return { *pred.eq(dataset[0].y[mask]).nonzero().view(-1).tolist() }

Define model

In [3]:
class Net(torch.nn.Module):
    def __init__(self, dataset):
        super(Net, self).__init__()
        data = dataset.data
        adj = torch.sparse_coo_tensor(data.edge_index, torch.ones(data.num_edges))
        self.G = Graph(adj)
        self.G.estimate_lmax()

        self.analysis = GraphSpectralFilterLayer(self.G, dataset.num_node_features, hidden,
                                                 dropout=dropout, out_channels=heads, filter=filter_name,
                                                 pre_training=False, device='cuda' if cuda else 'cpu',
                                                 alpha=alpha, order=order, concat=True)

        self.synthesis = GraphSpectralFilterLayer(self.G, hidden * heads, dataset.num_classes, filter=filter_name,
                                                  device='cuda' if cuda else 'cpu', dropout=dropout,
                                                  out_channels=1, alpha=alpha, pre_training=False,
                                                  order=order, concat=False)

    def reset_parameters(self):
        self.analysis.reset_parameters()
        self.synthesis.reset_parameters()

    def forward(self, data):
        x = data.x
        x = F.dropout(x, p=dropout, training=self.training)
        x, att1 = self.analysis(x)
        layer_1 = x
        x = F.dropout(x, p=dropout, training=self.training)
        x, att2 = self.synthesis(x)
        layer_2 = x
        x = F.elu(x)
        return F.log_softmax(x, dim=1), layer_1, layer_2

dataset = get_dataset(dataset_name, normalize_features, edge_dropout=edge_dropout,
                                node_feature_dropout=node_feature_dropout)

if cuda:
    dataset[0].to('cuda')

In [12]:
student_heads = 12
hidden= 32
dropout=0

class StudentNet(torch.nn.Module):
    def __init__(self, dataset):
        super(StudentNet, self).__init__()
        data = dataset.data
        adj = torch.sparse_coo_tensor(data.edge_index, torch.ones(data.num_edges))
        self.G = Graph(adj)
        self.G.estimate_lmax()

        self.analysis = GraphSpectralFilterLayer(self.G, dataset.num_node_features, hidden,
                                                 dropout=dropout, out_channels=student_heads, filter=filter_name,
                                                 pre_training=False, device='cuda' if cuda else 'cpu',
                                                 alpha=alpha, order=order, concat=True)

        self.synthesis = GraphSpectralFilterLayer(self.G, hidden * student_heads, dataset.num_classes, filter=filter_name,
                                                  device='cuda' if cuda else 'cpu', dropout=dropout,
                                                  out_channels=student_heads, alpha=alpha, pre_training=False,
                                                  order=order, concat=False)
    def reset_parameters(self):
        self.analysis.reset_parameters()
        # self.synthesis.reset_parameters()

    def forward(self, data):
        x = data.x
        x = F.dropout(x, p=dropout, training=self.training)
        x, att1 = self.analysis(x)
        layer_1 = x
        x = F.dropout(x, p=dropout, training=self.training)
        x, att2 = self.synthesis(x)
        layer_2 = x
        x = F.elu(x)
        return F.log_softmax(x, dim=1), layer_1, layer_2

Load trained model and evaluate

In [13]:
model = Net(dataset)
model.load_state_dict(torch.load('./model/best_Citeseer_lt_zero.pkl'.format(dataset_name),  map_location={'cuda:0': 'cpu'}))

# model = SingleNet(dataset)
# model.load_state_dict(torch.load('./model/best_{}_single_layer.pkl'.format(dataset_name)))

# filter_kernel = model.analysis.filter_kernel

# model_correct_indices = get_correctly_predicted_node_idx(model, 'test', dataset)
eval_info = evaluate(model, dataset[0])
print(eval_info)

{'train_loss': 0.36703506112098694, 'train_acc': 0.9211822660098522, 'train_micro_f1': 0.9211822660098522, 'train_macro_f1': 0.9157023986911743, 'val_loss': 0.6948285102844238, 'val_acc': 0.796, 'val_micro_f1': 0.796, 'val_macro_f1': 0.7447925714273606, 'test_loss': 0.6948463320732117, 'test_acc': 0.805, 'test_micro_f1': 0.805, 'test_macro_f1': 0.7737372300784445}


In [14]:
def train_student(Model, target_1, target_2=None, lr=lr):
    from torch.optim import Adam
    from sklearn.metrics import f1_score
    student = Model(dataset)
    student.reset_parameters()
    optimizer = Adam(student.parameters(), lr=lr, weight_decay=weight_decay)
    data = dataset.data
    epochs =2000

    for epoch in range(1, epochs + 1):
        optimizer.zero_grad()
        logits, out_2, out_1 = student(data)
        loss = F.mse_loss(out_1[dataset[0].train_mask], target_1[dataset[0].train_mask])
        if target_2 is not None:
            loss += F.mse_loss(out_2[dataset[0].train_mask], target_2[dataset[0].train_mask])
        loss.backward()
        optimizer.step()
        student.train()

        # eval_info['epoch'] = epoch
        if epoch % 10 == 0:
            student.eval()
            outs = {}
            for key in ['train', 'val', 'test']:
                mask = data['{}_mask'.format(key)]
                loss = F.mse_loss(out_1[mask], target_1[mask])
                if target_2 is not None:
                    loss += F.mse_loss(out_2[mask], target_2[mask])
                outs['{}_loss'.format(key)] = loss.item()
                # loss = F.nll_loss(logits[mask], data.y[mask]).item()
                # pred = logits[mask].max(1)[1]
                # outs['{}_loss'.format(key)] = loss
                outs['{}_micro_f1'.format(key)] = f1_score(data.y[mask].cpu(), logits[mask].max(1)[1].cpu(), average='micro')
                # outs['{}_macro_f1'.format(key)] = f1_score(data.y[mask].cpu(), logits[mask].max(1)[1].cpu(), average='macro')

            print(outs)
        # if eval_info['val_acc'] > best_val_acc or eval_info['val_loss'] < best_val_loss:
        #     if eval_info['val_acc'] >= best_val_acc and eval_info['val_loss'] <= best_val_loss:
        #         eval_info_early_model = eval_info
        #         # torch.save(model.state_dict(), './best_{}_appnp.pkl'.format(dataset.name))
        #     best_val_acc = np.max((best_val_acc, eval_info['val_acc']))
        #     best_val_loss = np.min((best_val_loss, eval_info['val_loss']))
        #     bad_counter = 0
        # else:
        #     bad_counter += 1
        #     if bad_counter == patience:
        #         break

In [15]:
with torch.no_grad():
    _, target_1, target_2 = model(dataset[0])

train_student(StudentNet, target_2, lr=0.001)

{'train_loss': 1.900356411933899, 'train_micro_f1': 0.541871921182266, 'val_loss': 1.5766907930374146, 'val_micro_f1': 0.54, 'test_loss': 1.611171007156372, 'test_micro_f1': 0.512}
{'train_loss': 1.3834559917449951, 'train_micro_f1': 0.6661193212917351, 'val_loss': 1.1181159019470215, 'val_micro_f1': 0.676, 'test_loss': 1.1511329412460327, 'test_micro_f1': 0.639}
{'train_loss': 0.7616308331489563, 'train_micro_f1': 0.7454844006568144, 'val_loss': 0.5927627682685852, 'val_micro_f1': 0.7160000000000001, 'test_loss': 0.6137223243713379, 'test_micro_f1': 0.734}
{'train_loss': 0.3622009754180908, 'train_micro_f1': 0.7640941434044882, 'val_loss': 0.2867269217967987, 'val_micro_f1': 0.74, 'test_loss': 0.2945309579372406, 'test_micro_f1': 0.761}
{'train_loss': 0.2052546739578247, 'train_micro_f1': 0.78544061302682, 'val_loss': 0.19528210163116455, 'val_micro_f1': 0.764, 'test_loss': 0.18532991409301758, 'test_micro_f1': 0.764}
{'train_loss': 0.14457248151302338, 'train_micro_f1': 0.81554460864