In [32]:
import time
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from utils import load_citation, sgc_precompute, set_seed, rw_restart_precompute, test_split
from models import get_model
from metrics import accuracy
import pickle as pkl
from args import get_citation_args
from time import perf_counter

# Arguments
args = get_citation_args()
set_seed(args.seed, args.cuda)

# Value forwarded to load_citation through its `alpha` keyword. It has to be
# bound before the call: previously the name was only assigned by a cell further
# down the notebook, so this cell raised NameError on a fresh kernel.
# 0.05 is the value that later cell assigns.
alpha = 0.05

# Load the graph, node features, labels and the default train/val/test indices.
adj, features, labels, idx_train, idx_val, idx_test = load_citation(
    args.dataset, args.normalization, args.cuda, gamma=args.gamma, alpha=alpha)

In [59]:
# Override a few parsed options for the experiments below.
# NOTE(review): load_citation was already called with the previous value of
# args.normalization, so that override only affects code reading it afterwards.
vars(args).update(
    degree=20,
    normalization='RWalkRestart',
    epochs=400,
)

In [60]:
# Display the current option namespace (parsed values plus any overrides applied so far).
args

Namespace(concat=False, cuda=False, dataset='cora', degree=20, dropout=0, epochs=400, experiment='base-experiment', feature='mul', gamma=1.0, hidden=0, lr=0.2, model='SGC', multi_scale=False, multiply_degree=False, no_cuda=False, normalization='RWalkRestart', per=-1, seed=42, tuned=False, weight_decay=5e-06)

In [None]:
from citation import train_regression, test_regression

# Grid search over the propagation depth (args.degree) and alpha.
# For every (degree, alpha) pair the features are re-propagated, a fresh model is
# trained on each index split, and the split-averaged validation/test accuracies
# are appended to `cora_results`.
print('args.degree is {}'.format(args.degree))
cora_results = []

# True: evaluate the splits from test_split plus the default split.
# False: evaluate only the default split returned by load_citation.
k_fold = True
num_classes = labels.max().item() + 1

for degree in np.arange(5, 31, 5):
    args.degree = degree
    print('------------args.degree = {}------------'.format(args.degree))
    for alpha in [i/100 for i in range(1, 26)]:
        cur_features, precompute_time = rw_restart_precompute(features, adj, args.degree, alpha, args.concat)

        default_split = {'train_idx': idx_train, 'val_idx': idx_val, 'test_idx': idx_test}
        if k_fold:
            # Copy so the object returned by test_split is never mutated.
            idx_splits = list(test_split(args.dataset)) + [default_split]
        else:
            idx_splits = [default_split]

        acc_test_list = []
        acc_val_list = []
        train_time_list = []
        for idxs in idx_splits:
            # Loop-local names: rebinding the global idx_train/idx_val/idx_test here
            # would leave an arbitrary split behind as the "default" one whenever
            # the sweep is interrupted part-way through.
            split_train, split_val, split_test = idxs['train_idx'], idxs['val_idx'], idxs['test_idx']
            # Size the model from the tensor it is actually trained on; this
            # presumably differs from features.size(1) when args.concat is set.
            model = get_model(args.model, cur_features.size(1), num_classes, args.hidden, args.dropout,
                              args.cuda)
            model, cur_acc_val, cur_train_time = train_regression(
                model, cur_features[split_train], labels[split_train],
                cur_features[split_val], labels[split_val],
                args.epochs, args.weight_decay, args.lr, args.dropout)
            cur_acc_test = test_regression(model, cur_features[split_test], labels[split_test])
            acc_test_list.append(cur_acc_test)
            acc_val_list.append(cur_acc_val)
            train_time_list.append(cur_train_time)

        acc_test = np.average(acc_test_list)
        acc_val = np.average(acc_val_list)
        train_time = np.average(train_time_list)

        print('-------alpha = {}-----------'.format(alpha))
        print("Validation Accuracy: {:.4f} Test Accuracy: {:.4f}".format(acc_val, acc_test))
        cora_results.append({'degree':args.degree, 'alpha':alpha,'acc_val':acc_val,'acc_test':acc_test})
        print("Pre-compute time: {:.4f}s, train time: {:.4f}s, total: {:.4f}s".format(precompute_time, train_time, precompute_time+train_time))


args.degree is 10
------------args.degree = 5------------
-------alpha = 0.01-----------
Validation Accuracy: 0.8062 Test Accuracy: 0.8108
Pre-compute time: 0.3726s, train time: 0.1866s, total: 0.5592s
-------alpha = 0.02-----------
Validation Accuracy: 0.8065 Test Accuracy: 0.8115
Pre-compute time: 0.3949s, train time: 0.1899s, total: 0.5848s
-------alpha = 0.03-----------
Validation Accuracy: 0.8084 Test Accuracy: 0.8118
Pre-compute time: 0.4379s, train time: 0.1920s, total: 0.6299s
-------alpha = 0.04-----------
Validation Accuracy: 0.8085 Test Accuracy: 0.8120
Pre-compute time: 0.4460s, train time: 0.2056s, total: 0.6517s
-------alpha = 0.05-----------
Validation Accuracy: 0.8085 Test Accuracy: 0.8129
Pre-compute time: 0.4426s, train time: 0.2042s, total: 0.6468s
-------alpha = 0.06-----------
Validation Accuracy: 0.8080 Test Accuracy: 0.8130
Pre-compute time: 0.3991s, train time: 0.1955s, total: 0.5945s
-------alpha = 0.07-----------
Validation Accuracy: 0.8084 Test Accuracy: 0.81

-------alpha = 0.07-----------
Validation Accuracy: 0.8144 Test Accuracy: 0.8195
Pre-compute time: 1.0675s, train time: 0.2033s, total: 1.2708s
-------alpha = 0.08-----------
Validation Accuracy: 0.8140 Test Accuracy: 0.8186
Pre-compute time: 1.0932s, train time: 0.1979s, total: 1.2911s
-------alpha = 0.09-----------
Validation Accuracy: 0.8127 Test Accuracy: 0.8165
Pre-compute time: 1.0966s, train time: 0.1992s, total: 1.2959s
-------alpha = 0.1-----------
Validation Accuracy: 0.8135 Test Accuracy: 0.8167
Pre-compute time: 1.0434s, train time: 0.2001s, total: 1.2436s
-------alpha = 0.11-----------
Validation Accuracy: 0.8131 Test Accuracy: 0.8160
Pre-compute time: 1.0332s, train time: 0.1989s, total: 1.2321s
-------alpha = 0.12-----------
Validation Accuracy: 0.8111 Test Accuracy: 0.8144
Pre-compute time: 1.0861s, train time: 0.2020s, total: 1.2881s
-------alpha = 0.13-----------
Validation Accuracy: 0.8085 Test Accuracy: 0.8134
Pre-compute time: 1.1004s, train time: 0.2029s, total: 1

[0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24]


In [None]:
# Provide a figure: test accuracy as a function of alpha, one curve per degree k.
import matplotlib
import matplotlib.pyplot as plt

# Take both axes from the recorded results instead of a separately hard-coded
# alpha grid, so x and y always have matching lengths even when the sweep was
# interrupted and some degrees have fewer records than others.
k_alpha = {}
k_acc_test = {}
for record in cora_results:
    k = record['degree']
    k_alpha.setdefault(k, []).append(record['alpha'])
    k_acc_test.setdefault(k, []).append(record['acc_test'])

# Every alpha value that was actually evaluated, in ascending order.
alpha_list = sorted({record['alpha'] for record in cora_results})

fig, ax = plt.subplots()
for k in k_acc_test:
    ax.plot(k_alpha[k], k_acc_test[k], label='k={}'.format(k))
# ax.plot(alpha_list, AugNormAdj_test_acc_list, label='AugNormAdj')
ax.set_xlabel('alpha')
ax.set_ylabel('test_acc')
ax.grid()
ax.legend()

# fig.savefig("test.png")
plt.show()

In [30]:
# Single-configuration run: propagate the features once with a fixed alpha and
# build the model that is trained further down in this cell.
alpha = 0.05

# NOTE(review): the result is written back into `features`, so executing this
# cell a second time passes already-propagated features to the precompute again.
features, precompute_time = rw_restart_precompute(
    features,
    adj,
    args.degree,
    alpha,
    args.concat,
)

print("{:.4f}s".format(precompute_time))

model = get_model(
    args.model,
    features.size(1),
    labels.max().item()+1,
    args.hidden,
    args.dropout,
    args.cuda,
)

def train_regression(model,
                     train_features, train_labels,
                     val_features, val_labels,
                     epochs=args.epochs, weight_decay=args.weight_decay,
                     lr=args.lr, dropout=args.dropout):
    """Train `model` with Adam on the training set and score it on the validation set.

    Each epoch is one full-batch step: forward pass on `train_features`,
    cross-entropy loss against `train_labels`, backward pass, optimizer step.

    Args:
        model: module called as `model(features)`; its parameters are optimized.
        train_features, train_labels: inputs and targets used for the updates.
        val_features, val_labels: inputs and targets scored once after training.
        epochs: number of full-batch optimization steps.
        weight_decay: weight decay passed to Adam.
        lr: learning rate passed to Adam.
        dropout: accepted for signature compatibility; not used in this function.

    Returns:
        (model, acc_val, train_time): the trained model, the value returned by
        `accuracy` on the validation set, and the seconds spent in the training
        loop (validation is not included in the timing).

    Note:
        The defaults are read from `args` once, when this `def` is executed;
        changing `args` afterwards does not change them.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)
    t = perf_counter()
    for _ in range(epochs):
        model.train()
        optimizer.zero_grad()
        # Always run the forward pass. It used to be guarded by the global
        # `args.model == 'SGC'`, which left `output` unbound for any other model
        # and disagreed with the unconditional forward pass used for validation.
        output = model(train_features)
        loss_train = F.cross_entropy(output, train_labels)
        loss_train.backward()
        optimizer.step()
    train_time = perf_counter()-t

    # Validation needs no gradients.
    with torch.no_grad():
        model.eval()
        output = model(val_features)
        acc_val = accuracy(output, val_labels)

    return model, acc_val, train_time

def test_regression(model, test_features, test_labels):
    """Score a trained model on held-out data.

    Args:
        model: module called as `model(features)`.
        test_features: inputs to score.
        test_labels: targets compared against the model output.

    Returns:
        The value returned by `accuracy(model(test_features), test_labels)`.
    """
    model.eval()
    # Evaluation needs no autograd graph; this matches the validation step in
    # train_regression, which also runs under torch.no_grad().
    with torch.no_grad():
        return accuracy(model(test_features), test_labels)

# Train and evaluate the configured model on the features prepared above, then
# report validation/test accuracy and timings.
if args.model == "SGC":
    # True: average over the splits from test_split plus the default split.
    # False: train the already-constructed `model` on the default split only.
    k_fold = True
    if k_fold:
        default_split = {'train_idx': idx_train, 'val_idx': idx_val, 'test_idx': idx_test}
        # Copy so the object returned by test_split is never mutated.
        idx_splits = list(test_split(args.dataset)) + [default_split]

        acc_test_list = []
        acc_val_list = []
        train_time_list = []
        for idxs in idx_splits:
            # Loop-local names: rebinding the global idx_train/idx_val/idx_test
            # here would leave an arbitrary split behind as the "default" one if
            # the cell is interrupted part-way through.
            split_train, split_val, split_test = idxs['train_idx'], idxs['val_idx'], idxs['test_idx']
            # A fresh model per split so no parameters carry over between splits.
            model = get_model(args.model, features.size(1), labels.max().item() + 1, args.hidden, args.dropout,
                              args.cuda)
            model, cur_acc_val, cur_train_time = train_regression(
                model, features[split_train], labels[split_train],
                features[split_val], labels[split_val],
                args.epochs, args.weight_decay, args.lr, args.dropout)
            cur_acc_test = test_regression(model, features[split_test], labels[split_test])
            acc_test_list.append(cur_acc_test)
            acc_val_list.append(cur_acc_val)
            train_time_list.append(cur_train_time)
        acc_test = np.average(acc_test_list)
        acc_val = np.average(acc_val_list)
        train_time = np.average(train_time_list)
    else:
        model, acc_val, train_time = train_regression(model, features[idx_train], labels[idx_train], features[idx_val], labels[idx_val],
                                                      args.epochs, args.weight_decay, args.lr, args.dropout)
        acc_test = test_regression(model, features[idx_test], labels[idx_test])
elif args.model == "GCN":
    model, acc_val, train_time = train_regression(model, features[idx_train], labels[idx_train], features[idx_val], labels[idx_val],
                                                  args.epochs, args.weight_decay, args.lr, args.dropout)
    acc_test = test_regression(model, features[idx_test], labels[idx_test])
else:
    # Fail loudly: falling through would print metrics left over from an earlier
    # cell, or raise NameError on a fresh kernel.
    raise ValueError("Unsupported model: {}".format(args.model))

print("Validation Accuracy: {:.4f} Test Accuracy: {:.4f}".format(acc_val, acc_test))
print("Pre-compute time: {:.4f}s, train time: {:.4f}s, total: {:.4f}s".format(precompute_time, train_time, precompute_time+train_time))


1.2645s
Validation Accuracy: 0.6180 Test Accuracy: 0.6100
Pre-compute time: 1.2645s, train time: 0.0501s, total: 1.3146s
