### Import required libraries

In [1]:
import time
import argparse
import numpy as np
import mxnet as mx
from mxnet import autograd, gluon
from utils import load_data, accuracy
from model import GCN
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline

### Check data

In [2]:
cite_data = pd.read_csv('/home/ec2-user/SageMaker/graph-cnn.mxnet/data/cora/cora.cites', sep = '\t', header=None)
cite_data.columns = ['cited', 'citing']
cite_data.head()

Unnamed: 0,cited,citing
0,35,1033
1,35,103482
2,35,103515
3,35,1050679
4,35,1103960


In [3]:
content_data = pd.read_csv('/home/ec2-user/SageMaker/graph-cnn.mxnet/data/cora/cora.content', sep = '\t', header=None)
content_data.head(n=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1425,1426,1427,1428,1429,1430,1431,1432,1433,1434
0,31336,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Neural_Networks
1,1061127,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,Rule_Learning
2,1106406,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Reinforcement_Learning
3,13195,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Reinforcement_Learning
4,37879,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Probabilistic_Methods
5,1126012,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,Probabilistic_Methods
6,1107140,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Theory
7,1102850,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Neural_Networks
8,31349,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Neural_Networks
9,1106418,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Theory


In [4]:
# encode class labels 
cls_labels = dict()
for i, c in enumerate(set(content_data.iloc[:,1434].tolist())):
    cls_labels[c] = i
    
cls_labels

{'Theory': 0,
 'Case_Based': 1,
 'Genetic_Algorithms': 2,
 'Neural_Networks': 3,
 'Reinforcement_Learning': 4,
 'Probabilistic_Methods': 5,
 'Rule_Learning': 6}

### Create and visualize the network

In [5]:
G = nx.from_pandas_edgelist(cite_data, source='citing', target='cited',  create_using=nx.DiGraph())
print(nx.info(G))

Name: 
Type: DiGraph
Number of nodes: 2708
Number of edges: 5429
Average in degree:   2.0048
Average out degree:   2.0048


In [6]:
#### calculate node positions for visualization
#pos = nx.spring_layout(G, iterations=100)

#### color coding nodes based on their classes
#node_color = [cls_labels[content_data.ix[content_data.ix[:,0]==node,1434].tolist()[0]] for node in G.nodes()]

#fig = plt.figure(figsize=(50,50))
#plt.axis("off")
#plt.title('Citation Network', fontsize=50)
#nx.draw_networkx(G, pos = pos, cmap = plt.get_cmap("jet"), node_color = node_color, with_labels = False, width = 0.8, linewidths=0.1)
#plt.show()
#fig.savefig('citation_net.png')

![Citation Network](citation_network.png)

### Set up context

In [7]:
ctx = mx.gpu(0) 

### Hyperparameters

In [8]:
lr = 0.01
seed = 42
dropout = 0.0
hidden = 16
epochs = 200
weight_decay = 5e-4
optim = 'adam'
loss = gluon.loss.SoftmaxCrossEntropyLoss() # Original implementation uses Negative Log Likelihood

### Set seed for random number generators in numpy and mxnet

In [9]:
np.random.seed(seed)
mx.random.seed(seed)

adj, features, labels, idx_train, idx_val, idx_test = load_data(ctx=ctx)

model = GCN(nfeat=features.shape[1],
            nhid=hidden,
            nclass=int(labels.max().asnumpy().item()) + 1,
            dropout=dropout)

model.collect_params().initialize(ctx=ctx)

trainer = gluon.Trainer(model.collect_params(), 'adam', {'learning_rate': lr,})

Loading cora dataset...


### Training process

In [10]:
accs = []

for epoch in range(epochs):
    t = time.time()
    with autograd.record():
        output = model(features, adj)
        loss_train = loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
        accs.append(acc_train)
        loss_train.backward()

    if epoch%20==0:
        print('Epoch: {:04d}'.format(epoch+1),
              'loss_train: {:.4f}'.format(np.mean(loss_train.asnumpy())),
              'acc_train: {:.4f}'.format(acc_train),
              'time: {:.4f}s'.format(time.time() - t))

    trainer.step(1)

print(
    'Training Accuracy: ', accuracy(output[idx_train], labels[idx_train]),'\n',
    'Validation Accuracy: ', accuracy(output[idx_val], labels[idx_val]),'\n',
    'Test Accuracy: ', accuracy(output[idx_test], labels[idx_test])
)

Epoch: 0001 loss_train: 4.4815 acc_train: 0.0571 time: 0.0113s
Epoch: 0021 loss_train: 1.7735 acc_train: 0.2357 time: 0.0055s
Epoch: 0041 loss_train: 1.5616 acc_train: 0.6286 time: 0.0055s
Epoch: 0061 loss_train: 1.3465 acc_train: 0.6500 time: 0.0056s
Epoch: 0081 loss_train: 1.1046 acc_train: 0.7429 time: 0.0048s
Epoch: 0101 loss_train: 0.8490 acc_train: 0.8429 time: 0.0052s
Epoch: 0121 loss_train: 0.6290 acc_train: 0.9286 time: 0.0048s
Epoch: 0141 loss_train: 0.4656 acc_train: 0.9429 time: 0.0050s
Epoch: 0161 loss_train: 0.3504 acc_train: 0.9571 time: 0.0048s
Epoch: 0181 loss_train: 0.2687 acc_train: 0.9714 time: 0.0048s
Training Accuracy:  0.9785714285714285 
 Validation Accuracy:  0.82 
 Test Accuracy:  0.788
