In [2]:
import dgl
%matplotlib inline
import networkx as nx 
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import os
os.chdir('../graphwave/')

import matplotlib.pyplot as plt
import graphwave
from graphwave.shapes import build_graph

import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

width_basis = 200

### 1. Choose the basis (cycle, torus or chain)
basis_type = "cycle" 

### 2. Add the shapes 
n_shapes = 30  
list_shapes = [["house"]] * n_shapes + [["fan"]] * n_shapes + [["star"]] * n_shapes

### 3. Pass all these parameters to the Graph Structure
add_edges = 0 # random edges to add
G, communities, _ , role_id = build_graph.build_structure(width_basis, basis_type, list_shapes, start=0,
                                       add_random_edges=add_edges, plot=False,
                                       savefig=False)
d = dict(zip(np.unique(role_id), range(len(np.unique(role_id)))))
labels = np.array([d[i] for i in role_id])

In [3]:
from dgl.data import citation_graph as citegrh
import networkx as nx
def load_cora_data():
    data = citegrh.load_cora()
    features = th.FloatTensor(data.features)
    labels = th.LongTensor(data.labels)
    train_mask = th.BoolTensor(data.train_mask)
    test_mask = th.BoolTensor(data.test_mask)
    g = data.graph
    # add self loop
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    return g, features, labels, train_mask, test_mask

g, features, labels, train_mask, test_mask = load_cora_data()

In [4]:
print('We have %d nodes.' % g.number_of_nodes())
print('We have %d edges.' % g.number_of_edges())

We have 2708 nodes.
We have 13264 edges.


In [20]:
import torch.nn as nn
import torch.nn.functional as F

# Define the message and reduce function
# NOTE: We ignore the GCN's normalization constant c_ij for this tutorial.
def gcn_message(edges):
    # The argument is a batch of edges.
    # This computes a (batch of) message called 'msg' using the source node's feature 'h'.
    return {'msg' : edges.src['h']}

def gcn_reduce(nodes):
    # The argument is a batch of nodes.
    # This computes the new 'h' features by summing received 'msg' in each node's mailbox.
    return {'h' : torch.sum(nodes.mailbox['msg'], dim=1)}

# Define the GCNLayer module
class GCNLayer(nn.Module):
    def __init__(self, in_feats, out_feats):
        super(GCNLayer, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)

    def forward(self, g, inputs):
        # g is the graph and the inputs is the input node features
        # first set the node features
        g.ndata['h'] = inputs
        # trigger message passing on all edges
        g.send(g.edges(), gcn_message)
        # trigger aggregation at all nodes
        g.recv(g.nodes(), gcn_reduce)
        # get the result node features
        h = g.ndata.pop('h')
        # perform linear transformation
        return self.linear(h)

In [21]:
gcn_msg = fn.copy_src(src='h', out='m')
gcn_reduce = fn.sum(msg='m', out='h')

class NodeApplyModule(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(NodeApplyModule, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)
        self.activation = activation

    def forward(self, node):
        h = self.linear(node.data['h'])
        if self.activation is not None:
            h = self.activation(h)
        return {'h' : h}
    
class GCN(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(GCN, self).__init__()
        self.apply_mod = NodeApplyModule(in_feats, out_feats, activation)

    def forward(self, g, feature):
        g.ndata['h'] = feature
        g.update_all(gcn_msg, gcn_reduce)
        g.apply_nodes(func=self.apply_mod)
        return g.ndata.pop('h')

In [22]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.gcn1 = GCN(1433, 16, F.relu)
        self.gcn2 = GCN(16, 7, None)

    def forward(self, g, features):
        x = self.gcn1(g, features)
        x = self.gcn2(g, x)
        return x
net = Net()
print(net)

Net(
  (gcn1): GCN(
    (apply_mod): NodeApplyModule(
      (linear): Linear(in_features=1433, out_features=16, bias=True)
    )
  )
  (gcn2): GCN(
    (apply_mod): NodeApplyModule(
      (linear): Linear(in_features=16, out_features=7, bias=True)
    )
  )
)


In [23]:
import torch
labels = torch.tensor(labels)
inputs = torch.eye(G.number_of_nodes())

  


In [24]:
def evaluate(model, g, features, labels, mask):
    model.eval()
    with th.no_grad():
        logits = model(g, features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = th.max(logits, dim=1)
        correct = th.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)

In [25]:
features = torch.eye(2708)

In [31]:
train_mask = [True] * 1 + [False] * (len(train_mask) - 1)

In [34]:
import time
import numpy as np
net = Net()
g, features, labels, train_mask, test_mask = load_cora_data()
train_mask = th.tensor([True] * 2 + [False] * (len(train_mask) - 2))
optimizer = th.optim.Adam(net.parameters(), lr=1e-3)
dur = []
for epoch in range(100):
    if epoch >=3:
        t0 = time.time()

    net.train()
    logits = net(g, features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >=3:
        dur.append(time.time() - t0)

    acc = evaluate(net, g, features, labels, test_mask)
    print("Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc, np.mean(dur)))

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Epoch 00000 | Loss 2.0012 | Test Acc 0.1390 | Time(s) nan
Epoch 00001 | Loss 1.9472 | Test Acc 0.1700 | Time(s) nan
Epoch 00002 | Loss 1.8970 | Test Acc 0.2180 | Time(s) nan
Epoch 00003 | Loss 1.8499 | Test Acc 0.2390 | Time(s) 0.1600
Epoch 00004 | Loss 1.8033 | Test Acc 0.2410 | Time(s) 0.1605
Epoch 00005 | Loss 1.7591 | Test Acc 0.2460 | Time(s) 0.1606
Epoch 00006 | Loss 1.7159 | Test Acc 0.2530 | Time(s) 0.1606
Epoch 00007 | Loss 1.6742 | Test Acc 0.2610 | Time(s) 0.1607
Epoch 00008 | Loss 1.6329 | Test Acc 0.2740 | Time(s) 0.1606
Epoch 00009 | Loss 1.5912 | Test Acc 0.2830 | Time(s) 0.1608
Epoch 00010 | Loss 1.5494 | Test Acc 0.2930 | Time(s) 0.1607
Epoch 00011 | Loss 1.5078 | Test Acc 0.2910 | Time(s) 0.1608
Epoch 00012 | Loss 1.4664 | Test Acc 0.3060 | Time(s) 0.1608
Epoch 00013 | Loss 1.4252 | Test Acc 0.3070 | Time(s) 0.1609
Epoch 00014 | Loss 1.3841 | Test Acc 0.3090 | Time(s) 0.1607
Epoch 00015 | Loss 1.3435 | Test Acc 0.3090 | Time(s) 0.1608
Epoch 00016 | Loss 1.3032 | Test 

In [16]:
S = dgl.DGLGraph()
S.from_networkx(G)
S.ndata['feat'] = inputs

In [41]:
labeled_nodes = np.random.choice(list(range(G.number_of_nodes())), 350)
labels_train = labels[labeled_nodes]
unlabelled_nodes = [i for i in list(range(G.number_of_nodes())) if i not in labeled_nodes]
test_label = labels[unlabelled_nodes]

In [47]:
net = GCN(G.number_of_nodes(), 64, np.unique(labels).shape[0])
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
all_logits = []
for epoch in range(80):
    logits = net(S, inputs)
    # we save the logits for visualization later
    all_logits.append(logits.detach())
    logp = F.log_softmax(logits, 1)
    # we only compute loss for labeled nodes
    loss = F.nll_loss(logp[labeled_nodes], labels_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print('Epoch %d | Loss: %.4f' % (epoch, loss.item()))


Epoch 0 | Loss: 2.3168
Epoch 1 | Loss: 2.1854
Epoch 2 | Loss: 2.0515
Epoch 3 | Loss: 1.9082
Epoch 4 | Loss: 1.7647
Epoch 5 | Loss: 1.6287
Epoch 6 | Loss: 1.4966
Epoch 7 | Loss: 1.3618
Epoch 8 | Loss: 1.2255
Epoch 9 | Loss: 1.0934
Epoch 10 | Loss: 0.9700
Epoch 11 | Loss: 0.8562
Epoch 12 | Loss: 0.7535
Epoch 13 | Loss: 0.6632
Epoch 14 | Loss: 0.5845
Epoch 15 | Loss: 0.5139
Epoch 16 | Loss: 0.4495
Epoch 17 | Loss: 0.3933
Epoch 18 | Loss: 0.3482
Epoch 19 | Loss: 0.3133
Epoch 20 | Loss: 0.2853
Epoch 21 | Loss: 0.2632
Epoch 22 | Loss: 0.2456
Epoch 23 | Loss: 0.2296
Epoch 24 | Loss: 0.2139
Epoch 25 | Loss: 0.1989
Epoch 26 | Loss: 0.1850
Epoch 27 | Loss: 0.1718
Epoch 28 | Loss: 0.1596
Epoch 29 | Loss: 0.1497
Epoch 30 | Loss: 0.1424
Epoch 31 | Loss: 0.1371
Epoch 32 | Loss: 0.1324
Epoch 33 | Loss: 0.1274
Epoch 34 | Loss: 0.1218
Epoch 35 | Loss: 0.1162
Epoch 36 | Loss: 0.1114
Epoch 37 | Loss: 0.1080
Epoch 38 | Loss: 0.1057
Epoch 39 | Loss: 0.1037
Epoch 40 | Loss: 0.1015
Epoch 41 | Loss: 0.0988
Ep

In [48]:
net.eval()
logits = net(S, inputs)
# we save the logits for visualization later
logp = F.log_softmax(logits, 1)
# we only compute loss for labeled nodes
#loss = F.nll_loss(logp[labeled_nodes], labels_train)

In [49]:
argmax_Y = torch.max(logp[unlabelled_nodes], 1)[1]

In [50]:
print('Accuracy of argmax predictions on the test set: {:4f}%'.format(
    (test_label == argmax_Y.float()).sum().item() / len(test_label) * 100))

Accuracy of argmax predictions on the test set: 44.751381%
