In [1]:
import dgl
%matplotlib inline
import networkx as nx 
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import os
os.chdir('../../../graphwave/')

import matplotlib.pyplot as plt
import graphwave
from graphwave.shapes import build_graph

import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

width_basis = 50

### 1. Choose the basis (cycle, torus or chain)
basis_type = "cycle" 

### 2. Add the shapes 
n_shapes = 10  
list_shapes = [["house"]] * n_shapes + [["diamond"]] * n_shapes + [["star", 6]] * n_shapes + [["fan", 6]] * n_shapes

### 3. Pass all these parameters to the Graph Structure
add_edges = 100 # random edges to add
G, communities, _ , role_id = build_graph.build_structure(width_basis, basis_type, list_shapes, start=0,
                                       add_random_edges=add_edges, plot=False,
                                       savefig=False)
d = dict(zip(np.unique(role_id), range(len(np.unique(role_id)))))
labels = np.array([d[i] for i in role_id])

In [2]:
G.number_of_nodes()

300

In [3]:
import torch
import torch.nn as nn
import dgl.function as fn
from dgl.nn.pytorch import edge_softmax, GATConv


class GAT(nn.Module):
    def __init__(self,
                 g,
                 num_layers,
                 in_dim,
                 num_hidden,
                 num_classes,
                 heads,
                 activation,
                 feat_drop,
                 attn_drop,
                 negative_slope,
                 residual):
        super(GAT, self).__init__()
        self.g = g
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation
        # input projection (no residual)
        self.gat_layers.append(GATConv(
            in_dim, num_hidden, heads[0],
            feat_drop, attn_drop, negative_slope, False, self.activation))
        # hidden layers
        for l in range(1, num_layers):
            # due to multi-head, the in_dim = num_hidden * num_heads
            self.gat_layers.append(GATConv(
                num_hidden * heads[l-1], num_hidden, heads[l],
                feat_drop, attn_drop, negative_slope, residual, self.activation))
        # output projection
        self.gat_layers.append(GATConv(
            num_hidden * heads[-2], num_classes, heads[-1],
            feat_drop, attn_drop, negative_slope, residual, None))

    def forward(self, inputs):
        h = self.g.in_degrees().view(-1, 1).float()
        #h = inputs
        for l in range(self.num_layers):
            h = self.gat_layers[l](self.g, h).flatten(1)
        # output projection
        logits = self.gat_layers[-1](self.g, h).mean(1)
        return logits

In [4]:
import torch
labels = torch.tensor(labels)
inputs = torch.eye(G.number_of_nodes())

In [5]:
S = dgl.DGLGraph()
S.from_networkx(G)
S.ndata['h'] = inputs

In [6]:
inputs.shape

torch.Size([300, 300])

In [7]:
np.random.seed(1)
labeled_nodes = np.random.choice(list(range(G.number_of_nodes())), int(G.number_of_nodes() * 0.15), replace = False)
labels_train = labels[labeled_nodes]

unlabelled_nodes = [i for i in list(range(G.number_of_nodes())) if i not in labeled_nodes]
val_nodes = np.random.choice(unlabelled_nodes, int(len(unlabelled_nodes)*0.2), replace = False)
test_nodes = [i for i in unlabelled_nodes if i not in val_nodes]

val_label = labels[val_nodes]
test_label = labels[test_nodes]

In [8]:
labeled_nodes

array([189, 123, 185, 213, 106, 127, 176,  73, 275, 242, 266, 147, 299,
        58, 122,  78,  11, 167, 220,  29,  27, 110, 251,  12, 105,  18,
       297,  90, 293, 184, 139, 248, 229,  59,  51,  88,  95, 164,  80,
       217, 221, 228, 191,   4,  70])

In [11]:
heads = ([2] * 3) + [1]
net = GAT(S, 3, 1, 32, 17, heads, F.elu, .3, .3, .2, False)

optimizer = torch.optim.Adam(net.parameters(), lr=0.002)
all_logits = []
for epoch in range(2000):
    logits = net(inputs)
    # we save the logits for visualization later
    logp = F.log_softmax(logits, 1)
    # we only compute loss for labeled nodes
    loss = F.nll_loss(logp[labeled_nodes], labels_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch%100 == 0:
        print('Epoch %d | Loss: %.4f' % (epoch, loss.item()))


Epoch 0 | Loss: 5.3663
Epoch 100 | Loss: 2.6359
Epoch 200 | Loss: 2.5178
Epoch 300 | Loss: 2.2414
Epoch 400 | Loss: 2.1468
Epoch 500 | Loss: 2.4150
Epoch 600 | Loss: 2.0852
Epoch 700 | Loss: 2.1001
Epoch 800 | Loss: 2.0909
Epoch 900 | Loss: 2.0447
Epoch 1000 | Loss: 2.2949
Epoch 1100 | Loss: 2.0190
Epoch 1200 | Loss: 1.8654
Epoch 1300 | Loss: 1.9245
Epoch 1400 | Loss: 2.2727
Epoch 1500 | Loss: 2.0938
Epoch 1600 | Loss: 1.9930
Epoch 1700 | Loss: 2.0411
Epoch 1800 | Loss: 2.0852
Epoch 1900 | Loss: 1.8101


In [12]:
net.eval()
logits = net(inputs)
# we save the logits for visualization later
logp = F.log_softmax(logits, 1)
# we only compute loss for labeled nodes
#loss = F.nll_loss(logp[labeled_nodes], labels_train)

In [13]:
logp

tensor([[-14.9496,  -4.3516,  -5.7407,  ...,  -4.2716,  -3.1702,  -5.5340],
        [-14.8440,  -4.2218,  -5.3892,  ...,  -4.2352,  -3.2041,  -5.4097],
        [-14.7956,  -4.2231,  -5.2378,  ...,  -4.3001,  -3.2845,  -5.3591],
        ...,
        [-15.4750,  -3.7269,  -5.0493,  ...,  -4.0530,  -2.7159,  -4.9317],
        [-14.8445,  -4.6181,  -6.4436,  ...,  -4.2698,  -3.2026,  -5.8785],
        [-14.7487,  -4.8938,  -7.4249,  ...,  -4.4523,  -3.3096,  -6.3532]],
       grad_fn=<LogSoftmaxBackward>)

In [14]:
argmax_Y = torch.max(logp[test_nodes], 1)[1]

In [15]:
print('Accuracy of argmax predictions on the test set: {:4f}%'.format(
    (test_label == argmax_Y.float()).sum().item() / len(test_label) * 100))

Accuracy of argmax predictions on the test set: 19.117647%
