In [1]:
import dgl
%matplotlib inline
import networkx as nx 
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import os

import dgl.function as fn
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

adj = np.load('../../../data/single_graph/BA/graph_adj.npy')
rows, cols = np.where(adj == 1)
edges = zip(rows.tolist(), cols.tolist())
G = nx.Graph()
G.add_edges_from(edges)
labels = pd.read_csv('../../../data/single_graph/BA/data.csv').label.values


In [10]:
len(np.unique(labels))

10

In [2]:
G.number_of_nodes()

200

In [3]:
import torch
import torch.nn as nn
import dgl.function as fn
from dgl.nn.pytorch import edge_softmax, GATConv


class GAT(nn.Module):
    def __init__(self,
                 g,
                 num_layers,
                 in_dim,
                 num_hidden,
                 num_classes,
                 heads,
                 activation,
                 feat_drop,
                 attn_drop,
                 negative_slope,
                 residual):
        super(GAT, self).__init__()
        self.g = g
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation
        # input projection (no residual)
        self.gat_layers.append(GATConv(
            in_dim, num_hidden, heads[0],
            feat_drop, attn_drop, negative_slope, False, self.activation))
        # hidden layers
        for l in range(1, num_layers):
            # due to multi-head, the in_dim = num_hidden * num_heads
            self.gat_layers.append(GATConv(
                num_hidden * heads[l-1], num_hidden, heads[l],
                feat_drop, attn_drop, negative_slope, residual, self.activation))
        # output projection
        self.gat_layers.append(GATConv(
            num_hidden * heads[-2], num_classes, heads[-1],
            feat_drop, attn_drop, negative_slope, residual, None))

    def forward(self, inputs):
        h = self.g.in_degrees().view(-1, 1).float()
        #h = inputs
        for l in range(self.num_layers):
            h = self.gat_layers[l](self.g, h).flatten(1)
        # output projection
        logits = self.gat_layers[-1](self.g, h).mean(1)
        return logits

In [4]:
import torch
labels = torch.tensor(labels)
inputs = torch.eye(G.number_of_nodes())

In [5]:
S = dgl.DGLGraph()
S.from_networkx(G)
S.ndata['h'] = inputs

In [6]:
inputs.shape

torch.Size([200, 200])

In [7]:
np.random.seed(1)
labeled_nodes = np.random.choice(list(range(G.number_of_nodes())), int(G.number_of_nodes() * 0.15), replace = False)
labels_train = labels[labeled_nodes]

unlabelled_nodes = [i for i in list(range(G.number_of_nodes())) if i not in labeled_nodes]
val_nodes = np.random.choice(unlabelled_nodes, int(len(unlabelled_nodes)*0.2), replace = False)
test_nodes = [i for i in unlabelled_nodes if i not in val_nodes]

val_label = labels[val_nodes]
test_label = labels[test_nodes]

In [8]:
labeled_nodes

array([ 58,  40,  34, 102, 184, 198,  95,   4,  29, 168, 171,  18,  11,
        89, 110, 118, 159,  35, 136,  59,  51,  16,  44,  94,  31, 162,
        38,  28, 193,  27])

In [11]:
heads = ([2] * 3) + [1]
net = GAT(S, 3, 1, 32, 10, heads, F.elu, .3, .3, .2, False)

optimizer = torch.optim.Adam(net.parameters(), lr=0.002)
all_logits = []
for epoch in range(2000):
    logits = net(inputs)
    # we save the logits for visualization later
    logp = F.log_softmax(logits, 1)
    # we only compute loss for labeled nodes
    loss = F.nll_loss(logp[labeled_nodes], labels_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch%100 == 0:
        print('Epoch %d | Loss: %.4f' % (epoch, loss.item()))


Epoch 0 | Loss: 154.8825
Epoch 100 | Loss: 7.1227
Epoch 200 | Loss: 5.2233
Epoch 300 | Loss: 5.9650
Epoch 400 | Loss: 2.6290
Epoch 500 | Loss: 3.4418
Epoch 600 | Loss: 2.6763
Epoch 700 | Loss: 2.4389
Epoch 800 | Loss: 2.2699
Epoch 900 | Loss: 3.0237
Epoch 1000 | Loss: 2.6132
Epoch 1100 | Loss: 2.4184
Epoch 1200 | Loss: 2.1928
Epoch 1300 | Loss: 2.3225
Epoch 1400 | Loss: 2.1570
Epoch 1500 | Loss: 2.2151
Epoch 1600 | Loss: 2.1654
Epoch 1700 | Loss: 2.1993
Epoch 1800 | Loss: 2.1972
Epoch 1900 | Loss: 2.3749


In [12]:
net.eval()
logits = net(inputs)
# we save the logits for visualization later
logp = F.log_softmax(logits, 1)
# we only compute loss for labeled nodes
#loss = F.nll_loss(logp[labeled_nodes], labels_train)

In [13]:
logp

tensor([[ -0.9580,  -7.7852, -26.6310,  ...,  -1.4376, -10.3898,  -0.9722],
        [ -0.2970, -14.7138, -40.7679,  ...,  -2.4015, -16.4597,  -1.7937],
        [ -0.1986, -17.4598, -46.8344,  ...,  -2.9251, -18.6655,  -2.0679],
        ...,
        [ -0.0675, -22.3445, -55.7989,  ...,  -3.6947, -23.2302,  -3.2094],
        [ -0.9972,  -7.5244, -26.0619,  ...,  -1.4109, -10.1616,  -0.9509],
        [ -0.8538,  -8.1289, -28.0184,  ...,  -1.5704, -10.7501,  -1.0056]],
       grad_fn=<LogSoftmaxBackward>)

In [14]:
argmax_Y = torch.max(logp[test_nodes], 1)[1]

In [15]:
print('Accuracy of argmax predictions on the test set: {:4f}%'.format(
    (test_label == argmax_Y.float()).sum().item() / len(test_label) * 100))

Accuracy of argmax predictions on the test set: 5.147059%
