In [3]:
import torch
import numpy as np

import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torchvision import datasets, transforms

# DGCNN (written with help from [ang3loliveira/behavioral_malware_detection_dgcnn](https://github.com/ang3loliveira/behavioral_malware_detection_dgcnn))

In [1]:
def norn_adj(X, input_dim_1):
    """Build self-looped adjacency and inverse-degree matrices from index sequences.

    Every consecutive pair ``(X[b, t], X[b, t + 1])`` in a row of ``X`` sets
    ``A[b, X[b, t], X[b, t + 1]] = 1``. The identity is then added to obtain
    ``A_hat``, and ``D_hat`` is the diagonal matrix holding the reciprocal of
    each column sum of ``A_hat``.

    All tensors are created on ``X.device`` so the function works on CPU as
    well as on any CUDA device (it previously forced ``.cuda()``).

    Args:
        X: 2-D int64 tensor ``(batch, seq_len)`` of node indices in
            ``[0, input_dim_1)`` (``scatter_`` requires int64 indices).
        input_dim_1: number of nodes, i.e. the side of each adjacency matrix.

    Returns:
        Tuple ``(A_hat, D_hat)``, both float tensors of shape
        ``(batch, input_dim_1, input_dim_1)``.
    """
    device = X.device
    batch_size = X.size(0)
    seq_len = X.size(-1)

    A = torch.zeros((batch_size, input_dim_1, input_dim_1),
                    dtype=torch.float, device=device)

    # Flatten each (n x n) matrix so one scatter_ call can set all edges:
    # entry (src, dst) lives at flat offset src * n + dst.
    A_flat = A.view(batch_size, -1)
    src = X.narrow(-1, 0, seq_len - 1)
    dst = X.narrow(-1, 1, seq_len - 1)
    A_flat.scatter_(1, src * input_dim_1 + dst, 1)

    A_hat = A + torch.eye(input_dim_1, dtype=torch.float, device=device)
    # The added identity guarantees every column sum is >= 1, so the
    # reciprocal is always finite.
    D_hat = A_hat.sum(dim=1).pow(-1.0).diag_embed()

    return A_hat, D_hat

def to_one_hot(X, input_dim_1):
    """One-hot encode integer indices and move the class axis to position 1.

    Args:
        X: integer tensor of class indices; the ``permute`` below requires
            the encoded result to be 3-D, so ``X`` must be 2-D.
        input_dim_1: number of classes for the encoding.

    Returns:
        Float tensor of shape ``(X.size(0), input_dim_1, X.size(1))``.
    """
    encoded = F.one_hot(X, num_classes=input_dim_1)
    # (batch, position, class) -> (batch, class, position)
    return encoded.float().permute(0, 2, 1)

In [4]:
class DGCNN_network(nn.Module):
    """Graph-convolution layer that computes ``D_hat @ A_hat @ X @ W``.

    ``W`` is a learnable ``(weight_dim_1, weight_dim_2)`` matrix initialised
    from a uniform distribution on ``[0, 1)`` via ``torch.rand``.
    """

    def __init__(self, weight_dim_1, weight_dim_2):
        super().__init__()
        self.weight_dim_1 = weight_dim_1
        self.weight_dim_2 = weight_dim_2

        initial_weights = torch.rand(
            (self.weight_dim_1, weight_dim_2),
            dtype=torch.float,
            requires_grad=True,
        )
        self.weights = nn.Parameter(initial_weights)

    def forward(self, A_hat, D_hat, X):
        """Propagate ``X`` through the graph, then project with the weights."""
        propagated = D_hat @ A_hat @ X
        return propagated @ self.weights

class Model_1_network(nn.Module):
    """One DGCNN layer followed by dropout, ReLU and a linear read-out.

    Args:
        input_dim_1: number of distinct node indices; used as the adjacency
            size and as the number of one-hot classes.
        input_dim_2: first dimension of the DGCNN weight matrix. Because the
            one-hot input has shape ``(batch, input_dim_1, seq_len)``, the
            matmul in the DGCNN layer requires ``seq_len == input_dim_2``.
        weight_dim_2: number of output channels of the DGCNN layer.
        dropout_rate: dropout probability applied to the DGCNN output.
    """

    def __init__(self, input_dim_1, input_dim_2, weight_dim_2, dropout_rate):

        super(Model_1_network, self).__init__()

        self.input_dim_1 = input_dim_1
        self.input_dim_2 = input_dim_2
        self.weight_dim_1 = input_dim_2
        self.weight_dim_2 = weight_dim_2
        self.dropout_rate = dropout_rate

        self.dgcnn = DGCNN_network(self.weight_dim_1, self.weight_dim_2)
        self.dropout = nn.Dropout(p = self.dropout_rate)
        # The DGCNN output (batch, input_dim_1, weight_dim_2) is flattened
        # before this layer, hence the product of the two dimensions.
        self.fc = nn.Linear(self.input_dim_1 * self.weight_dim_2, 1)

    def forward(self, X):
        """Map a batch of index sequences to one value per sequence.

        Args:
            X: integer tensor ``(batch, seq_len)`` of node indices.

        Returns:
            Output of the final linear layer with all size-1 dimensions
            squeezed out. Note that ``squeeze()`` also removes the batch
            dimension when the batch holds a single sequence.
        """
        # Read the node count from the instance: a bare `input_dim_1` is not
        # defined in this scope and only resolved if a global of that name
        # happened to exist.
        A_hat, D_hat = norn_adj(X, self.input_dim_1)
        X = to_one_hot(X, self.input_dim_1)

        H = self.dgcnn(A_hat, D_hat, X)
        H = self.dropout(H)
        H = torch.relu(H)
        H = H.view(H.size(0), -1)
        H = self.fc(H)
        return H.squeeze()
    
# Smoke test: build the model once, show its layout and parameter count,
# then drop it again so nothing lingers in the kernel namespace.
model = Model_1_network(input_dim_1=307, input_dim_2=100, weight_dim_2=31, dropout_rate=0.4)

print(model)
parameter_count = np.sum([tensor.numel() for tensor in model.parameters()])
print(f'\nParameters: {parameter_count}')
del model

Model_1_network(
  (dgcnn): DGCNN_network()
  (dropout): Dropout(p=0.4, inplace=False)
  (fc): Linear(in_features=9517, out_features=1, bias=True)
)

Parameters: 12618


# Graph Convolutional Network using [DGL](https://docs.dgl.ai/en/latest/tutorials/models/1_gnn/1_gcn.html)

Mathematically, the GCN model follows this formula:

$H^{(l+1)} = \sigma(\tilde{D}^{-\frac{1}{2}}\tilde{A}\tilde{D}^{-\frac{1}{2}}H^{(l)}W^{(l)})$

Here, $H^{(l)}$ denotes the $l^{th}$ layer in the network,
$\sigma$ is the non-linearity, and $W$ is the weight matrix for
this layer. $D$ and $A$, as commonly seen, represent degree
matrix and adjacency matrix, respectively. The ~ is a renormalization trick
in which we add a self-connection to each node of the graph, and build the
corresponding degree and adjacency matrix.  The shape of the input
$H^{(0)}$ is $N \times D$, where $N$ is the number of nodes
and $D$ is the number of input features. We can chain up multiple
layers as such to produce a node-level representation output with shape
$N \times F$, where $F$ is the dimension of the output node
feature vector.

The equation can be efficiently implemented using sparse matrix
multiplication kernels (such as Kipf's
[pygcn](https://github.com/tkipf/pygcn) code). The DGL implementation below
in fact already uses this trick, thanks to its use of builtin functions. To
understand what is under the hood, please read the DGL tutorial on [PageRank](https://docs.dgl.ai/en/latest/tutorials/basics/3_pagerank.html).



In [6]:
import dgl
import dgl.function as fn
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

# Message function: copy the source node's feature 'h' into the message field 'm'.
gcn_msg = fn.copy_src(src='h', out='m')
# Reduce function: sum the incoming messages 'm' and store the result as node feature 'h'.
gcn_reduce = fn.sum(msg='m', out='h')

In [7]:
class NodeApplyModule(nn.Module):
    """Per-node update: a linear layer followed by an optional activation.

    Args:
        in_feats: size of the incoming node feature 'h'.
        out_feats: size of the produced node feature 'h'.
        activation: callable applied to the linear output, or ``None`` to
            return the linear output unchanged.
    """

    def __init__(self, in_feats, out_feats, activation):
        super().__init__()
        self.linear = nn.Linear(in_feats, out_feats)
        self.activation = activation

    def forward(self, node):
        """Return ``{'h': new_feature}`` computed from ``node.data['h']``."""
        projected = self.linear(node.data['h'])
        if self.activation is None:
            return {'h': projected}
        return {'h': self.activation(projected)}
    
class GCN(nn.Module):
    """One graph-convolution layer built from DGL message passing.

    Node features are aggregated over incoming edges with the module-level
    ``gcn_msg`` / ``gcn_reduce`` functions, then transformed per node by a
    ``NodeApplyModule`` (linear layer plus optional activation).
    """

    def __init__(self, in_feats, out_feats, activation):
        super().__init__()
        self.apply_mod = NodeApplyModule(in_feats, out_feats, activation)

    def forward(self, g, feature):
        """Run one round of message passing on ``g`` and return the new features.

        The node field 'h' is used as scratch space on the graph and is
        removed again before returning.
        """
        g.ndata['h'] = feature
        # Aggregate neighbour features into 'h' ...
        g.update_all(gcn_msg, gcn_reduce)
        # ... then apply the per-node transform to the aggregated value.
        g.apply_nodes(func=self.apply_mod)
        updated = g.ndata.pop('h')
        return updated
    
class Net(nn.Module):
    """Two-layer GCN: a ReLU hidden layer followed by a linear output layer.

    The layer sizes used to be hard-coded; they are now parameters whose
    defaults reproduce the original configuration, so ``Net()`` builds the
    same network as before.

    Args:
        in_feats: size of the input node features (default 1433).
        hidden_feats: size of the hidden node representation (default 16).
        n_classes: size of the per-node output vector (default 7).
    """

    def __init__(self, in_feats=1433, hidden_feats=16, n_classes=7):
        super(Net, self).__init__()
        self.gcn1 = GCN(in_feats, hidden_feats, F.relu)
        # No activation on the last layer: it returns raw per-node scores.
        self.gcn2 = GCN(hidden_feats, n_classes, None)

    def forward(self, g, features):
        """Return per-node output scores for graph ``g`` with node ``features``."""
        x = self.gcn1(g, features)
        x = self.gcn2(g, x)
        return x

In [8]:
# Instantiate the two-layer GCN with its default sizes and print its module tree.
net = Net()
print(net)

Net(
  (gcn1): GCN(
    (apply_mod): NodeApplyModule(
      (linear): Linear(in_features=1433, out_features=16, bias=True)
    )
  )
  (gcn2): GCN(
    (apply_mod): NodeApplyModule(
      (linear): Linear(in_features=16, out_features=7, bias=True)
    )
  )
)


In [9]:
from dgl.data import citation_graph as citegrh
import networkx as nx
def load_cora_data():
    """Load the Cora citation dataset and wrap its graph as a ``DGLGraph``.

    Existing self-loops are removed first and then exactly one self-loop is
    added per node, so every node ends up with a single self-connection.

    Returns:
        Tuple ``(g, features, labels, train_mask, test_mask)`` where ``g`` is
        the ``DGLGraph`` and the remaining items are torch tensors
        (float features, long labels, byte masks).
    """
    cora = citegrh.load_cora()

    # Normalise self-loops: strip any that exist, then add one per node.
    nx_graph = cora.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    dgl_graph = DGLGraph(nx_graph)
    dgl_graph.add_edges(dgl_graph.nodes(), dgl_graph.nodes())

    return (
        dgl_graph,
        torch.FloatTensor(cora.features),
        torch.LongTensor(cora.labels),
        torch.ByteTensor(cora.train_mask),
        torch.ByteTensor(cora.test_mask),
    )

In [10]:
def evaluate(model, g, features, labels, mask):
    """Compute classification accuracy on the nodes selected by ``mask``.

    Puts ``model`` into eval mode (it is not switched back afterwards) and
    runs the forward pass without gradient tracking.

    Args:
        model: module called as ``model(g, features)`` returning per-node logits.
        g: graph passed through to the model.
        features: node feature tensor passed through to the model.
        labels: tensor of true class indices, one per node.
        mask: mask or index tensor selecting the nodes to score.

    Returns:
        Fraction of selected nodes whose arg-max logit equals the label.
    """
    model.eval()
    with torch.no_grad():
        selected_logits = model(g, features)[mask]
        selected_labels = labels[mask]
        predicted = torch.max(selected_logits, dim=1)[1]
        n_correct = torch.sum(predicted == selected_labels).item()
    return n_correct * 1.0 / len(selected_labels)

#### Training

In [11]:
import time
import numpy as np
g, features, labels, train_mask, test_mask = load_cora_data()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

# The first epochs are excluded from the timing average (warm-up).
warmup_epochs = 3
dur = []
for epoch in range(50):
    # Always take the timestamp so `t0` is never conditionally defined.
    t0 = time.time()

    net.train()
    logits = net(g, features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= warmup_epochs:
        dur.append(time.time() - t0)

    acc = evaluate(net, g, features, labels, test_mask)
    # np.mean([]) raises "Mean of empty slice" RuntimeWarnings during the
    # warm-up epochs; report nan explicitly instead (same printed text).
    mean_dur = np.mean(dur) if dur else float('nan')
    print("Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc, mean_dur))

Downloading /Users/Pablo/.dgl/cora.zip from https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/cora_raw.zip...
Extracting file to /Users/Pablo/.dgl/cora


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Epoch 00000 | Loss 1.9488 | Test Acc 0.1300 | Time(s) nan
Epoch 00001 | Loss 1.9159 | Test Acc 0.2160 | Time(s) nan
Epoch 00002 | Loss 1.8868 | Test Acc 0.3250 | Time(s) nan
Epoch 00003 | Loss 1.8604 | Test Acc 0.3740 | Time(s) 0.0239
Epoch 00004 | Loss 1.8346 | Test Acc 0.3920 | Time(s) 0.0240
Epoch 00005 | Loss 1.8087 | Test Acc 0.4040 | Time(s) 0.0252
Epoch 00006 | Loss 1.7826 | Test Acc 0.4120 | Time(s) 0.0287
Epoch 00007 | Loss 1.7562 | Test Acc 0.4230 | Time(s) 0.0280
Epoch 00008 | Loss 1.7292 | Test Acc 0.4320 | Time(s) 0.0285
Epoch 00009 | Loss 1.7017 | Test Acc 0.4380 | Time(s) 0.0280
Epoch 00010 | Loss 1.6741 | Test Acc 0.4420 | Time(s) 0.0285
Epoch 00011 | Loss 1.6465 | Test Acc 0.4440 | Time(s) 0.0287
Epoch 00012 | Loss 1.6187 | Test Acc 0.4550 | Time(s) 0.0283
Epoch 00013 | Loss 1.5910 | Test Acc 0.4620 | Time(s) 0.0279
Epoch 00014 | Loss 1.5635 | Test Acc 0.4710 | Time(s) 0.0278
Epoch 00015 | Loss 1.5363 | Test Acc 0.4810 | Time(s) 0.0280
Epoch 00016 | Loss 1.5102 | Test 

# Graph Convolutional Network using [PyTorch Geometric](https://github.com/rusty1s/pytorch_geometric)