In [2]:
# Based on those articles/blog:
# pytoch implementation of cgn: https://arxiv.org/pdf/1609.02907.pdf
# http://tkipf.github.io/graph-convolutional-networks/

In [253]:
import torch
import numpy as np
from torch.autograd import Variable
import torch.nn.functional as F
from torch import nn

In [481]:
# Create a lambda graph with adjancy matrix and stuff.
nb_nodes = 10
nb_edges = 20

# nodes
nodes = np.arange(nb_nodes)

# roughly nb_edges edges
edges = np.array([(i, ((((i + np.random.randint(nb_nodes - 1))  % nb_nodes) + 1 ) % nb_nodes ))
                  for i in [np.random.randint(nb_nodes) for i in range(nb_edges)]])

# Adding self loop.
edges = np.concatenate((edges, np.array([(i, i) for i in nodes])))


# adjacent matrix
A = np.zeros((nb_nodes, nb_nodes))
A[edges[:, 0], edges[:, 1]] = 1.
A[edges[:, 1], edges[:, 0]] = 1.

# Degree matrix
D = A.sum(axis=1)

In [482]:
print "Adjency matrix: \n{} \n nb edges: {} \n Degree: {} ".format(A, A.sum(), D)


Adjency matrix: 
[[ 1.  0.  1.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  1.  1.  0.  1.  1.  1.  0.  0.  0.]
 [ 1.  1.  1.  0.  1.  1.  0.  1.  1.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  1.  0.  1.  0.  0.  1.  1.  0.]
 [ 1.  1.  1.  0.  0.  1.  0.  0.  1.  0.]
 [ 0.  1.  0.  0.  0.  0.  1.  0.  1.  1.]
 [ 0.  0.  1.  0.  1.  0.  0.  1.  0.  0.]
 [ 0.  0.  1.  0.  1.  1.  1.  0.  1.  1.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  1.  1.]] 
 nb edges: 42.0 
 Degree: [ 3.  5.  7.  1.  5.  5.  4.  3.  6.  3.] 


In [483]:
# Get the Normalized matrix :D^(-1/2)AD^(-1/2)
np.set_printoptions(precision=2)
D_inv = np.diag(1./np.sqrt(D))
norm_transform = D_inv.dot(A).dot(D_inv)
print norm_transform
# So it's not only an average, it's weighted by something (need to investigate why).
# From what I can tell it's kind of a random walk throught the graph. 
# (i.e. the weights on edges connecting "popular" nodes are smaller)

[[ 0.33  0.    0.22  0.    0.    0.26  0.    0.    0.    0.  ]
 [ 0.    0.2   0.17  0.    0.2   0.2   0.22  0.    0.    0.  ]
 [ 0.22  0.17  0.14  0.    0.17  0.17  0.    0.22  0.15  0.  ]
 [ 0.    0.    0.    1.    0.    0.    0.    0.    0.    0.  ]
 [ 0.    0.2   0.17  0.    0.2   0.    0.    0.26  0.18  0.  ]
 [ 0.26  0.2   0.17  0.    0.    0.2   0.    0.    0.18  0.  ]
 [ 0.    0.22  0.    0.    0.    0.    0.25  0.    0.2   0.29]
 [ 0.    0.    0.22  0.    0.26  0.    0.    0.33  0.    0.  ]
 [ 0.    0.    0.15  0.    0.18  0.18  0.2   0.    0.17  0.24]
 [ 0.    0.    0.    0.    0.    0.    0.29  0.    0.24  0.33]]


In [493]:
# Create a module for the CGN:
class CGN(nn.Module):

    def __init__(self, nb_nodes, input_dim, channels, D_norm, out_dim=None,
                ):
        super(CGN, self).__init__()

        self.my_layers = []
        self.out_dim = out_dim
        self.D_norm = Variable(torch.from_numpy(D_norm).float(), requires_grad=False) # The normalizing matrix.
        dims = [input_dim] + channels
        
        layers = []
        for c_in, c_out in zip(dims[:-1], dims[1:]):
            layer = nn.Linear(c_in, c_out)#Variable(torch.randn(c_in, c_out), requires_grad=True)
            layers.append(layer)
        self.my_layers = nn.ModuleList(layers)
        
        # If we have only one target per graph, we have a linear layer.
        if out_dim is not None:
            self.last_layer = nn.Linear(nb_nodes * channels[-1], out_dim)
        
        
    def forward(self, x):
        
        def batch_mul(x, D):
    
            all_output_c = []
            for i in range(x.size()[-1]):
                all_output_c.append(x[:, :, i].mm(D))

            return torch.stack(all_output_c).permute(1, 2, 0).contiguous()

        nb_examples, nb_nodes = x.size()[0], x.size()[1] 
        
        # Do graph convolution for all 
        for layer in self.my_layers:
            
            # Do the normalization (have to do tricky stack for the number of output. It suck a bit.)
            # We should change here to do a kind of convolution or something.
            x = batch_mul(x, self.D_norm)
            
            # Reshape to do elementwise, for all the node.
            x = x.view(nb_examples*nb_nodes, -1)
            x = F.tanh(layer(x)) # or relu, sigmoid...
            x = x.view(nb_examples, nb_nodes, -1)
            
        if self.out_dim is not None:
            x = self.last_layer(x.view(nb_examples, -1))
        
        return x


In [494]:
# Generate some random data:
nb_examples = 1000 # examples
nb_out = 1 # the umber of output (for classification)


# Generate random stuff.
inputs = Variable(torch.randn((nb_examples, nb_nodes, 1)), requires_grad=False)
#targets = Variable(torch.randn((nb_examples, nb_out)), requires_grad=False)
targets = Variable(torch.sum(inputs.data, dim=1), requires_grad=False).squeeze() # try to predict the sum.


In [495]:
# Create our model.
cgn = CGN(nb_nodes, 1, [16] * 3, norm_transform, nb_out)

print "Our model:"
print cgn

Our model:
CGN (
  (my_layers): ModuleList (
    (0): Linear (1 -> 16)
    (1): Linear (16 -> 16)
    (2): Linear (16 -> 16)
  )
  (last_layer): Linear (160 -> 1)
)


In [496]:
# Train the cgn
learning_rate = 1e-4
criterion = torch.nn.MSELoss(size_average=True)
optimizer = torch.optim.SGD(cgn.parameters(), lr=learning_rate, momentum=0.9)

epoch = 500
for t in range(epoch):
    
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = cgn(inputs)

    # Compute and print loss
    loss = criterion(y_pred, targets)
    
    if t % (epoch/10) == 0:
        print(t, loss.data[0])

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
print "Done!"

(0, 9.584810256958008)
(50, 9.020203590393066)
(100, 7.8015336990356445)
(150, 5.213669776916504)
(200, 1.8049923181533813)
(250, 0.581387460231781)
(300, 0.47112777829170227)
(350, 0.436573326587677)
(400, 0.41667523980140686)
(450, 0.40431204438209534)
Done!


In [497]:
# Check the results, and compare them
outputs = cgn(inputs)
print outputs[:10], targets[:10]
# Good enough for me.

Variable containing:
-6.8392
 1.8050
 2.1839
 4.1821
 1.5968
-2.6059
 1.9223
-4.5044
 0.5823
-0.2158
[torch.FloatTensor of size 10x1]
 Variable containing:
-7.6236
 0.6695
 2.8357
 4.1461
 1.9421
-2.3159
 1.1496
-4.5373
 0.0308
 0.3014
[torch.FloatTensor of size 10]

