In [2]:
# Based on the following paper and blog post:
# PyTorch implementation of a graph convolutional network (GCN, spelled "CGN" in this notebook): https://arxiv.org/pdf/1609.02907.pdf
# http://tkipf.github.io/graph-convolutional-networks/

In [253]:
import torch
import numpy as np
from torch.autograd import Variable
import torch.nn.functional as F
from torch import nn

In [75]:
# Build a small random undirected graph together with its adjacency matrix
# and node degrees.
# NOTE: no random seed is set, so a different graph is drawn on every run.
nb_nodes = 10
nb_edges = 10

# nodes
nodes = np.arange(nb_nodes)

# Roughly nb_edges edges: the same pair can be drawn more than once.
# The target is the source shifted by an offset in [1, nb_nodes - 1]
# (modulo nb_nodes), so a randomly drawn edge is never a self loop.
edges = np.array([(i, ((((i + np.random.randint(nb_nodes - 1))  % nb_nodes) + 1 ) % nb_nodes ))
                  for i in [np.random.randint(nb_nodes) for i in range(nb_edges)]])

# Adding self loops (one (i, i) pair per node).
edges = np.concatenate((edges, np.array([(i, i) for i in nodes])))


# Adjacency matrix; both directions are filled in so that A is symmetric.
A = np.zeros((nb_nodes, nb_nodes))
A[edges[:, 0], edges[:, 1]] = 1.
A[edges[:, 1], edges[:, 0]] = 1.

# Node degrees (row sums of A, self loop included), kept as a 1-D vector.
D = A.sum(axis=1)

In [76]:
# Show the adjacency matrix, the sum of its entries and the per-node degrees.
# Note that A.sum() counts every non-zero entry of A, not distinct edges.
print("Adjency matrix: \n{} \n nb edges: {} \n Degree: {} ".format(A, A.sum(), D))


Adjency matrix: 
[[ 1.  0.  1.  0.  0.  0.  0.  0.  1.  0.]
 [ 0.  1.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 1.  0.  1.  0.  1.  0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  1.  0.  0.  1.  0.  0.]
 [ 0.  0.  1.  1.  1.  0.  0.  0.  0.  1.]
 [ 0.  1.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  1.  0.  1.]
 [ 0.  0.  1.  1.  0.  0.  1.  1.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  1.  0.  1.  0.  0.  1.]] 
 nb edges: 30.0 
 Degree: [ 3.  2.  4.  3.  4.  2.  3.  4.  2.  3.] 


In [77]:
# Symmetrically normalized adjacency matrix: D^(-1/2) A D^(-1/2).
np.set_printoptions(precision=2)
D_inv = np.diag(1. / np.sqrt(D))  # diagonal matrix holding D^(-1/2), despite the name
norm_transform = np.dot(np.dot(D_inv, A), D_inv)
print(norm_transform)
# Entry (i, j) is A[i, j] / sqrt(D[i] * D[j]), so this is not a plain average
# over the neighbours: every edge is scaled by the degrees of both endpoints.
# Edges between "popular" (high-degree) nodes therefore get smaller weights,
# which the original note likens to a random walk through the graph.

[[ 0.33  0.    0.29  0.    0.    0.    0.    0.    0.41  0.  ]
 [ 0.    0.5   0.    0.    0.    0.5   0.    0.    0.    0.  ]
 [ 0.29  0.    0.25  0.    0.25  0.    0.    0.25  0.    0.  ]
 [ 0.    0.    0.    0.33  0.29  0.    0.    0.29  0.    0.  ]
 [ 0.    0.    0.25  0.29  0.25  0.    0.    0.    0.    0.29]
 [ 0.    0.5   0.    0.    0.    0.5   0.    0.    0.    0.  ]
 [ 0.    0.    0.    0.    0.    0.    0.33  0.29  0.    0.33]
 [ 0.    0.    0.25  0.29  0.    0.    0.29  0.25  0.    0.  ]
 [ 0.41  0.    0.    0.    0.    0.    0.    0.    0.5   0.  ]
 [ 0.    0.    0.    0.    0.29  0.    0.33  0.    0.    0.33]]


In [319]:
# Create a module for the CGN:
class CGN(nn.Module):
    """Stack of graph-convolution layers with an optional per-graph linear head.

    Each layer first mixes the node features with the fixed matrix ``D_norm``
    and then applies one ``nn.Linear`` (shared by all nodes) followed by a ReLU.

    Args:
        nb_nodes: number of nodes in the graph; only used to size the head.
        input_dim: number of input features per node.
        channels: sequence (list or tuple) with the output size of every
            graph-convolution layer. It may be empty, in which case no
            convolution is applied.
        D_norm: numpy array of shape (nb_nodes, nb_nodes) used to mix the
            nodes. It is converted to a float tensor and is not trained.
        out_dim: if not None, a final linear layer maps the flattened
            (nb_nodes * last_channel) features of a graph to ``out_dim`` values.
    """

    def __init__(self, nb_nodes, input_dim, channels, D_norm, out_dim=None):
        super(CGN, self).__init__()

        self.out_dim = out_dim
        # The normalizing matrix. It is stored as a plain attribute (neither a
        # parameter nor a buffer), so it is not returned by parameters().
        self.D_norm = Variable(torch.from_numpy(D_norm).float(), requires_grad=False)

        # list(...) lets callers pass a tuple as well as a list.
        dims = [input_dim] + list(channels)

        self.my_layers = nn.ModuleList(
            [nn.Linear(c_in, c_out) for c_in, c_out in zip(dims[:-1], dims[1:])])

        # If we have only one target per graph, we add a linear layer on top.
        # dims[-1] equals channels[-1] when there is at least one layer and
        # falls back to input_dim when channels is empty.
        if out_dim is not None:
            self.last_layer = nn.Linear(nb_nodes * dims[-1], out_dim)

    def _propagate(self, x):
        """Right-multiply the node axis of ``x`` by ``D_norm`` for every channel.

        ``x`` is (nb_examples, nb_nodes, nb_channels). The result has the same
        layout and satisfies out[e, j, c] = sum_k x[e, k, c] * D_norm[k, j].
        """
        nb_examples, nb_nodes, nb_channels = x.size()
        # Fold the channel axis into the batch axis so one mm handles all channels.
        flat = x.permute(0, 2, 1).contiguous().view(nb_examples * nb_channels, nb_nodes)
        mixed = flat.mm(self.D_norm)
        return mixed.view(nb_examples, nb_channels, -1).permute(0, 2, 1).contiguous()

    def forward(self, x):
        """Run the network on ``x`` of shape (nb_examples, nb_nodes, input_dim).

        Returns a (nb_examples, nb_nodes, last_channel) tensor when ``out_dim``
        is None, otherwise a (nb_examples, out_dim) tensor.
        """
        nb_examples, nb_nodes = x.size()[0], x.size()[1]

        # Do the graph convolution for every layer.
        for layer in self.my_layers:
            # Mix the nodes with the normalizing matrix.
            x = self._propagate(x)

            # Flatten examples and nodes so the same linear map is applied to every node.
            x = x.view(nb_examples * nb_nodes, -1)
            x = F.relu(layer(x))
            x = x.view(nb_examples, nb_nodes, -1)

        if self.out_dim is not None:
            # contiguous() covers the no-layer case, where x is still the caller's tensor.
            x = self.last_layer(x.contiguous().view(nb_examples, -1))

        return x


In [344]:
# Synthetic data set: random node features and random targets.
nb_examples = 200  # number of examples
nb_out = 1  # number of output values per example


# One scalar feature per node and nb_out target values per graph, both drawn
# from a standard normal. Neither needs a gradient (the Variable default).
inputs = Variable(torch.randn(nb_examples, nb_nodes, 1))
targets = Variable(torch.randn(nb_examples, nb_out))


In [345]:
# Instantiate the network: one input feature per node, three graph-convolution
# layers of 16 channels each, and a linear head producing nb_out values.
cgn = CGN(nb_nodes, input_dim=1, channels=[16] * 3, D_norm=norm_transform, out_dim=nb_out)

print("Our model:")
print(cgn)

Our model:
CGN (
  (my_layers): ModuleList (
    (0): Linear (1 -> 16)
    (1): Linear (16 -> 16)
    (2): Linear (16 -> 16)
  )
  (last_layer): Linear (160 -> 1)
)


In [346]:
# Train the cgn with SGD + momentum on a summed (not averaged) squared error.
learning_rate = 1e-4
criterion = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(cgn.parameters(), lr=learning_rate, momentum=0.9)

epoch = 5000
# Report the loss about ten times over the run. max(1, ...) keeps the modulo
# below valid when epoch < 10, and // keeps the interval an integer on both
# Python 2 and Python 3.
log_every = max(1, epoch // 10)
for t in range(epoch):

    # Forward pass: compute the predicted y by passing x to the model.
    y_pred = cgn(inputs)

    # Compute and print the loss.
    loss = criterion(y_pred, targets)

    if t % log_every == 0:
        # Printed as a tuple so the output is the same under Python 2 and 3.
        # NOTE(review): loss.data[0] is the scalar accessor of the PyTorch
        # release this notebook targets; releases >= 0.4 use loss.item() -
        # confirm against the installed version.
        print((t, loss.data[0]))

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print("Done!")

(0, 177.50282287597656)
(500, 156.74127197265625)
(1000, 135.6536407470703)
(1500, 108.57597351074219)
(2000, 84.69371032714844)
(2500, 59.66895294189453)
(3000, 43.29962921142578)
(3500, 32.25688934326172)
(4000, 20.712610244750977)
(4500, 15.861947059631348)


In [347]:
# Compare the predictions with the targets on the first ten training examples.
outputs = cgn(inputs)
print("{} {}".format(outputs[:10], targets[:10]))
# Good enough for me.

Variable containing:
-1.7260
 0.1509
-0.2850
 0.8225
-1.2579
-0.1841
 0.5832
-0.8808
 1.7042
-0.6613
[torch.FloatTensor of size 10x1]
 Variable containing:
-1.9094
 0.0164
-0.4924
 0.9970
-1.0331
-0.3450
 0.4138
-0.8928
 1.8359
-0.3402
[torch.FloatTensor of size 10x1]

