# Residual Gated Graph ConvNets
### Xavier Bresson, Jan. 15 2018

In [1]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import pdb 
import time
import numpy as np
import pickle
import os

if torch.cuda.is_available():
    print('cuda available')
    dtypeFloat = torch.cuda.FloatTensor
    dtypeLong = torch.cuda.LongTensor
    #torch.cuda.manual_seed(1)
else:
    print('cuda not available')
    dtypeFloat = torch.FloatTensor
    dtypeLong = torch.LongTensor
    #torch.manual_seed(1)
    
# import files in folder util
import sys
sys.path.insert(0, 'util/')
import block 
import graph_generator as g

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]="2"

from sklearn.metrics import confusion_matrix

cuda available


In [2]:

#################
# select task and task parameters
#################


# subgraph matching
if 1==1:
    task_parameters = {}
    task_parameters['flag_task'] = 'matching'
    task_parameters['nb_communities'] = 10
    task_parameters['nb_clusters_target'] = 2
    task_parameters['Voc'] = 3
    task_parameters['size_min'] = 15
    task_parameters['size_max'] = 25
    task_parameters['size_subgraph'] = 20
    task_parameters['p'] = 0.5
    task_parameters['q'] = 0.1
    task_parameters['W0'] = block.random_graph(task_parameters['size_subgraph'],task_parameters['p'])
    task_parameters['u0'] = np.random.randint(task_parameters['Voc'],size=task_parameters['size_subgraph'])

    
# semi-supervised clustering
if 2==1:
    task_parameters = {}
    task_parameters['flag_task'] = 'clustering'
    task_parameters['nb_communities'] = 10
    task_parameters['nb_clusters_target'] = task_parameters['nb_communities']
    task_parameters['Voc'] = task_parameters['nb_communities'] + 1
    task_parameters['size_min'] = 5
    task_parameters['size_max'] = 25
    task_parameters['p'] = 0.5
    task_parameters['q'] = 0.1  
    file_name = 'data/set_100_clustering_maps_p05_q01_size5_25_2017-10-31_10-25-00_.txt'
    with open(file_name, 'rb') as fp:
        all_trainx = pickle.load(fp)
    task_parameters['all_trainx'] = all_trainx[:100]
    
    
#print(task_parameters)


In [3]:

##############################
# Class cell definition
##############################
class OurConvNetcell(nn.Module):
    
    def __init__(self, dim_in, dim_out):
        super(OurConvNetcell, self).__init__()

        # conv1
        self.Ui1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Uj1 = nn.Linear(dim_in, dim_out, bias=False)
        self.Vi1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Vj1 = nn.Linear(dim_in, dim_out, bias=False)  
        self.bu1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        self.bv1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        
        # conv2
        self.Ui2 = nn.Linear(dim_out, dim_out, bias=False) 
        self.Uj2 = nn.Linear(dim_out, dim_out, bias=False)
        self.Vi2 = nn.Linear(dim_out, dim_out, bias=False) 
        self.Vj2 = nn.Linear(dim_out, dim_out, bias=False)  
        self.bu2 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        self.bv2 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        
        # bn1, bn2
        self.bn1 = torch.nn.BatchNorm1d(dim_out)
        self.bn2 = torch.nn.BatchNorm1d(dim_out)
        
        # resnet
        self.R = nn.Linear(dim_in, dim_out, bias=False) 
            
        # init
        self.init_weights_OurConvNetcell(dim_in, dim_out, 1)
        
         
    def init_weights_OurConvNetcell(self, dim_in, dim_out, gain):
        
        # conv1
        scale = gain* np.sqrt( 2.0/ dim_in )
        self.Ui1.weight.data.uniform_(-scale, scale) 
        self.Uj1.weight.data.uniform_(-scale, scale) 
        self.Vi1.weight.data.uniform_(-scale, scale) 
        self.Vj1.weight.data.uniform_(-scale, scale) 
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.bu1.data.fill_(0)
        self.bv1.data.fill_(0)
        
        # conv2
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.Ui2.weight.data.uniform_(-scale, scale) 
        self.Uj2.weight.data.uniform_(-scale, scale) 
        self.Vi2.weight.data.uniform_(-scale, scale) 
        self.Vj2.weight.data.uniform_(-scale, scale) 
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.bu2.data.fill_(0)
        self.bv2.data.fill_(0)
        
        # RN
        scale = gain* np.sqrt( 2.0/ dim_in )
        self.R.weight.data.uniform_(-scale, scale)  
            
            
    def forward(self, x, E_start, E_end):
        
        # E_start, E_end : E x V

        xin = x
        # conv1
        Vix = self.Vi1(x)  #  V x H_out
        Vjx = self.Vj1(x)  #  V x H_out
        x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv1  # E x H_out
        x1 = F.sigmoid(x1)
        Uix = self.Ui1(x)  #  V x H_out
        x2 = torch.mm(E_start, Uix)  #  V x H_out
        Ujx = self.Uj1(x)  #  V x H_out
        x = Uix + torch.mm(E_end.t(), x1*x2) + self.bu1 #  V x H_out
        # bn1
        x = self.bn1(x)
        # relu1
        x = F.relu(x)
        # conv2
        Vix = self.Vi2(x)  #  V x H_out
        Vjx = self.Vj2(x)  #  V x H_out
        x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv2  # E x H_out
        x1 = F.sigmoid(x1)
        Uix = self.Ui2(x)  #  V x H_out
        x2 = torch.mm(E_start, Uix)  #  V x H_out
        Ujx = self.Uj2(x)  #  V x H_out
        x = Uix + torch.mm(E_end.t(), x1*x2) + self.bu2 #  V x H_out
        # bn2
        x = self.bn2(x)
        # addition
        x = x + self.R(xin)
        # relu2
        x = F.relu(x)
            
        return x
        
        
        
        
        
##############################
# Class NN definition
##############################  
class Graph_OurConvNet(nn.Module):
    
    def __init__(self, net_parameters):
        
        super(Graph_OurConvNet, self).__init__()
        
        # parameters
        flag_task = task_parameters['flag_task']
        Voc = net_parameters['Voc']
        D = net_parameters['D']
        nb_clusters_target = net_parameters['nb_clusters_target']
        H = net_parameters['H']
        L = net_parameters['L']
        
        # vector of hidden dimensions
        net_layers = []
        for layer in range(L):
            net_layers.append(H)

        # embedding
        self.encoder = nn.Embedding(Voc, D)      
        
        # CL cells
        # NOTE: Each graph convnet cell uses *TWO* convolutional operations
        net_layers_extended = [D] + net_layers # include embedding dim
        L = len(net_layers)
        list_of_gnn_cells = [] # list of NN cells
        for layer in range(L//2):
            Hin, Hout = net_layers_extended[2*layer], net_layers_extended[2*layer+2]
            list_of_gnn_cells.append(OurConvNetcell(Hin,Hout))
            
        # register the cells for pytorch
        self.gnn_cells = nn.ModuleList(list_of_gnn_cells)
              
        # fc
        Hfinal = net_layers_extended[-1]
        self.fc = nn.Linear(Hfinal,nb_clusters_target) 
        
        # init
        self.init_weights_Graph_OurConvNet(Voc,D,Hfinal,nb_clusters_target,1)
        
        # print
        print('\nnb of hidden layers=',L)
        print('dim of layers (w/ embed dim)=',net_layers_extended)      
        print('\n')
        
        # class variables
        self.L = L
        self.net_layers_extended = net_layers_extended      
        self.flag_task = flag_task
        
        
    def init_weights_Graph_OurConvNet(self, Fin_enc, Fout_enc, Fin_fc, Fout_fc, gain):

        scale = gain* np.sqrt( 2.0/ Fin_enc )
        self.encoder.weight.data.uniform_(-scale, scale)  
        scale = gain* np.sqrt( 2.0/ Fin_fc )
        self.fc.weight.data.uniform_(-scale, scale)  
        self.fc.bias.data.fill_(0)  
    
            
    def forward(self, G):
        
        # signal
        x = G.signal  # V-dim
        x = Variable( torch.LongTensor(x).type(dtypeLong) , requires_grad=False)
           
        # encoder
        x_emb = self.encoder(x) # V x D
        
        # graph operators
        # Edge = start vertex to end vertex
        # E_start = E x V mapping matrix from edge index to corresponding start vertex
        # E_end = E x V mapping matrix from edge index to corresponding end vertex
        E_start = G.edge_to_starting_vertex
        E_end   = G.edge_to_ending_vertex 
        E_start = torch.from_numpy(E_start.toarray()).type(dtypeFloat)
        E_end = torch.from_numpy(E_end.toarray()).type(dtypeFloat) 
        E_start = Variable( E_start , requires_grad=False) 
        E_end = Variable( E_end , requires_grad=False) 
        
        # convnet cells  
        x = x_emb
        for layer in range(self.L//2):
            gnn_layer = self.gnn_cells[layer]            
            x = gnn_layer(x,E_start,E_end) # V x Hfinal
            
        # FC
        x = self.fc(x)   
        
        return x
        
        
    def loss(self, y, y_target, weight):
        
        loss = nn.CrossEntropyLoss(weight=weight.type(dtypeFloat))(y,y_target)
        
        return loss
       
        
    def update(self, lr):
                
        update = torch.optim.Adam( self.parameters(), lr=lr )
        
        return update
    
    
    def update_learning_rate(self, optimizer, lr):
   
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        return optimizer
    
    
    def nb_param(self):

        return self.nb_param
    
    
    

In [4]:

#################
# network and optimization parameters
#################

# network parameters
net_parameters = {}
net_parameters['Voc'] = task_parameters['Voc']
net_parameters['D'] = 50
net_parameters['nb_clusters_target'] = task_parameters['nb_clusters_target']
net_parameters['H'] = 50
net_parameters['L'] = 10
#print(net_parameters)


# optimization parameters
opt_parameters = {}
opt_parameters['learning_rate'] = 0.00075   # ADAM
opt_parameters['max_iters'] = 5000   
opt_parameters['batch_iters'] = 100
if 2==1: # fast debugging
    opt_parameters['max_iters'] = 101 
    opt_parameters['batch_iters'] = 10
opt_parameters['decay_rate'] = 1.25   
#print(opt_parameters)


In [5]:
 
#########################
# Graph convnet function
#########################
def our_graph_convnets(task_parameters,net_parameters,opt_parameters):


    # Delete existing network if exists
    try:
        del net
        print('Delete existing network\n')
    except NameError:
        print('No existing network to delete\n')


    # instantiate
    net = Graph_OurConvNet(net_parameters)
    if torch.cuda.is_available():
        net.cuda()
    print(net)
    
    
    # number of network parameters
    nb_param = 0
    for param in net.parameters():
        nb_param += np.prod(list(param.data.size()))
    print('nb_param=',nb_param,' L=',net_parameters['L'])
    

    # task parameters
    flag_task = task_parameters['flag_task']
    # network parameters
    Voc = net_parameters['Voc']
    D = net_parameters['D']
    nb_clusters_target = net_parameters['nb_clusters_target']
    H = net_parameters['H']
    L = net_parameters['L']
    # optimization parameters
    learning_rate = opt_parameters['learning_rate']
    max_iters = opt_parameters['max_iters']
    batch_iters = opt_parameters['batch_iters']
    decay_rate = opt_parameters['decay_rate']
    
    
    # Optimizer
    global_lr = learning_rate
    global_step = 0
    lr = learning_rate
    optimizer = net.update(lr) 

    
    #############
    # loop over epochs
    #############
    t_start = time.time()
    t_start_total = time.time()
    average_loss_old = 1e10
    running_loss = 0.0
    running_total = 0
    running_conf_mat = 0
    running_accuracy = 0
    tab_results = []
    for iteration in range(1*max_iters):  # loop over the dataset multiple times

        # generate one train graph
        if flag_task=='matching': # subgraph matching
            train_x = g.variable_size_graph(task_parameters)
        elif flag_task=='clustering': # semi supervised clustering
            train_x = g.graph_semi_super_clu(task_parameters)
        train_y = train_x.target
        train_y = Variable( torch.LongTensor(train_y).type(dtypeLong) , requires_grad=False) 

        # forward, loss
        y = net.forward(train_x)
        # compute loss weigth
        labels = train_y.data.cpu().numpy()
        V = labels.shape[0]
        nb_classes = len(np.unique(labels)) 
        cluster_sizes = np.zeros(nb_classes)
        for r in range(nb_classes):
            cluster = np.where(labels==r)[0]
            cluster_sizes[r] = len(cluster)    
        weight = torch.zeros(nb_classes)
        for r in range(nb_classes):
            sumj = 0
            for j in range(nb_classes):
                if j!=r:
                    sumj += cluster_sizes[j]
            weight[r] = sumj/ V 
        loss = net.loss(y,train_y,weight)
        loss_train = loss.data[0]
        running_loss += loss_train
        running_total += 1

        # confusion matrix
        S = train_y.data.cpu().numpy()
        C = np.argmax( torch.nn.Softmax(dim=0)(y).data.cpu().numpy() , axis=1)
        CM = confusion_matrix(S,C).astype(np.float32)
        nb_classes = CM.shape[0]
        train_y = train_y.data.cpu().numpy()
        for r in range(nb_classes):
            cluster = np.where(train_y==r)[0]
            CM[r,:] /= cluster.shape[0]
        running_conf_mat += CM
        running_accuracy += np.sum(np.diag(CM))/ nb_classes

        # backward, update
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # learning rate, print results
        if not iteration%batch_iters:

            # time
            t_stop = time.time() - t_start
            t_start = time.time()

            # confusion matrix
            average_conf_mat = running_conf_mat/ running_total
            running_conf_mat = 0

            # accuracy
            average_accuracy = running_accuracy/ running_total
            running_accuracy = 0

            # update learning rate 
            average_loss = running_loss/ running_total
            if average_loss > 0.99* average_loss_old:
                lr /= decay_rate
            average_loss_old = average_loss
            optimizer = net.update_learning_rate(optimizer, lr)
            running_loss = 0.0
            running_total = 0

            # save intermediate results
            tab_results.append([iteration,average_loss,100* average_accuracy,time.time()-t_start_total])

            # print results
            if 1==1:
                print('\niteration= %d, loss(%diter)= %.3f, lr= %.8f, time(%diter)= %.2f' % 
                      (iteration, batch_iters, average_loss, lr, batch_iters, t_stop))
                #print('Confusion matrix= \n', 100* average_conf_mat)
                print('accuracy= %.3f' % (100* average_accuracy))

                
                
                
                
                
    ############            
    # Evaluation on 100 pre-saved data
    ############
    running_loss = 0.0
    running_total = 0
    running_conf_mat = 0
    running_accuracy = 0
    for iteration in range(100):
        
        # generate one data
        if flag_task == 'matching':
            train_x = g.variable_size_graph(task_parameters)
        if flag_task == 'clustering':
            train_x = task_parameters['all_trainx'][iteration][1]
        train_y = train_x.target
        train_y = Variable( torch.LongTensor(train_y).type(dtypeLong) , requires_grad=False) 
        
        # forward, loss
        y = net.forward(train_x)
        # compute loss weigth
        labels = train_y.data.cpu().numpy()
        V = labels.shape[0]
        nb_classes = len(np.unique(labels)) 
        cluster_sizes = np.zeros(nb_classes)
        for r in range(nb_classes):
            cluster = np.where(labels==r)[0]
            cluster_sizes[r] = len(cluster)    
        weight = torch.zeros(nb_classes)
        for r in range(nb_classes):
            sumj = 0
            for j in range(nb_classes):
                if j!=r:
                    sumj += cluster_sizes[j]
            weight[r] = sumj/ V 
        loss = net.loss(y,train_y,weight)
        loss_train = loss.data[0]
        running_loss += loss_train
        running_total += 1
        
        # confusion matrix
        S = train_y.data.cpu().numpy()
        C = np.argmax( torch.nn.Softmax(dim=0)(y).data.cpu().numpy() , axis=1)
        CM = confusion_matrix(S,C).astype(np.float32)
        nb_classes = CM.shape[0]
        train_y = train_y.data.cpu().numpy()
        for r in range(nb_classes):
            cluster = np.where(train_y==r)[0]
            CM[r,:] /= cluster.shape[0]
        running_conf_mat += CM
        running_accuracy += np.sum(np.diag(CM))/ nb_classes

        # confusion matrix
        average_conf_mat = running_conf_mat/ running_total
        average_accuracy = running_accuracy/ running_total
        average_loss = running_loss/ running_total
        
    # print results
    print('\nloss(100 pre-saved data)= %.3f, accuracy(100 pre-saved data)= %.3f' % (average_loss,100* average_accuracy))

        
        
        
    #############
    # output
    #############
    result = {}
    result['final_loss'] = average_loss
    result['final_acc'] = 100* average_accuracy
    result['final_CM'] = 100* average_conf_mat
    result['final_batch_time'] = t_stop
    result['nb_param_nn'] = nb_param
    result['plot_all_epochs'] = tab_results
    #print(result)
    
    
    return result






#run it
result = our_graph_convnets(task_parameters,net_parameters,opt_parameters)


No existing network to delete


nb of hidden layers= 10
dim of layers (w/ embed dim)= [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]


Graph_OurConvNet(
  (encoder): Embedding(3, 50)
  (gnn_cells): ModuleList(
    (0): OurConvNetcell(
      (Ui1): Linear(in_features=50, out_features=50, bias=False)
      (Uj1): Linear(in_features=50, out_features=50, bias=False)
      (Vi1): Linear(in_features=50, out_features=50, bias=False)
      (Vj1): Linear(in_features=50, out_features=50, bias=False)
      (Ui2): Linear(in_features=50, out_features=50, bias=False)
      (Uj2): Linear(in_features=50, out_features=50, bias=False)
      (Vi2): Linear(in_features=50, out_features=50, bias=False)
      (Vj2): Linear(in_features=50, out_features=50, bias=False)
      (bn1): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (R): Linear(in_features=50, out_features=50, bias=False




iteration= 100, loss(100iter)= 0.229, lr= 0.00075000, time(100iter)= 3.95
accuracy= 95.022

iteration= 200, loss(100iter)= 0.084, lr= 0.00075000, time(100iter)= 3.91
accuracy= 97.421

iteration= 300, loss(100iter)= 0.030, lr= 0.00075000, time(100iter)= 3.75
accuracy= 98.871

iteration= 400, loss(100iter)= 0.159, lr= 0.00060000, time(100iter)= 3.75
accuracy= 95.227

iteration= 500, loss(100iter)= 0.074, lr= 0.00060000, time(100iter)= 3.72
accuracy= 97.609

iteration= 600, loss(100iter)= 0.092, lr= 0.00048000, time(100iter)= 3.56
accuracy= 95.713

iteration= 700, loss(100iter)= 0.118, lr= 0.00038400, time(100iter)= 3.71
accuracy= 94.917

iteration= 800, loss(100iter)= 0.024, lr= 0.00038400, time(100iter)= 3.72
accuracy= 98.071

iteration= 900, loss(100iter)= 0.136, lr= 0.00030720, time(100iter)= 3.87
accuracy= 94.358

iteration= 1000, loss(100iter)= 0.034, lr= 0.00030720, time(100iter)= 3.91
accuracy= 96.023

iteration= 1100, loss(100iter)= 0.044, lr= 0.00024576, time(100iter)= 4.00
acc




loss(100 pre-saved data)= 0.050, accuracy(100 pre-saved data)= 98.850


In [6]:
#print(result)