In [1]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import pdb 
import time
import numpy as np
import pickle
from sklearn.metrics import confusion_matrix

import os 

if torch.cuda.is_available():
    print('cuda available')
    dtypeFloat = torch.cuda.FloatTensor
    dtypeLong = torch.cuda.LongTensor
    #torch.cuda.manual_seed(1)
else:
    print('cuda not available')
    dtypeFloat = torch.FloatTensor
    dtypeLong = torch.LongTensor
    #torch.manual_seed(1)
    
# Import files in folder util
import sys
sys.path.insert(0, 'util/')
import block 
import graph_generator as g

# Helper methods for loading CORA Graph
from utils import load_data2, accuracy

cuda available


In [19]:
# Setup for Cora Dataset
seed = 42
lr = 0.01
weight_decay = 5e-4
hidden = 16
epochs = 200
fastmode= False
nclass = 7

np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# Load data: 7 classes
adj, features, labels, idx_train, idx_val, idx_test = load_data2()
labels = labels.numpy().copy()

cora_labels = labels
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
adj = adj.toarray()
adj += np.eye(adj.shape[0])

cora_Estart = np.zeros((20000, 2708))
cora_Eend = np.zeros((20000, 2708))

# converting adjacency matrix to edge-to-start, edge-to-end vertex matrix
count = 0 
for i in range(adj.shape[0]):
    for j in range(adj.shape[1]):
        if adj[i,j] == 1:
            cora_Estart[count,i] = 1
            cora_Eend[count,j] = 1
            count += 1

cora_Estart = cora_Estart[:count]
cora_Eend = cora_Eend[:count]

Loading cora dataset...


Questions to ask: does RGGCN support non-adj matrix? Currently we are just adding nodes i->j but not j->i. Does RGGCN require self-edges?

In [20]:
cora_Estart.shape

(13264, 2708)

In [4]:
class OurConvNetcell(nn.Module):
    def __init__(self, dim_in, dim_out, dropout=0):
        super(OurConvNetcell, self).__init__()
    
        # conv1
        self.Ui1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Uj1 = nn.Linear(dim_in, dim_out, bias=False)
        self.Vi1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Vj1 = nn.Linear(dim_in, dim_out, bias=False)  
        self.bu1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        self.bv1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        
        self.dropout = dropout
        
        # conv2
        self.Ui2 = nn.Linear(dim_out, dim_out, bias=False) 
        self.Uj2 = nn.Linear(dim_out, dim_out, bias=False)
        self.Vi2 = nn.Linear(dim_out, dim_out, bias=False) 
        self.Vj2 = nn.Linear(dim_out, dim_out, bias=False)  
        self.bu2 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        self.bv2 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        
        # bn1, bn2
        self.bn1 = torch.nn.BatchNorm1d(dim_out)
        self.bn2 = torch.nn.BatchNorm1d(dim_out)
        
        # resnet
        self.R = nn.Linear(dim_in, dim_out, bias=False) 
        
        # init
        self.init_weights_OurConvNetcell(dim_in, dim_out, 1)
        
         
    def init_weights_OurConvNetcell(self, dim_in, dim_out, gain):   
        # conv1
        scale = gain* np.sqrt( 2.0/ dim_in )
        self.Ui1.weight.data.uniform_(-scale, scale) 
        self.Uj1.weight.data.uniform_(-scale, scale) 
        self.Vi1.weight.data.uniform_(-scale, scale) 
        self.Vj1.weight.data.uniform_(-scale, scale) 
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.bu1.data.fill_(0)
        self.bv1.data.fill_(0)
        
        # conv2
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.Ui2.weight.data.uniform_(-scale, scale) 
        self.Uj2.weight.data.uniform_(-scale, scale) 
        self.Vi2.weight.data.uniform_(-scale, scale) 
        self.Vj2.weight.data.uniform_(-scale, scale) 
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.bu2.data.fill_(0)
        self.bv2.data.fill_(0)
        
        # RN
        scale = gain* np.sqrt( 2.0/ dim_in )
        self.R.weight.data.uniform_(-scale, scale)  
        
        
    def forward(self, x, E_start, E_end):
        # E_start, E_end : E x V
        
#        E_start = F.dropout(E_start, self.dropout, training=self.training)
#        E_end = F.dropout(E_end, self.dropout, training=self.training)
#         if self.training:
#             # Edge Dropout
#             num_edges = E_start.shape[0]
#             dropout_idx = np.array(range(num_edges))
#             np.random.shuffle(dropout_idx)
#             E_start = E_start.clone()
#             E_start[dropout_idx[:int(num_edges*self.dropout)]] = 0
#             E_end = E_end.clone()
#             E_end[dropout_idx[:int(num_edges*self.dropout)]] = 0
            
        xin = x
        
        # conv1
        Uix = self.Ui1(x)  #  V x H_out
        Vix = self.Vi1(x)  #  V x H_out
        Vjx = self.Vj1(x)  #  V x H_out
        x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv1  # E x H_out
        x1 = F.sigmoid(x1)
#        x1 = F.dropout(x1, self.dropout, training=self.training)

        x2 = torch.mm(E_start, Uix)  #  E x H_out
#         x = torch.mm(E_end.t(), x1*x2)
        Ujx = self.Uj1(x)  #  V x H_out
        x = Ujx + torch.mm(E_end.t(), x1*x2) + self.bu1 #  V x H_out
        
        # bn1
        x = self.bn1(x)
        # relu1
        x = F.relu(x)
        
        #x = F.dropout(x, self.dropout, training=self.training)
        
        # conv2
        Uix = self.Ui2(x)  #  V x H_out
        Vix = self.Vi2(x)  #  V x H_out
        Vjx = self.Vj2(x)  #  V x H_out
        x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv2  # E x H_out
        x1 = F.sigmoid(x1)
#        x1 = F.dropout(x1, self.dropout, training=self.training)
        
        x2 = torch.mm(E_start, Uix)  #  V x H_out
#         x = torch.mm(E_end.t(), x1*x2)
        
        Ujx = self.Uj2(x)  #  V x H_out
        x = Ujx + torch.mm(E_end.t(), x1*x2) + self.bu2 #  V x H_out
        
        # bn2
        x = self.bn2(x)
        # addition
        x = x + self.R(xin)
        # relu2
        x = F.relu(x)
        
        #x = F.dropout(x, self.dropout, training=self.training)
        
        return x
        
class Graph_OurConvNet(nn.Module):
    
    def __init__(self, net_parameters, cora=False, dropout=0):
        super(Graph_OurConvNet, self).__init__()
        
        # parameters
        flag_task = task_parameters['flag_task']
        Voc = net_parameters['Voc']
        D = net_parameters['D']
        nb_clusters_target = net_parameters['nb_clusters_target']
        H = net_parameters['H']
        L = net_parameters['L']
        self.cora = cora
        self.dropout = dropout
        
        # vector of hidden dimensions
        net_layers = []
        for layer in range(L):
            net_layers.append(H)

        # embedding
        #self.encoder = nn.Embedding(Voc, D, scale_grad_by_freq=False, sparse=False)   
        #self.fc0 = nn.Linear(1433,50) 

        
        # CL cells
        # NOTE: Each graph convnet cell uses *TWO* convolutional operations
        #net_layers_extended = [50] + net_layers # testing adding FC first for embedding
        net_layers_extended = [net_parameters['features']] + net_layers 
        
        L = len(net_layers)
        list_of_gnn_cells = [] # list of NN cells
        print('Net layers', net_layers_extended)
        for layer in range(L//2):
            Hin, Hout = net_layers_extended[2*layer], net_layers_extended[2*layer+2]
            print('Hin', Hin, 'Hout', Hout)
            list_of_gnn_cells.append(OurConvNetcell(Hin,Hout, dropout))
            
        Hfinal = net_layers_extended[-1]
        #list_of_gnn_cells.append(OurConvNetcell(Hfinal,nb_clusters_target, 0))
        
        # register the cells for pytorch
        self.gnn_cells = nn.ModuleList(list_of_gnn_cells)
           
            
            
        # fc
        self.fc = nn.Linear(Hfinal,nb_clusters_target) 
        
        # init
        self.init_weights_Graph_OurConvNet(Voc,D,Hfinal,nb_clusters_target,1)
        
        # print
        print('\nnb of hidden layers=',L)
        print('dim of layers (w/ embed dim)=',net_layers_extended)      
        print('\n')
        
        # class variables
        self.D = D
        self.L = L
        self.net_layers_extended = net_layers_extended      
        self.flag_task = flag_task
        
        
    def init_weights_Graph_OurConvNet(self, Fin_enc, Fout_enc, Fin_fc, Fout_fc, gain):
        #scale = gain* np.sqrt( 2.0/ Fin_enc )
        #self.encoder.weight.data.uniform_(-scale, scale)  
        scale = gain* np.sqrt( 2.0/ Fin_fc )
        self.fc.weight.data.uniform_(-scale, scale)  
        self.fc.bias.data.fill_(0)  
        
        # Embedding Init
#         scale = gain* np.sqrt( 2.0/ 1433 )
#         self.fc0.weight.data.uniform_(-scale, scale)  
#         self.fc0.bias.data.fill_(0)  
    
            
    def forward(self, x, E_start, E_end):
        if self.training:
            # Edge Dropout
            num_edges = E_start.shape[0]
            dropout_idx = np.array(range(num_edges))
            np.random.shuffle(dropout_idx)
            E_start = E_start.clone()
            E_start[dropout_idx[:int(num_edges*self.dropout)]] = 0
            E_end = E_end.clone()
            E_end[dropout_idx[:int(num_edges*self.dropout)]] = 0

        # convnet cells  
        #x = self.fc0(x)
        for layer in range(self.L//2):
            gnn_layer = self.gnn_cells[layer]            
            x = gnn_layer(x,E_start,E_end) # V x Hfinal
            
            #gnn_layer = self.gnn_cells[self.L//2-1]            
            #x = gnn_layer(x,E_start,E_end) # V x Hfinal
        #x = F.dropout(x, self.dropout, training=self.training)
        # FC
        x = self.fc(x)
        return x
         
    def loss(self, y, y_target, weight):
        loss = nn.CrossEntropyLoss(weight=weight.type(dtypeFloat))(y,y_target)
        return loss
       
    def update(self, lr, l2):
        update = torch.optim.Adam( self.parameters(), lr=lr, weight_decay=l2)
        return update
    
    
    def update_learning_rate(self, optimizer, lr):
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return optimizer
    
    def nb_param(self):
        return self.nb_param

In [5]:
def get_cora_dataset():
    y = Variable(torch.from_numpy(cora_labels))

    nb_classes = len(np.unique(cora_labels)) 
    E_start = Variable(torch.from_numpy(cora_Estart).float())
    E_end = Variable(torch.from_numpy(cora_Eend).float())
    
    return y.cuda(), E_start.cuda(), E_end.cuda(), labels.shape[0]

In [6]:
def calculate_loss(loss_criterion, nb_classes, labels, pred_y, cora=False):
    cluster_sizes = np.zeros(nb_classes)
    for r in range(nb_classes):
        cluster = np.where(labels==r)[0] # returns list of indices where label of node = r 
        cluster_sizes[r] = len(cluster) # update size of cluster labelled r
    weight = torch.zeros(nb_classes)
    for r in range(nb_classes):
        sumj = 0
        for j in range(nb_classes):
            if j!=r:
                sumj += cluster_sizes[j]
        weight[r] = sumj/ V 
    loss = loss_criterion(pred_y,labels,weight)
    return loss

def calculate_avg_accuracy(nb_classes, labels, pred_y, overall=False):
    S = labels.data.cpu().numpy()
    C = np.argmax(torch.nn.Softmax(dim=0)(pred_y).data.cpu().numpy() , axis=1)
    if not overall:
        # computes avg of accuracy over clusters
        CM = confusion_matrix(S,C).astype(np.float32)
        for r in range(nb_classes):
            cluster = np.where(labels==r)[0] 
            CM[r,:] /= cluster.shape[0] # divide by no. of elements in cluster r
        accuracy = np.sum(np.diag(CM))/ nb_classes
        return accuracy, CM
    else:
        # computes overall accuracy
        return np.sum(S==C)/S.shape[0], 0

def update_lr(net, optimizer, average_loss, average_loss_old, lr, decay_rate):
    if average_loss > 0.99* average_loss_old:
        lr /= decay_rate
        print('Updating to', lr)
    average_loss_old = average_loss
    return net.update_learning_rate(optimizer, lr)

In [27]:
CORA = 1

task_parameters = {}
net_parameters = {}

task_parameters['nb_communities'] = 7
net_parameters['D'] = net_parameters['H'] = 50
net_parameters['L'] = 4
net_parameters['features'] = features.shape[1]
net_parameters['Dropout'] = 0.3

# semi-supervised clustering
task_parameters['flag_task'] = 'clustering'

# network parameters
net_parameters['Voc'] = 7+1 #task_parameters['Voc']
net_parameters['nb_clusters_target'] = 7 

net = Graph_OurConvNet(net_parameters, CORA, net_parameters['Dropout'])
if torch.cuda.is_available():
    net.cuda()
# number of network parameters
nb_param = 0
for param in net.parameters():
    nb_param += np.prod(list(param.data.size()))
print('nb_param=',nb_param,' L=',net_parameters['L'])

Net layers [1433, 50, 50, 50, 50]
Hin 1433 Hout 50
Hin 50 Hout 50

nb of hidden layers= 4
dim of layers (w/ embed dim)= [1433, 50, 50, 50, 50]


nb_param= 391907  L= 4


In [28]:
### optimization parameters
max_iters = 150
batch_iters = 10
lr = 0.0002
l2 = 0.001
decay_rate = 1.25
nb_classes = net_parameters['nb_clusters_target']

# Optimizer
optimizer = net.update(lr, l2) 

#############
# loop over epochs
#############
t_start = time.time()
t_start_total = time.time()
average_loss_old = 1e10
running_train_acc = running_accuracy = running_conf_mat = running_total = running_loss = 0.0
tab_results = []

for iteration in range(max_iters):  # loop over the dataset multiple times
    # forward, loss
    net.train()

    train_y, E_start, E_end, V = get_cora_dataset()
    features_x = Variable(torch.FloatTensor(features).type(dtypeFloat), requires_grad=False)
    pred_y = net.forward(features_x, E_start, E_end)
    loss = calculate_loss(net.loss, nb_classes, train_y[:140], pred_y[:140])
    
    # backward, update
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    
    # confusion matrix
    net.eval()

    y_eval = net.forward(features_x, E_start, E_end)    
    train_acc, train_CM = calculate_avg_accuracy(nb_classes, train_y[:140], pred_y[:140]) # training acc
    accuracy, CM = calculate_avg_accuracy(nb_classes, train_y[500:1500], y_eval[500:1500]) # val acc
    running_train_acc += train_acc    

    running_conf_mat += CM
    running_accuracy += accuracy    
    running_loss += loss.data[0] # increment loss
    running_total += 1

    # learning rate, print results
    if not iteration%batch_iters:
        # confusion matrix and accuracy
        average_conf_mat = running_conf_mat/ running_total
        average_accuracy = running_accuracy/ running_total
        average_loss = running_loss/ running_total
        
        # update learning rate 
        optimizer = update_lr(net, optimizer, average_loss, average_loss_old, lr, decay_rate)

        # save intermediate results
        tab_results.append([iteration,average_loss,100* average_accuracy, time.time()-t_start_total])

        # print results
        print('\niteration= %d, loss(%diter)= %.3f, lr= %.8f, time(%diter)= %.2f' % 
              (iteration, batch_iters, average_loss, lr, batch_iters, time.time() - t_start))
        #print('Confusion matrix= \n', 100* average_conf_mat)
        print('val accuracy= %.3f' % (100* average_accuracy))
        print('train accuracy= %.3f' % (100* running_train_acc/running_total))
        
        # reset counters
        t_start = time.time()
        running_train_acc = running_accuracy = running_conf_mat = running_total = running_loss = 0.0





iteration= 0, loss(10iter)= 1.957, lr= 0.00020000, time(10iter)= 0.56
val accuracy= 12.699
train accuracy= 10.315

iteration= 10, loss(10iter)= 1.831, lr= 0.00020000, time(10iter)= 4.75
val accuracy= 12.174
train accuracy= 25.126

iteration= 20, loss(10iter)= 1.676, lr= 0.00020000, time(10iter)= 4.55
val accuracy= 12.954
train accuracy= 42.230

iteration= 30, loss(10iter)= 1.546, lr= 0.00020000, time(10iter)= 4.56
val accuracy= 12.389
train accuracy= 53.063

iteration= 40, loss(10iter)= 1.448, lr= 0.00020000, time(10iter)= 4.49
val accuracy= 14.747
train accuracy= 59.284

iteration= 50, loss(10iter)= 1.331, lr= 0.00020000, time(10iter)= 4.63
val accuracy= 20.950
train accuracy= 62.423

iteration= 60, loss(10iter)= 1.211, lr= 0.00020000, time(10iter)= 4.54
val accuracy= 35.084
train accuracy= 63.269

iteration= 70, loss(10iter)= 1.113, lr= 0.00020000, time(10iter)= 4.50
val accuracy= 46.631
train accuracy= 66.015

iteration= 80, loss(10iter)= 0.987, lr= 0.00020000, time(10iter)= 4.63
v

KeyboardInterrupt: 

In [26]:
############            
# Evaluation
############
running_accuracy = running_conf_mat = running_total = running_loss = 0.0
iters = 1

for iteration in range(iters):
    net.eval()
    labels, E_start, E_end, V = get_cora_dataset()
    features_x = Variable(torch.FloatTensor(features).type(dtypeFloat), requires_grad=False)
    y = net.forward(features_x, E_start, E_end)
    loss = calculate_loss(net.loss, nb_classes, labels[500:1500], y[500:1500])
    
    running_loss += loss.data[0]
    running_total += 1

    # confusion matrix
    accuracy, CM = calculate_avg_accuracy(nb_classes, labels[500:1500], y[500:1500], overall=False)
    running_conf_mat += CM
    running_accuracy += accuracy    
    
# print results
average_accuracy = running_accuracy/ running_total
average_loss = running_loss/ running_total
print('\nloss(100 pre-saved data)= %.3f, accuracy(100 pre-saved data)= %.3f' % (average_loss,100* average_accuracy))


loss(100 pre-saved data)= 1.404, accuracy(100 pre-saved data)= 53.217


  
