In [1]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import pdb 
import time
import numpy as np
import pickle
from utils import load_data2, accuracy

import os 

if torch.cuda.is_available():
    print('cuda available')
    dtypeFloat = torch.cuda.FloatTensor
    dtypeLong = torch.cuda.LongTensor
    #torch.cuda.manual_seed(1)
else:
    print('cuda not available')
    dtypeFloat = torch.FloatTensor
    dtypeLong = torch.LongTensor
    #torch.manual_seed(1)
    
# import files in folder util
import sys
sys.path.insert(0, 'util/')
import block 
import graph_generator as g


from sklearn.metrics import confusion_matrix

cuda available


In [2]:
# Setup for Cora Dataset
if 1 == 1:
    seed = 42
    lr = 0.01
    weight_decay = 5e-4
    hidden = 50
    epochs = 200
    fastmode= False
    nclass = 7

    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Load data: 7 classes
    adj, features, labels, idx_train, idx_val, idx_test = load_data2()
    labels = labels.numpy().copy()

    cora_labels = labels
    
    adj = adj.toarray()
    adj += np.eye(adj.shape[0])
    
    cora_Estart = np.zeros((10000, 2708))
    cora_Eend = np.zeros((10000, 2708))

    # converting adjacency matrix to edge-to-start vertex matrix
    count = 0
    for i in range(adj.shape[0]):
        for j in range(adj.shape[1]):
            if adj[i,j] == 1:
                cora_Estart[count,i] = 1
                cora_Eend[count,j] = 1
                count += 1

    cora_Estart = cora_Estart[:count]
    cora_Eend = cora_Eend[:count]

Loading cora dataset...


In [3]:
class OurConvNetcell(nn.Module):
    def __init__(self, dim_in, dim_out, dropout=0):
        super(OurConvNetcell, self).__init__()
    
        # conv1
        self.Ui1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Uj1 = nn.Linear(dim_in, dim_out, bias=False)
        self.Vi1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Vj1 = nn.Linear(dim_in, dim_out, bias=False)  
        self.bu1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        self.bv1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        
        self.dropout = dropout

        # bn1, bn2
        self.bn1 = torch.nn.BatchNorm1d(dim_out)
        self.bn2 = torch.nn.BatchNorm1d(dim_out)
        
        # resnet
        self.R = nn.Linear(dim_in, dim_out, bias=False) 
            
        # self-attention
        self.a1 = nn.Linear(2*dim_out, 1, bias=False)  
        self.a2 = nn.Linear(2*dim_out, 1, bias=False)  
        self.leakyrelu = nn.LeakyReLU(0.2)
        self.dim_out = dim_out
        
        # init
        self.init_weights_OurConvNetcell(dim_in, dim_out, 1)
        
         
    def init_weights_OurConvNetcell(self, dim_in, dim_out, gain):   
        # conv1
        scale = gain* np.sqrt( 2.0/ dim_in )
        self.Ui1.weight.data.uniform_(-scale, scale) 
        self.Uj1.weight.data.uniform_(-scale, scale) 
        self.Vi1.weight.data.uniform_(-scale, scale) 
        self.Vj1.weight.data.uniform_(-scale, scale) 
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.bu1.data.fill_(0)
        self.bv1.data.fill_(0)
        
        # RN
        scale = gain* np.sqrt( 2.0/ dim_in )
        self.R.weight.data.uniform_(-scale, scale)  
        
        # self-attention
        scale = gain* np.sqrt( 1.0/ dim_out )
        self.a1.weight.data.uniform_(-scale, scale) 
        self.a2.weight.data.uniform_(-scale, scale) 
        
    def forward(self, x, E_start, E_end):
        # E_start, E_end : E x V
        #E_start = F.dropout(E_start, self.dropout, training=self.training)
        #E_end = F.dropout(E_end, self.dropout, training=self.training)
#         if self.training:
#             # Edge Dropout
#             num_edges = E_start.shape[0]
#             dropout_idx = np.array(range(num_edges))
#             np.random.shuffle(dropout_idx)
#             E_start = E_start.clone()
#             E_start[dropout_idx[:int(num_edges*self.dropout)]] = 0
#             E_end = E_end.clone()
#             E_end[dropout_idx[:int(num_edges*self.dropout)]] = 0
            
        xin = x
        # conv1
        
        # self-attention
        Uix = self.Ui1(x)  #  V x H_out
        x1 = torch.cat((torch.mm(E_end,Uix), torch.mm(E_start,Uix)), dim=1)  # E x 2H_out
        x1 = self.leakyrelu(self.a1(x1)) # E x 1
        x1 = torch.exp(x1)
        x_ = torch.mm(E_start.t(), x1) # V x 1
        x_ = torch.mm(E_start, x_) # E x 1
        x1 = x1/x_  # normalize x1
        x1 = x1.repeat(1,self.dim_out) # E x H_out
        
        F.dropout(x1, self.dropout, training=self.training)

        x2 = torch.mm(E_start, Uix)  #  E x H_out
        x = torch.mm(E_end.t(), x1*x2)
        x = F.elu(x)
        
        return x
        
class Graph_OurConvNet(nn.Module):
    
    def __init__(self, net_parameters, cora=False, dropout=0):
        super(Graph_OurConvNet, self).__init__()
        
        # parameters
        flag_task = task_parameters['flag_task']
        Voc = net_parameters['Voc']
        D = net_parameters['D']
        nb_clusters_target = net_parameters['nb_clusters_target']
        H = net_parameters['H']
        L = net_parameters['L']
        self.cora = cora
        self.dropout = dropout
        
        # vector of hidden dimensions
        net_layers = []
        for layer in range(L):
            net_layers.append(H)

        # embedding
        self.encoder = nn.Embedding(Voc, D, scale_grad_by_freq=False, sparse=False)      
        
        # CL cells
        # NOTE: Each graph convnet cell uses *TWO* convolutional operations
        net_layers_extended = [D] + net_layers # include embedding dim
        L = len(net_layers)
        list_of_gnn_cells = [] # list of NN cells
        for layer in range(L//2):
            Hin, Hout = net_layers_extended[2*layer], net_layers_extended[2*layer+2]
            list_of_gnn_cells.append(OurConvNetcell(Hin,Hout, dropout))
        
        
        Hfinal = net_layers_extended[-1]
        list_of_gnn_cells.append(OurConvNetcell(Hfinal,nb_clusters_target, 0))
        
        # register the cells for pytorch
        self.gnn_cells = nn.ModuleList(list_of_gnn_cells)
              
        # fc
        
        self.fc = nn.Linear(Hfinal,nb_clusters_target) 
        
        
        
        # init
        self.init_weights_Graph_OurConvNet(Voc,D,Hfinal,nb_clusters_target,1)
        
        # print
        print('\nnb of hidden layers=',L)
        print('dim of layers (w/ embed dim)=',net_layers_extended)      
        print('\n')
        
        # class variables
        self.D = D
        self.L = L
        self.net_layers_extended = net_layers_extended      
        self.flag_task = flag_task
        
        
    def init_weights_Graph_OurConvNet(self, Fin_enc, Fout_enc, Fin_fc, Fout_fc, gain):
        scale = gain* np.sqrt( 2.0/ Fin_enc )
        self.encoder.weight.data.uniform_(-scale, scale)  
        scale = gain* np.sqrt( 2.0/ Fin_fc )
        self.fc.weight.data.uniform_(-scale, scale)  
        self.fc.bias.data.fill_(0)  
    
            
    def forward(self, x, E_start, E_end):
        # convnet cells  
        if not self.cora:
            x = self.encoder(x) # V x D
            for layer in range(self.L//2):
                gnn_layer = self.gnn_cells[layer]            
                x = gnn_layer(x,E_start,E_end) # V x Hfinal
        else:
            for layer in range(self.L//2+1):
                gnn_layer = self.gnn_cells[layer]            
                x = gnn_layer(x,E_start,E_end) # V x Hfinal
                #x = F.dropout(x, self.dropout, training=self.training)
            
        # FC
        #x = self.fc(x)
        return x
         
    def loss(self, y, y_target, weight):
        loss = nn.CrossEntropyLoss(weight=weight.type(dtypeFloat))(y,y_target)
        return loss
       
    def update(self, lr):
        update = torch.optim.Adam( self.parameters(), lr=lr )
        return update
    
    
    def update_learning_rate(self, optimizer, lr):
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return optimizer
    
    def nb_param(self):
        return self.nb_param

In [4]:
def get_cora_dataset():
    #x = Variable(torch.from_numpy(cora_labels_train_x))
    y = Variable(torch.from_numpy(cora_labels))

    nb_classes = len(np.unique(cora_labels)) 
    E_start = Variable(torch.from_numpy(cora_Estart).float())
    E_end = Variable(torch.from_numpy(cora_Eend).float())
    
    return y.cuda(), E_start.cuda(), E_end.cuda(), labels.shape[0]

In [5]:
def calculate_loss(loss_criterion, nb_classes, labels, pred_y, cora=False):
    cluster_sizes = np.zeros(nb_classes)
    for r in range(nb_classes):
        cluster = np.where(labels==r)[0] # returns list of indices where label of node = r 
        cluster_sizes[r] = len(cluster) # update size of cluster labelled r
    weight = torch.zeros(nb_classes)
    for r in range(nb_classes):
        sumj = 0
        for j in range(nb_classes):
            if j!=r:
                sumj += cluster_sizes[j]
        weight[r] = sumj/ V 
    loss = loss_criterion(pred_y,labels,weight)
    return loss

def calculate_avg_accuracy(nb_classes, labels, pred_y, overall=False):
    S = labels.data.cpu().numpy()
    C = np.argmax(torch.nn.Softmax(dim=0)(pred_y).data.cpu().numpy() , axis=1)
    if not overall:
        # computes avg of accuracy over clusters
        CM = confusion_matrix(S,C).astype(np.float32)
        for r in range(nb_classes):
            cluster = np.where(labels==r)[0] 
            CM[r,:] /= cluster.shape[0] # divide by no. of elements in cluster r
        accuracy = np.sum(np.diag(CM))/ nb_classes
        return accuracy, CM
    else:
        # computes overall accuracy
        return np.sum(S==C)/S.shape[0], 0

def update_lr(net, optimizer, average_loss, average_loss_old, lr, decay_rate):
    if average_loss > 0.99* average_loss_old:
        lr /= decay_rate
        print('Updating to', lr)
    average_loss_old = average_loss
    return net.update_learning_rate(optimizer, lr)

In [8]:
CORA = 1

task_parameters = {}
net_parameters = {}

if CORA:
    task_parameters['nb_communities'] = 7
    net_parameters['D'] = features.shape[1]
    net_parameters['H'] = 4
    net_parameters['L'] = 2
    net_parameters['Dropout'] = 0.0

# semi-supervised clustering
task_parameters['flag_task'] = 'clustering'
task_parameters['nb_clusters_target'] = task_parameters['nb_communities']
task_parameters['Voc'] = task_parameters['nb_communities'] + 1
task_parameters['size_min'] = 5
task_parameters['size_max'] = 25
task_parameters['p'] = 0.5
task_parameters['q'] = 0.1  
file_name = 'data/set_100_clustering_maps_p05_q01_size5_25_2017-10-31_10-25-00_.txt'
with open(file_name, 'rb') as fp:
    all_trainx = pickle.load(fp)
task_parameters['all_trainx'] = all_trainx[:100]

# network parameters
net_parameters['Voc'] = task_parameters['Voc']
net_parameters['nb_clusters_target'] = task_parameters['nb_clusters_target']

net = Graph_OurConvNet(net_parameters, CORA, net_parameters['Dropout'])
if torch.cuda.is_available():
    net.cuda()
# number of network parameters
nb_param = 0
for param in net.parameters():
    nb_param += np.prod(list(param.data.size()))
print('nb_param=',nb_param,' L=',net_parameters['L'])


nb of hidden layers= 2
dim of layers (w/ embed dim)= [1433, 4, 4]


nb_param= 40409  L= 2


In [9]:
### optimization parameters
if CORA:
    max_iters = 1000
    batch_iters = 10
    lr = 0.0005
else:
    max_iters = 5000
    batch_iters = 100
    lr = 0.00075
    
decay_rate = 1.25
nb_classes = task_parameters['nb_communities']

# Optimizer
optimizer = net.update(lr) 

#############
# loop over epochs
#############
t_start = time.time()
t_start_total = time.time()
average_loss_old = 1e10
running_train_acc = running_accuracy = running_conf_mat = running_total = running_loss = 0.0
tab_results = []

for iteration in range(max_iters):  # loop over the dataset multiple times
    # forward, loss
    net.train()
    if CORA:
        train_y, E_start, E_end, V = get_cora_dataset()
        features_x = Variable(torch.FloatTensor(features).type(dtypeFloat), requires_grad=False)
        y = net.forward(features_x, E_start, E_end)
        #loss = nn.CrossEntropyLoss()(y[:140], train_y[:140])
        loss = calculate_loss(net.loss, nb_classes, train_y[:140], y[:140])
    else:
        train_x = g.graph_semi_super_clu(task_parameters) # gets a new graph dataset
        x, train_y, E_start, E_end, V = get_net_datasets(train_x)
        y = net.forward(x, E_start, E_end)    
        loss = calculate_loss(net.loss, nb_classes, train_y, y)
    
    # backward, update
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    
    # confusion matrix
    net.eval()
    if CORA:
        y_eval = net.forward(features_x, E_start, E_end)    
        
        # validation Set
        train_acc, train_CM = calculate_avg_accuracy(nb_classes, train_y[:140], y_eval[:140])
        accuracy, CM = calculate_avg_accuracy(nb_classes, train_y[200:500], y[200:500])
        running_train_acc += train_acc    
    else:
        accuracy, CM = calculate_avg_accuracy(nb_classes, train_y, y)
    running_conf_mat += CM
    running_accuracy += accuracy    
    running_loss += loss.data[0] # increment loss
    running_total += 1

    # learning rate, print results
    if not iteration%batch_iters:
        # confusion matrix and accuracy
        average_conf_mat = running_conf_mat/ running_total
        average_accuracy = running_accuracy/ running_total
        average_loss = running_loss/ running_total
        
        # update learning rate 
        optimizer = update_lr(net, optimizer, average_loss, average_loss_old, lr, decay_rate)

        # save intermediate results
        tab_results.append([iteration,average_loss,100* average_accuracy, time.time()-t_start_total])

        # print results
        print('\niteration= %d, loss(%diter)= %.3f, lr= %.8f, time(%diter)= %.2f' % 
              (iteration, batch_iters, average_loss, lr, batch_iters, time.time() - t_start))
        #print('Confusion matrix= \n', 100* average_conf_mat)
        print('accuracy= %.3f' % (100* average_accuracy))
        print('train accuracy= %.3f' % (100* running_train_acc/running_total))
        
        # reset counters
        t_start = time.time()
        running_train_acc = running_accuracy = running_conf_mat = running_total = running_loss = 0.0





iteration= 0, loss(10iter)= 1.947, lr= 0.00050000, time(10iter)= 0.25
accuracy= 8.849
train accuracy= 9.857

iteration= 10, loss(10iter)= 1.945, lr= 0.00050000, time(10iter)= 2.45
accuracy= 14.298
train accuracy= 22.859

iteration= 20, loss(10iter)= 1.942, lr= 0.00050000, time(10iter)= 2.48
accuracy= 21.483
train accuracy= 41.327

iteration= 30, loss(10iter)= 1.940, lr= 0.00050000, time(10iter)= 2.62
accuracy= 27.172
train accuracy= 46.502

iteration= 40, loss(10iter)= 1.937, lr= 0.00050000, time(10iter)= 2.51
accuracy= 31.629
train accuracy= 49.066

iteration= 50, loss(10iter)= 1.934, lr= 0.00050000, time(10iter)= 2.53
accuracy= 33.840
train accuracy= 53.903

iteration= 60, loss(10iter)= 1.931, lr= 0.00050000, time(10iter)= 2.46
accuracy= 35.469
train accuracy= 57.777

iteration= 70, loss(10iter)= 1.928, lr= 0.00050000, time(10iter)= 2.49
accuracy= 36.824
train accuracy= 59.049

iteration= 80, loss(10iter)= 1.924, lr= 0.00050000, time(10iter)= 2.51
accuracy= 39.099
train accuracy= 59


iteration= 730, loss(10iter)= 1.500, lr= 0.00050000, time(10iter)= 2.59
accuracy= 45.377
train accuracy= 74.311

iteration= 740, loss(10iter)= 1.492, lr= 0.00050000, time(10iter)= 2.50
accuracy= 44.863
train accuracy= 72.326

iteration= 750, loss(10iter)= 1.484, lr= 0.00050000, time(10iter)= 2.47
accuracy= 44.863
train accuracy= 71.992

iteration= 760, loss(10iter)= 1.475, lr= 0.00050000, time(10iter)= 2.49
accuracy= 44.863
train accuracy= 71.992

iteration= 770, loss(10iter)= 1.467, lr= 0.00050000, time(10iter)= 2.53
accuracy= 44.863
train accuracy= 70.906

iteration= 780, loss(10iter)= 1.459, lr= 0.00050000, time(10iter)= 2.56
accuracy= 44.877
train accuracy= 68.504

iteration= 790, loss(10iter)= 1.451, lr= 0.00050000, time(10iter)= 2.61
accuracy= 44.890
train accuracy= 68.374

iteration= 800, loss(10iter)= 1.442, lr= 0.00050000, time(10iter)= 2.54
accuracy= 44.999
train accuracy= 68.374

iteration= 810, loss(10iter)= 1.434, lr= 0.00050000, time(10iter)= 2.56
accuracy= 45.094
train 

In [33]:
############            
# Evaluation on 100 pre-saved data
############
running_accuracy = running_conf_mat = running_total = running_loss = 0.0
if CORA:
    iters = 1
else:
    iters = 100

for iteration in range(iters):
    if CORA:
        net.train()
        labels, E_start, E_end, V = get_cora_dataset()
        features_x = Variable(torch.FloatTensor(features).type(dtypeFloat), requires_grad=False)
        y = net.forward(features_x, E_start, E_end)
        loss = calculate_loss(net.loss, nb_classes, labels[500:1500], y[500:1500])
    else:
        train_x = task_parameters['all_trainx'][iteration][1]
        x, labels, E_start, E_end, V = get_net_datasets(train_x)
        y = net.forward(x, E_start, E_end)
        loss = calculate_loss(net.loss, nb_classes, labels, y)
    
    running_loss += loss.data[0]
    running_total += 1

    # confusion matrix
    accuracy, CM = calculate_avg_accuracy(nb_classes, labels[500:1500], y[500:1500], overall=False)
    running_conf_mat += CM
    running_accuracy += accuracy    
    
# print results
average_accuracy = running_accuracy/ running_total
average_loss = running_loss/ running_total
print('\nloss(100 pre-saved data)= %.3f, accuracy(100 pre-saved data)= %.3f' % (average_loss,100* average_accuracy))


loss(100 pre-saved data)= 1.878, accuracy(100 pre-saved data)= 51.592


