In [1]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import pdb 
import time
import numpy as np
import pickle
from sklearn.metrics import confusion_matrix

import os 

# Pick the tensor types used by the rest of the notebook: CUDA variants when a
# GPU is visible to PyTorch, CPU variants otherwise.
if not torch.cuda.is_available():
    print('cuda not available')
    dtypeFloat, dtypeLong = torch.FloatTensor, torch.LongTensor
else:
    print('cuda available')
    dtypeFloat, dtypeLong = torch.cuda.FloatTensor, torch.cuda.LongTensor
    
# Helper methods for loading CORA Graph
from utils import load_data2, accuracy

cuda available


### Setup for Cora Dataset

In [2]:
# Seed every RNG this notebook draws from so that runs are repeatable.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# Load data: 7 classes
adj, features, labels, idx_train, idx_val, idx_test = load_data2()
# Keep an independent NumPy copy of the labels for get_cora_dataset().
cora_labels = labels.numpy().copy()

# Symmetrise the adjacency matrix: wherever adj.T > adj, take the entry of adj.T.
# NOTE(review): `.multiply` / `.toarray` suggest adj is a scipy.sparse matrix - confirm in utils.load_data2.
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
adj = adj.toarray()
adj += np.eye(adj.shape[0])  # add self-loops

# Convert the adjacency matrix to edge-to-start / edge-to-end vertex matrices.
# Row k of cora_Estart (resp. cora_Eend) is a one-hot vector marking vertex i
# (resp. j) of the k-th entry with adj[i, j] == 1.
# np.nonzero enumerates entries in row-major order, i.e. the same edge order as
# a nested `for i ... for j ...` scan, without the O(V^2) Python loop and
# without a fixed-size (20000 x 2708) scratch buffer.
# Entries are matched with `== 1` exactly, so any entry holding another value
# (e.g. a pre-existing self-loop that became 2 after adding the identity) is
# not turned into an edge.
edge_src, edge_dst = np.nonzero(adj == 1)
count = edge_src.shape[0]  # number of edges
edge_ids = np.arange(count)

cora_Estart = np.zeros((count, adj.shape[0]))
cora_Eend = np.zeros((count, adj.shape[1]))
cora_Estart[edge_ids, edge_src] = 1
cora_Eend[edge_ids, edge_dst] = 1

def get_cora_dataset():
    """Return the Cora labels and edge incidence matrices as tensors.

    The tensors are built from the module-level NumPy arrays ``cora_labels``,
    ``cora_Estart`` and ``cora_Eend`` and placed on the GPU when CUDA is
    available, otherwise left on the CPU (the previous unconditional
    ``.cuda()`` calls failed on CPU-only machines).

    Returns:
        tuple: ``(y, E_start, E_end, num_nodes)`` where ``y`` holds the labels,
        ``E_start`` / ``E_end`` are float32 copies of the incidence matrices
        and ``num_nodes`` is ``cora_labels.shape[0]``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    y = torch.from_numpy(cora_labels).to(device)
    E_start = torch.from_numpy(cora_Estart).float().to(device)
    E_end = torch.from_numpy(cora_Eend).float().to(device)

    # Use the NumPy copy rather than the global `labels`, which other cells rebind.
    return y, E_start, E_end, cora_labels.shape[0]

Loading cora dataset...


In [65]:
class OurConvNetcell(nn.Module):
    """Graph convolution cell: two gated graph convolutions plus a linear skip path.

    Each convolution builds one sigmoid gate per edge from the features of the
    edge's two endpoint vertices, uses it to weight the message carried by the
    edge, and sums the gated messages into vertices. The first convolution is
    followed by batch-norm, ReLU and dropout; the second by batch-norm, the
    addition of ``R(input)`` and a final ReLU.

    Args:
        dim_in: number of input features per vertex.
        dim_out: number of output features per vertex.
        dropout: probability passed to ``F.dropout`` for the cell input and
            for the output of the first convolution (active only in training
            mode).
    """

    def __init__(self, dim_in, dim_out, dropout=0):
        super(OurConvNetcell, self).__init__()

        # conv1
        self.Ui1 = nn.Linear(dim_in, dim_out, bias=False)
        self.Uj1 = nn.Linear(dim_in, dim_out, bias=False)
        self.Vi1 = nn.Linear(dim_in, dim_out, bias=False)
        self.Vj1 = nn.Linear(dim_in, dim_out, bias=False)
        self.bu1 = torch.nn.Parameter(torch.zeros(dim_out), requires_grad=True)
        self.bv1 = torch.nn.Parameter(torch.zeros(dim_out), requires_grad=True)

        self.dropout = dropout

        # conv2
        self.Ui2 = nn.Linear(dim_out, dim_out, bias=False)
        self.Uj2 = nn.Linear(dim_out, dim_out, bias=False)
        self.Vi2 = nn.Linear(dim_out, dim_out, bias=False)
        self.Vj2 = nn.Linear(dim_out, dim_out, bias=False)
        self.bu2 = torch.nn.Parameter(torch.zeros(dim_out), requires_grad=True)
        self.bv2 = torch.nn.Parameter(torch.zeros(dim_out), requires_grad=True)

        # bn1, bn2
        self.bn1 = torch.nn.BatchNorm1d(dim_out)
        self.bn2 = torch.nn.BatchNorm1d(dim_out)

        # resnet
        self.R = nn.Linear(dim_in, dim_out, bias=False)

        # init
        self.init_weights_OurConvNetcell(dim_in, dim_out, 1)

    def init_weights_OurConvNetcell(self, dim_in, dim_out, gain):
        """Re-initialise all weights uniformly in ``[-scale, scale]`` and zero the biases.

        ``scale`` is ``gain * sqrt(2 / fan_in)`` where ``fan_in`` is ``dim_in``
        for the first convolution and the skip projection, and ``dim_out`` for
        the second convolution. The layers are visited in a fixed order so the
        result is reproducible for a given RNG seed.
        """
        # conv1
        scale = gain * np.sqrt(2.0 / dim_in)
        for linear in (self.Ui1, self.Uj1, self.Vi1, self.Vj1):
            linear.weight.data.uniform_(-scale, scale)
        self.bu1.data.fill_(0)
        self.bv1.data.fill_(0)

        # conv2
        scale = gain * np.sqrt(2.0 / dim_out)
        for linear in (self.Ui2, self.Uj2, self.Vi2, self.Vj2):
            linear.weight.data.uniform_(-scale, scale)
        self.bu2.data.fill_(0)
        self.bv2.data.fill_(0)

        # RN
        scale = gain * np.sqrt(2.0 / dim_in)
        self.R.weight.data.uniform_(-scale, scale)

    def _gated_conv(self, x, E_start, E_end, Ui, Uj, Vi, Vj, bu, bv):
        """Apply one gated graph convolution and return the new V x H_out vertex features."""
        Uix = Ui(x)  # V x H_out
        Vix = Vi(x)  # V x H_out
        Vjx = Vj(x)  # V x H_out

        # Per-edge gate computed from both endpoints of the edge.
        gate = torch.mm(E_end, Vix) + torch.mm(E_start, Vjx) + bv  # E x H_out
        gate = torch.sigmoid(gate)

        # Per-edge message taken from the start vertex of the edge.
        message = torch.mm(E_start, Uix)  # E x H_out

        # Sum the gated messages into each edge's end vertex.
        Ujx = Uj(x)  # V x H_out
        return Ujx + torch.mm(E_end.t(), gate * message) + bu  # V x H_out

    def forward(self, x, E_start, E_end):
        """Run the cell.

        Args:
            x: vertex features, V x dim_in.
            E_start: edge-to-start-vertex matrix, E x V.
            E_end: edge-to-end-vertex matrix, E x V.

        Returns:
            Non-negative vertex features of shape V x dim_out.
        """
        x = F.dropout(x, self.dropout, training=self.training)
        xin = x  # kept for the skip connection

        # conv1 -> bn1 -> relu1 -> output dropout
        x = self._gated_conv(x, E_start, E_end,
                             self.Ui1, self.Uj1, self.Vi1, self.Vj1,
                             self.bu1, self.bv1)
        x = self.bn1(x)
        x = F.relu(x)
        x = F.dropout(x, self.dropout, training=self.training)

        # conv2 -> bn2 -> skip addition -> relu2
        x = self._gated_conv(x, E_start, E_end,
                             self.Ui2, self.Uj2, self.Vi2, self.Vj2,
                             self.bu2, self.bv2)
        x = self.bn2(x)
        x = x + self.R(xin)
        x = F.relu(x)

        return x
        
class Graph_OurConvNet(nn.Module):
    """Stack of ``OurConvNetcell`` blocks followed by a linear classifier.

    Args:
        net_parameters: dict with the keys
            ``'features'`` (input feature size), ``'H'`` (hidden size),
            ``'L'`` (number of hidden layers), ``'nb_clusters_target'``
            (number of output classes), ``'D'`` and ``'Voc'``. ``'D'`` is only
            stored on the instance and ``'Voc'`` is only forwarded to the
            weight initialiser, which does not use it.
        cora: stored as ``self.cora``; not otherwise used by this class.
        dropout: dropout probability used for edge dropout, inside every cell
            and before the final linear layer.

    Each cell performs two convolutions, so ``L // 2`` cells are created; with
    an odd ``L`` the last hidden layer gets no cell.
    """

    def __init__(self, net_parameters, cora=False, dropout=0):
        super(Graph_OurConvNet, self).__init__()

        # parameters
        Voc = net_parameters['Voc']
        D = net_parameters['D']
        nb_clusters_target = net_parameters['nb_clusters_target']
        H = net_parameters['H']
        L = net_parameters['L']
        self.cora = cora
        self.dropout = dropout

        # vector of hidden dimensions
        net_layers = [H] * L

        # CL cells
        # NOTE: Each graph convnet cell uses *TWO* convolutional operations
        net_layers_extended = [net_parameters['features']] + net_layers

        L = len(net_layers)
        list_of_gnn_cells = []  # list of NN cells
        print('Net layers', net_layers_extended)
        for layer in range(L//2):
            Hin, Hout = net_layers_extended[2*layer], net_layers_extended[2*layer+2]
            print('Hin', Hin, 'Hout', Hout)
            list_of_gnn_cells.append(OurConvNetcell(Hin, Hout, dropout))

        # register the cells for pytorch
        self.gnn_cells = nn.ModuleList(list_of_gnn_cells)

        # fc
        Hfinal = net_layers_extended[-1]
        self.fc = nn.Linear(Hfinal, nb_clusters_target)

        # init
        self.init_weights_Graph_OurConvNet(Voc, D, Hfinal, nb_clusters_target, 1)

        # print
        print('\nnb of hidden layers=',L)
        print('dim of layers (w/ embed dim)=',net_layers_extended)
        print('\n')

        # class variables
        self.D = D
        self.L = L
        self.net_layers_extended = net_layers_extended

    def init_weights_Graph_OurConvNet(self, Fin_enc, Fout_enc, Fin_fc, Fout_fc, gain):
        """Initialise the classifier: uniform weights, zero bias.

        The weight range is ``gain * sqrt(2 / (Fin_fc + Fout_fc))``.
        ``Fin_enc`` and ``Fout_enc`` are accepted but unused.
        """
        scale = gain * np.sqrt(2.0 / (Fin_fc + Fout_fc))
        self.fc.weight.data.uniform_(-scale, scale)
        self.fc.bias.data.fill_(0)

    def forward(self, x, E_start, E_end):
        """Compute class scores for every vertex.

        Args:
            x: vertex features, V x features.
            E_start: edge-to-start-vertex matrix, E x V.
            E_end: edge-to-end-vertex matrix, E x V.

        Returns:
            Un-normalised class scores, V x nb_clusters_target.
        """
        if self.training:
            # Edge Start+End Dropout for all layers: zero the same randomly
            # chosen rows (edges) in copies of both matrices, leaving the
            # caller's tensors untouched.
            num_edges = E_start.shape[0]
            dropout_idx = np.arange(num_edges)
            np.random.shuffle(dropout_idx)
            dropped = dropout_idx[:int(num_edges*self.dropout)]
            E_start = E_start.clone()
            E_start[dropped] = 0
            E_end = E_end.clone()
            E_end[dropped] = 0

        # convnet cells
        for layer in range(self.L//2):
            gnn_layer = self.gnn_cells[layer]
            x = gnn_layer(x, E_start, E_end)  # V x H

        x = F.dropout(x, self.dropout, training=self.training)  # FC Dropout
        x = self.fc(x)  # FC
        return x

    def loss(self, y, y_target, weight):
        """Return the cross-entropy between scores ``y`` and labels ``y_target``.

        ``weight`` is accepted for interface compatibility and is ignored.
        """
        loss = nn.CrossEntropyLoss()(y, y_target)
        return loss

    def update(self, lr, l2):
        """Create an Adam optimizer over all parameters (``l2`` is the weight decay)."""
        update = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=l2)
        return update

    def update_learning_rate(self, optimizer, lr):
        """Set ``lr`` on every parameter group of ``optimizer`` and return it."""
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return optimizer

    def nb_param(self):
        """Return the total number of scalar parameters in the network.

        The previous implementation returned ``self.nb_param``, i.e. this
        bound method itself, instead of a number.
        """
        return sum(param.numel() for param in self.parameters())

In [157]:
def calculate_avg_accuracy(nb_classes, labels, pred_y):
    """Return the fraction of rows of ``pred_y`` whose top class equals ``labels``.

    ``pred_y`` holds one row of class scores per sample; a softmax over
    dim 1 is applied before taking the arg-max. ``nb_classes`` is unused.
    """
    probabilities = torch.nn.Softmax(dim=1)(pred_y)
    predicted = probabilities.data.cpu().numpy().argmax(axis=1)
    targets = labels.data.cpu().numpy()
    n_correct = np.sum(targets == predicted)
    return n_correct / targets.shape[0]

def update_lr(net, optimizer, average_loss, average_loss_old, lr, decay_rate):
    """Divide ``lr`` by ``decay_rate`` when the loss went up, then apply it.

    The (possibly unchanged) learning rate is always pushed into the
    optimizer through ``net.update_learning_rate``.

    Returns:
        tuple: ``(optimizer, lr)`` as returned by ``net.update_learning_rate``
        and the learning rate now in effect.
    """
    loss_went_up = average_loss > average_loss_old
    if loss_went_up:
        lr /= decay_rate
        print('Updating LR to %.7f' % lr)

    refreshed_optimizer = net.update_learning_rate(optimizer, lr)
    return refreshed_optimizer, lr

def print_results(iteration, batch_iters, running_train_acc, running_total, running_train_loss, lr, t_start,
                  val_accuracy=None):
    """Print the training summary for the current reporting window.

    Args:
        iteration: current iteration index.
        batch_iters: number of iterations per reporting window.
        running_train_acc: sum of per-iteration training accuracies in the window.
        running_total: number of iterations accumulated in the window.
        running_train_loss: sum of per-iteration training losses in the window.
        lr: current learning rate.
        t_start: ``time.time()`` value taken at the start of the window.
        val_accuracy: validation accuracy as a fraction in [0, 1]. When
            omitted, the module-level global ``val_accuracy`` is used, which
            keeps existing call sites working.

    Raises:
        NameError: if ``val_accuracy`` is neither passed nor defined globally.
    """
    if val_accuracy is None:
        # Backward compatibility: older call sites rely on a global set by the training loop.
        if 'val_accuracy' not in globals():
            raise NameError("name 'val_accuracy' is not defined")
        val_accuracy = globals()['val_accuracy']

    print('\niteration= %d, train loss(%diter)= %.3f, lr= %.7f, time(%diter)= %.2f' %
          (iteration, batch_iters, running_train_loss/ running_total, lr,
           batch_iters, time.time() - t_start))
    print('val accuracy= %.3f' % (100* val_accuracy))
    print('train accuracy= %.3f' % (100* running_train_acc/running_total))


In [161]:
# Network hyper-parameters consumed by Graph_OurConvNet.
net_parameters = {}
net_parameters['D'] = net_parameters['H'] = 50
net_parameters['L'] = 6
net_parameters['features'] = features.shape[1]
net_parameters['Dropout'] = 0.3
net_parameters['Voc'] = 7+1
net_parameters['nb_clusters_target'] = 7

# The second argument used to be the name `CORA`, which no cell defines
# (NameError on a fresh kernel). Graph_OurConvNet only stores this flag, so an
# explicit True is used here.
# NOTE(review): confirm True matches the value `CORA` held in the original session.
net = Graph_OurConvNet(net_parameters, cora=True, dropout=net_parameters['Dropout'])
if torch.cuda.is_available():
    net.cuda()

# number of network parameters
nb_param = sum(param.numel() for param in net.parameters())
print('nb_param=',nb_param,' L=',net_parameters['L'])

Net layers [1433, 50, 50, 50, 50, 50, 50]
Hin 1433 Hout 50
Hin 50 Hout 50
Hin 50 Hout 50

nb of hidden layers= 6
dim of layers (w/ embed dim)= [1433, 50, 50, 50, 50, 50, 50]


nb_param= 414807  L= 6


In [162]:
### optimization parameters
max_iters = 1500
batch_iters = 20      # report / adapt the learning rate every `batch_iters` iterations
lr = 0.0005
l2 = 0.002
decay_rate = 1.25     # lr is divided by this when the validation loss increases
nb_classes = net_parameters['nb_clusters_target']

# Optimizer
optimizer = net.update(lr, l2)

#############
# loop over epochs
#############
t_start = time.time()
t_start_total = time.time()
# Plain Python floats: no need to build (CUDA) tensors for scalar thresholds.
average_loss_old = 1e4
running_train_acc = running_total = running_train_loss = running_val_loss = 0.0
tab_results = []

train_y, E_start, E_end, V = get_cora_dataset()
features_x = Variable(torch.FloatTensor(features).type(dtypeFloat), requires_grad=False)

for iteration in range(max_iters):  # loop over the dataset multiple times
    # forward, loss (the first 140 vertices are used for training)
    net.train()
    pred_y = net(features_x, E_start, E_end)
    loss = net.loss(pred_y[:140], train_y[:140], None)

    # backward, update
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # training statistics (computed from the training-mode forward pass above)
    net.eval()

    train_acc = calculate_avg_accuracy(nb_classes, train_y[:140], pred_y[:140]) # training acc
    running_train_acc += train_acc
    # .item() extracts the Python scalar; indexing a 0-dim tensor with [0] is
    # an error in current PyTorch releases.
    running_train_loss += loss.item()
    running_total += 1

    # validation pass: no gradients needed, so do not build the autograd graph
    with torch.no_grad():
        y_eval = net(features_x, E_start, E_end)
        val_loss = net.loss(y_eval[idx_val], train_y[idx_val], None)
    running_val_loss += val_loss.item()

    # learning rate, print results
    if not iteration%batch_iters:
        val_accuracy = calculate_avg_accuracy(nb_classes, train_y[idx_val], y_eval[idx_val])
        average_loss = running_val_loss/ running_total

        # update learning rate
        optimizer, lr = update_lr(net, optimizer, average_loss, average_loss_old, lr, decay_rate)

        # save intermediate results
        tab_results.append([iteration,average_loss,100* val_accuracy, time.time()-t_start_total])

        print_results(iteration, batch_iters, running_train_acc,
                      running_total, running_train_loss, lr, t_start)

        if lr < 5e-5:
            print("Early Stopping")
            break

        # reset counters
        t_start = time.time()
        running_train_acc = running_total = running_train_loss = running_val_loss = 0.0
        average_loss_old = average_loss
        





iteration= 0, train loss(20iter)= 1.951, lr= 0.0005000, time(20iter)= 0.62
val accuracy= 21.333
train accuracy= 12.143

iteration= 20, train loss(20iter)= 1.868, lr= 0.0005000, time(20iter)= 4.96
val accuracy= 24.333
train accuracy= 26.286

iteration= 40, train loss(20iter)= 1.712, lr= 0.0005000, time(20iter)= 4.91
val accuracy= 43.333
train accuracy= 37.179

iteration= 60, train loss(20iter)= 1.538, lr= 0.0005000, time(20iter)= 4.91
val accuracy= 44.000
train accuracy= 45.929

iteration= 80, train loss(20iter)= 1.365, lr= 0.0005000, time(20iter)= 4.91
val accuracy= 57.000
train accuracy= 52.214
Updating LR to 0.0004000

iteration= 100, train loss(20iter)= 1.185, lr= 0.0004000, time(20iter)= 4.92
val accuracy= 60.667
train accuracy= 55.107

iteration= 120, train loss(20iter)= 1.070, lr= 0.0004000, time(20iter)= 4.91
val accuracy= 62.000
train accuracy= 57.607

iteration= 140, train loss(20iter)= 0.972, lr= 0.0004000, time(20iter)= 4.91
val accuracy= 62.000
train accuracy= 60.643

iter

### Evaluation

In [163]:
running_accuracy = running_total = running_loss = 0.0
iters = 1

net.eval()
# Bind to new names so this cell does not overwrite the `labels` tensor loaded
# earlier or the `accuracy` helper imported from utils.
test_labels, E_start, E_end, V = get_cora_dataset()
features_x = Variable(torch.FloatTensor(features).type(dtypeFloat), requires_grad=False)

# Inference only: skip building the autograd graph.
with torch.no_grad():
    y = net(features_x, E_start, E_end)
    # `calculate_loss` is not defined by any cell of this notebook; use the
    # network's own loss exactly as the training cell does.
    test_loss = net.loss(y[idx_test], test_labels[idx_test], None)

test_accuracy = calculate_avg_accuracy(nb_classes, test_labels[idx_test], y[idx_test])
print('\nloss(100 pre-saved data)= %.3f, accuracy(100 pre-saved data)= %.3f' % (test_loss.item(), 100* test_accuracy))



loss(100 pre-saved data)= 1.098, accuracy(100 pre-saved data)= 70.100


  # This is added back by InteractiveShellApp.init_path()
