In [1]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import pdb 
import time
import numpy as np
import pickle
from sklearn.metrics import confusion_matrix

import sys
import os 
import random

seed = 42
random.seed(seed)
np.random.seed(seed)
CUDA = True

if CUDA and torch.cuda.is_available():
    print('cuda available')
    dtypeFloat = torch.cuda.FloatTensor
    dtypeLong = torch.cuda.LongTensor
    torch.cuda.manual_seed(seed)
    
# Helper methods for loading CORA Graph
from utils import load_data3, accuracy

cuda available


### Setup for Dataset

In [2]:
# Load data (GCN)
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, labels = load_data3('citeseer')
adj = adj.toarray().astype(float)
adj += np.eye(adj.shape[0])
idx_train = np.argwhere(train_mask).reshape(-1)
idx_val = np.argwhere(val_mask).reshape(-1)
idx_test = np.argwhere(test_mask).reshape(-1)
labels = torch.LongTensor(np.where(labels)[1])

  r_inv = np.power(rowsum, -1).flatten()


In [52]:
y_train[:1000]

array([[0., 0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1.],
       ...,
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [48]:
labels.shape

torch.Size([3312])

In [46]:
labels.shape

torch.Size([3312])

In [3]:
new_edges = []
for v1 in idx_train:
    for v2 in idx_train:
        if v1 != v2 and adj[v1, v2] != 1: # and labels[v1] == labels[v2]:
            new_edges.append((v1,v2))
new_edges = np.array(new_edges)

In [4]:
# For Pudmed
v = adj.shape[0]
Estart = np.zeros((40000, v))
Eend = np.zeros((40000, v))
Eidentity = [] # idx of identity edges

# converting adjacency matrix to edge-to-start, edge-to-end vertex matrix
count = 0
for i in range(v):
    for j in range(v):
        if adj[i,j] == 1:
            Estart[count,i] = 1
            Eend[count,j] = 1
            if i == j:
                Eidentity.append(count)
            count += 1
Estart = Eend[:count]
Eend = Eend[:count]

In [5]:
def dropin(new_edges, rate, dim=2708, cuda=False):
    np.random.shuffle(new_edges)
    v = new_edges.shape[0]
    E_start = np.zeros((v, dim))
    E_end = np.zeros((v, dim))
    for i in range(0, int(v*rate), 2):
        v1, v2 = new_edges[i]
        E_start[i,v1] = E_end[i,v2] = E_start[i+1,v1] = E_end[i+1,v2] = 1
    E_start = Variable(torch.from_numpy(E_start[:i+2,:]).float())
    E_end = Variable(torch.from_numpy(E_end[:i+2,:]).float())
    
    if cuda:
        return E_start.cuda(), E_end.cuda()
    return E_start, E_end

In [15]:
class OurConvNetcell(nn.Module):
    def __init__(self, dim_in, dim_out, dropout_fc=0, dropout_edge=0):
        super(OurConvNetcell, self).__init__()
    
        # conv1
        self.Ui1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Vi1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Vj1 = nn.Linear(dim_in, dim_out, bias=False)  
        self.bu1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        self.bv1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        
        self.dropout_fc = dropout_fc
        self.dropout_edge = dropout_edge
        
        # conv2
        self.Ui2 = nn.Linear(dim_out, dim_out, bias=False) 
        self.Vi2 = nn.Linear(dim_out, dim_out, bias=False) 
        self.Vj2 = nn.Linear(dim_out, dim_out, bias=False)  
        self.bu2 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        self.bv2 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        
        # bn1, bn2
        self.bn1 = torch.nn.BatchNorm1d(dim_out)
        self.bn2 = torch.nn.BatchNorm1d(dim_out)
        
        # resnet
        self.R = nn.Linear(dim_in, dim_out, bias=False) 
        
        # init
        self.init_weights_OurConvNetcell(dim_in, dim_out, 1)
        
         
    def init_weights_OurConvNetcell(self, dim_in, dim_out, gain):   
        # conv1
        scale = gain* np.sqrt( 2.0/ dim_in )
        self.Ui1.weight.data.uniform_(-scale, scale) 
        self.Vi1.weight.data.uniform_(-scale, scale) 
        self.Vj1.weight.data.uniform_(-scale, scale) 
        self.bu1.data.fill_(0)
        self.bv1.data.fill_(0)
        
        # conv2
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.Ui2.weight.data.uniform_(-scale, scale) 
        self.Vi2.weight.data.uniform_(-scale, scale) 
        self.Vj2.weight.data.uniform_(-scale, scale) 
        self.bu2.data.fill_(0)
        self.bv2.data.fill_(0)
        
        # RN
        scale = gain* np.sqrt( 2.0/ dim_in )
        self.R.weight.data.uniform_(-scale, scale)  
        
        
    def forward(self, x, E_start, E_end):
        x = F.dropout(x, self.dropout_fc, training=self.training)
        xin = x
        
        # edge norm
        norm = torch.sum(E_end.t(), 1).reshape(-1,1)
        norm = torch.max(norm, torch.ones(norm.shape).cuda())

        # conv1
        Uix = self.Ui1(x)  #  V x H_out
        Vix = self.Vi1(x)  #  V x H_out
        Vjx = self.Vj1(x)  #  V x H_out
        x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv1  # E x H_out
        x1 = torch.sigmoid(x1)

        x2 = torch.mm(E_start, Uix)  #  E x H_out
        x = torch.mm(E_end.t(), x1*x2) + self.bu1 #  V x H_out
        
        x = torch.div(x, norm)# norm
        x = self.bn1(x) # bn1
        x = torch.nn.LeakyReLU(0.1)(x) # relu1

        # conv2
        Uix = self.Ui2(x)  #  V x H_out
        Vix = self.Vi2(x)  #  V x H_out
        Vjx = self.Vj2(x)  #  V x H_out
        x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv2  # E x H_out
        x1 = torch.sigmoid(x1)
        
        x2 = torch.mm(E_start, Uix)  #  V x H_out        
        x = torch.mm(E_end.t(), x1*x2) + self.bu2 #  V x H_out        
        x = torch.div(x, norm) # normalization
        
        x = self.bn2(x) # bn2
        x = x + self.R(xin) # addition
        x = torch.nn.LeakyReLU(0.1)(x) # relu2
        
        return x
        
class Graph_OurConvNet(nn.Module):
    def __init__(self, net_parameters, cora=False, cuda=False):
        super(Graph_OurConvNet, self).__init__()
        
        # parameters
        Voc = net_parameters['Voc']
        D = net_parameters['D']
        nb_clusters_target = net_parameters['nb_clusters_target']
        H = net_parameters['H']
        L = net_parameters['L']
        self.cora = cora
        self.use_cuda = cuda
        self.dropout_fc = net_parameters['Dropout_fc']
        self.dropout_edge = net_parameters['Dropout_edge']
        self.drop_in = net_parameters['Dropout_in']
        
        # vector of hidden dimensions
        net_layers = []
        for layer in range(L):
            net_layers.append(H)
        
        # CL cells
        # NOTE: Each graph convnet cell uses *TWO* convolutional operations
        net_layers_extended = [net_parameters['features']] + net_layers 
        
        L = len(net_layers)
        list_of_gnn_cells = [] # list of NN cells
        for layer in range(L//2):
            Hin, Hout = net_layers_extended[2*layer], net_layers_extended[2*layer+2]
            list_of_gnn_cells.append(OurConvNetcell(Hin,Hout, self.dropout_fc, self.dropout_edge))
        
        # register the cells for pytorch
        self.gnn_cells = nn.ModuleList(list_of_gnn_cells)
            
        # fc
        Hfinal = net_layers_extended[-1]
        self.fc = nn.Linear(Hfinal,nb_clusters_target) 
        
        # init
        self.init_weights_Graph_OurConvNet(Voc,D,Hfinal,nb_clusters_target,1)
        
        # print
#         print('\nnb of hidden layers=',L)
#         print('dim of layers (w/ embed dim)=',net_layers_extended)      
#         print('\n')
        
        # class variables
        self.D = D
        self.L = L
        self.net_layers_extended = net_layers_extended      
        
        
    def init_weights_Graph_OurConvNet(self, Fin_enc, Fout_enc, Fin_fc, Fout_fc, gain):
        scale = gain* np.sqrt(2.0/ (Fin_fc+Fout_fc))
        self.fc.weight.data.uniform_(-scale, scale)  
        self.fc.bias.data.fill_(0)  
        
    def forward(self, x, E_start, E_end, E_identity, E_dropin):
        if self.training:
            # Edge Start+End Dropout for all layers
            num_edges = E_start.shape[0]
            dropout_idx = np.array([i for i in range(num_edges) if i not in E_identity])
            np.random.shuffle(dropout_idx)
            E_start = E_start.clone()
            E_start[dropout_idx[:int(num_edges*self.dropout_edge)]] = 0
            E_end = E_end.clone()
            E_end[dropout_idx[:int(num_edges*self.dropout_edge)]] = 0
            
            # Dropin
            D_start, D_end = dropin(E_dropin, self.drop_in, x.shape[0], self.use_cuda)
            E_start = torch.cat((E_start, D_start), 0)
            E_end = torch.cat((E_end, D_end), 0)
            
        # convnet cells  
        for layer in range(self.L//2):
            gnn_layer = self.gnn_cells[layer]            
            x = gnn_layer(x,E_start,E_end) # V x H
            
        x = F.dropout(x, self.dropout_fc, training=self.training) #FC Dropout
        x = self.fc(x) # FC
        return x
         
    def loss(self, y, y_target, weight):
        loss = nn.CrossEntropyLoss()(y,y_target)
        return loss
       
    def update(self, lr, l2):
        update = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=l2)
        return update
    
    def update_learning_rate(self, optimizer, lr):
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return optimizer
    
    def nb_param(self):
        return self.nb_param

In [16]:
def calculate_avg_accuracy(nb_classes, labels, pred_y):
    S = labels.data.cpu().numpy()
    C = np.argmax(torch.nn.Softmax(dim=1)(pred_y).data.cpu().numpy() , axis=1)
    return np.sum(S==C)/S.shape[0]

def update_lr(net, optimizer, average_loss, average_loss_old, lr, decay_rate, early_stopping, verbose):
    # Update LR if > early_stopping and avg val loss is higher
    if average_loss > average_loss_old and lr > early_stopping:
        lr /= decay_rate
        if verbose:
            print('Updating LR to %.7f' % lr)
    return net.update_learning_rate(optimizer, lr), lr

def print_results(iteration, batch_iters, avg_train_acc, running_train_loss, val_accuracy, lr, t_start):
    print('\niteration= %d, train loss(%diter)= %.3f, lr= %.7f, time(%diter)= %.2f' % 
          (iteration, batch_iters, running_train_loss/batch_iters, lr, 
           batch_iters, time.time() - t_start))
    print('val accuracy= %.3f' % (100* val_accuracy))
    print('train accuracy= %.3f' % (100* avg_train_acc))


In [25]:
#features_x, train_y, E_start, E_end, E_identity, E_dropin = get_cora_dataset(CUDA)

def train(net, lr, l2, batch_iters, early_stopping, verbose=False):
    ### optimization parameters
    nb_classes = 6
    max_iters = 800
    decay_rate = 1.25
    SAVE_PATH = 'model_state'

    # Optimizer
    optimizer = net.update(lr, l2) 
    t_start = time.time()
    t_start_total = time.time()
    average_loss_old = torch.tensor(1e4).cuda() if net.cuda else torch.tensor(1e4)
    best = running_train_acc = running_train_loss = running_val_loss = 0.0
    tab_results = []

    for iteration in range(1, max_iters):  # loop over the dataset multiple times
        # forward, loss
        net.train()
        pred_y = net.forward(features_x, E_start, E_end, E_identity, E_dropin)
        loss = net.loss(pred_y[idx_train], train_y[idx_train], None) 
        train_acc = calculate_avg_accuracy(nb_classes, train_y[idx_train], pred_y[idx_train]) # training acc
        running_train_acc += train_acc    
        running_train_loss += loss.item()

        # backward, update
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # validation eval
        net.eval()
        y_eval = net.forward(features_x, E_start, E_end, E_identity, E_dropin)
        val_loss = net.loss(y_eval[idx_val], train_y[idx_val], None) 
        running_val_loss += val_loss.item()

        # learning rate, print results
        if not iteration%batch_iters:
            val_accuracy = calculate_avg_accuracy(nb_classes, train_y[idx_val], y_eval[idx_val])
            average_val_loss = running_val_loss/ batch_iters
            avg_train_acc = running_train_acc/ batch_iters

            # update learning rate 
#             if val_accuracy < avg_train_acc and avg_train_acc > 0.7:
#                 optimizer, lr = update_lr(net, optimizer, average_val_loss, average_loss_old, 
#                                           lr, decay_rate, early_stopping, verbose)

            # save intermediate results
            if val_accuracy > best:
                torch.save(net.state_dict(), SAVE_PATH)
                best = val_accuracy
            tab_results.append([iteration,average_val_loss,100* val_accuracy, time.time()-t_start_total])

            if verbose:
                print_results(iteration, batch_iters, avg_train_acc, running_train_loss, val_accuracy, lr, t_start)
            if lr < torch.tensor(early_stopping).cuda() and avg_train_acc - val_accuracy > 0.05:
                print("Early Stopping at %d. Highest Val: %.3f " % (iteration, max([tab_results[i][2] for i in range(len(tab_results))])))
                return max([tab_results[i][2] for i in range(len(tab_results))])
                break

            # reset counters
            t_start = time.time()
            running_train_acc = running_train_loss = running_val_loss = 0.0
            average_loss_old = average_val_loss
    return max([tab_results[i][2] for i in range(len(tab_results))])
    

In [26]:
CORA = 1
net_parameters = {}
net_parameters['D'] = net_parameters['H'] = 50
net_parameters['features'] = features.shape[1]
net_parameters['Voc'] = 1
net_parameters['nb_clusters_target'] = y_train.shape[1]

### Train

In [53]:
TRAIN = True
if TRAIN:
    net_parameters['L'] = 4
    net_parameters['Dropout_fc'] = 0.4
    net_parameters['Dropout_edge'] = 0.4
    net_parameters['Dropout_in'] = 0.01
    net = Graph_OurConvNet(net_parameters, CORA, True)
    net.cuda()

    features_x = Variable(features, requires_grad=False).cuda()
    train_y = Variable(labels).cuda()
    E_start = Variable(torch.from_numpy(Estart).float()).cuda()
    E_end = Variable(torch.from_numpy(Eend).float()).cuda()
    E_identity = Eidentity
    E_dropin = new_edges
    
    lr = 0.0005
    l2 = 0.01
    batch_iters = 10
    early_stopping = 5e-5
    train(net, lr, l2, batch_iters, early_stopping, verbose=True)


iteration= 10, train loss(10iter)= 1.823, lr= 0.0005000, time(10iter)= 9.94
val accuracy= 18.800
train accuracy= 19.667

iteration= 20, train loss(10iter)= 1.732, lr= 0.0005000, time(10iter)= 9.88
val accuracy= 14.200
train accuracy= 26.167

iteration= 30, train loss(10iter)= 1.648, lr= 0.0005000, time(10iter)= 9.88
val accuracy= 14.200
train accuracy= 31.250

iteration= 40, train loss(10iter)= 1.590, lr= 0.0005000, time(10iter)= 9.89
val accuracy= 13.800
train accuracy= 35.750

iteration= 50, train loss(10iter)= 1.503, lr= 0.0005000, time(10iter)= 9.88
val accuracy= 13.800
train accuracy= 42.500

iteration= 60, train loss(10iter)= 1.439, lr= 0.0005000, time(10iter)= 9.91
val accuracy= 13.800
train accuracy= 47.583

iteration= 70, train loss(10iter)= 1.375, lr= 0.0005000, time(10iter)= 9.87
val accuracy= 14.000
train accuracy= 52.667

iteration= 80, train loss(10iter)= 1.334, lr= 0.0005000, time(10iter)= 9.87
val accuracy= 19.200
train accuracy= 53.917

iteration= 90, train loss(10ite


iteration= 690, train loss(10iter)= 0.833, lr= 0.0005000, time(10iter)= 9.88
val accuracy= 36.600
train accuracy= 72.750

iteration= 700, train loss(10iter)= 0.837, lr= 0.0005000, time(10iter)= 9.87
val accuracy= 33.400
train accuracy= 72.667

iteration= 710, train loss(10iter)= 0.789, lr= 0.0005000, time(10iter)= 9.86
val accuracy= 34.200
train accuracy= 75.000

iteration= 720, train loss(10iter)= 0.832, lr= 0.0005000, time(10iter)= 9.88
val accuracy= 35.200
train accuracy= 71.750

iteration= 730, train loss(10iter)= 0.805, lr= 0.0005000, time(10iter)= 9.90
val accuracy= 32.800
train accuracy= 72.750

iteration= 740, train loss(10iter)= 0.832, lr= 0.0005000, time(10iter)= 9.86
val accuracy= 33.200
train accuracy= 73.083

iteration= 750, train loss(10iter)= 0.809, lr= 0.0005000, time(10iter)= 9.86
val accuracy= 35.800
train accuracy= 73.333

iteration= 760, train loss(10iter)= 0.819, lr= 0.0005000, time(10iter)= 9.87
val accuracy= 38.200
train accuracy= 72.250

iteration= 770, train l

In [29]:
net.eval()
y_eval = net.forward(features_x, E_start, E_end, E_identity, E_dropin)
loss = net.loss(y_eval[idx_test], labels[idx_test], None) 
accuracy = calculate_avg_accuracy(nb_classes, labels[idx_test], y_eval[idx_test])
print('\nloss(100 pre-saved data)= %.3f, accuracy(100 pre-saved data)= %.3f' % (loss.item(), 100* accuracy))

RuntimeError: index 3312 is out of bounds for dim with size 3312

### Hyperparam Search

In [None]:
iters = 3
batch_iters = 20
early_stopping = 25e-5
    
for _ in range(100):
    lr = random.uniform(0.0005, 0.0015)
    L = random.choice([2,4,6])
    l2 = random.uniform(0.005, 0.02)
    d_fc = random.uniform(0.4, 0.7)
    d_edge = random.uniform(0.4, 0.7)
    d_in = random.uniform(0.0, 0.05)
    
    features_x = Variable(features, requires_grad=False).cuda()
    train_y = Variable(labels).cuda()
    E_start = Variable(torch.from_numpy(Estart).float()).cuda()
    E_end = Variable(torch.from_numpy(Eend).float()).cuda()
    E_identity = Eidentity
    E_dropin = new_edges

    net_parameters['L'] = L
    net_parameters['Dropout_fc'] = d_fc
    net_parameters['Dropout_edge'] = d_edge
    net_parameters['Dropout_in'] = d_in
    print("Setting Layers=%d, L2=%.5f, LR=%f, Dropout_FC=%.3f, Dropout_edge=%.3f, Dropin=%.5f" % (L, l2, lr, d_fc, d_edge, d_in))

    val_total = 0.
    for iteration in range(iters):
        net = Graph_OurConvNet(net_parameters, 1, True)
        net.cuda()
        val_total += train(net, lr, l2, batch_iters, early_stopping, verbose=False)
    print("Avg Val Accuracy", val_total/iters)

Setting Layers=2, L2=0.00745, LR=0.001034, Dropout_FC=0.514, Dropout_edge=0.697, Dropin=0.03200
Avg Val Accuracy 46.0
Setting Layers=6, L2=0.00986, LR=0.001057, Dropout_FC=0.630, Dropout_edge=0.417, Dropin=0.04109
Avg Val Accuracy 34.86666666666667
Setting Layers=4, L2=0.00902, LR=0.001305, Dropout_FC=0.463, Dropout_edge=0.683, Dropin=0.04382
Avg Val Accuracy 43.599999999999994
Setting Layers=6, L2=0.01249, LR=0.000815, Dropout_FC=0.665, Dropout_edge=0.593, Dropin=0.00714
Avg Val Accuracy 33.13333333333333
Setting Layers=6, L2=0.01342, LR=0.000640, Dropout_FC=0.479, Dropout_edge=0.575, Dropin=0.04489
Avg Val Accuracy 33.93333333333333
Setting Layers=2, L2=0.01996, LR=0.000899, Dropout_FC=0.441, Dropout_edge=0.548, Dropin=0.03779
Avg Val Accuracy 45.800000000000004
Setting Layers=2, L2=0.01441, LR=0.001361, Dropout_FC=0.638, Dropout_edge=0.527, Dropin=0.00318
Early Stopping at 460. Highest Val: 45.800 
Early Stopping at 320. Highest Val: 43.800 
Early Stopping at 440. Highest Val: 43.60

### Evaluation

In [63]:
SAVE_PATH = 'model_state'
nb_classes = max(labels_y).item()+1
net.load_state_dict(torch.load(SAVE_PATH))
net.eval()
# features_x, train_y, E_start, E_end, E_identity, E_dropin = get_cora_dataset(CUDA)
y_eval = net.forward(features_x, E_start, E_end, E_identity, E_dropin)

loss = net.loss(y_eval[idx_test], labels[idx_test], None) 
accuracy = calculate_avg_accuracy(nb_classes, labels[idx_test], y_eval[idx_test])
print('\nloss(100 pre-saved data)= %.3f, accuracy(100 pre-saved data)= %.3f' % (loss.item(), 100* accuracy))

RuntimeError: index 3326 is out of bounds for dimension 0 with size 3312