In [174]:
import numpy as np
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from models import LogReg
from utils import process

dataset = 'cora'

# training params
batch_size = 1
nb_epochs = 2000
patience = 100
lr = 0.001
l2_coef = 0.0
drop_prob = 0.0
hid_units = 50
sparse = False
nonlinearity = 'prelu' # special name to separate parameters

In [175]:
adj, features, labels, idx_train, idx_val, idx_test = process.load_data(dataset)
features, _ = process.preprocess_features(features)

nb_nodes = features.shape[0]
ft_size = features.shape[1]
nb_classes = labels.shape[1]

# adj = process.normalize_adj(adj + sp.eye(adj.shape[0]))

# if sparse:
#     sp_adj = process.sparse_mx_to_torch_sparse_tensor(adj)
# else:
#     adj = (adj + sp.eye(adj.shape[0])).todense()

features = torch.FloatTensor(features[np.newaxis])
# if not sparse:
#     adj = torch.FloatTensor(adj[np.newaxis])
labels = torch.FloatTensor(labels[np.newaxis])
idx_train = torch.LongTensor(idx_train)
idx_val = torch.LongTensor(idx_val)
idx_test = torch.LongTensor(idx_test)

### RGGCN

In [176]:
adj = adj.toarray().astype(float)
adj += np.eye(adj.shape[0])

new_edges = []
for v1 in idx_train:
    for v2 in idx_train:
        if v1 != v2 and adj[v1, v2] != 1: # and labels[v1] == labels[v2]:
            new_edges.append((v1,v2))
new_edges = np.array(new_edges)


def dropin(new_edges, rate):
    np.random.shuffle(new_edges)
    v = new_edges.shape[0]
    E_start = np.zeros((v, 2708))
    E_end = np.zeros((v, 2708))
    for i in range(0, int(v*rate), 2):
        v1, v2 = new_edges[i]
        E_start[i,v1] = E_end[i,v2] = E_start[i+1,v1] = E_end[i+1,v2] = 1
    E_start = Variable(torch.from_numpy(E_start[:i+2,:]).float())
    E_end = Variable(torch.from_numpy(E_end[:i+2,:]).float())
    return E_start.cuda(), E_end.cuda()

cora_Estart = np.zeros((20000, 2708))
cora_Eend = np.zeros((20000, 2708))
cora_Eidentity = [] # idx of identity edges

# converting adjacency matrix to edge-to-start, edge-to-end vertex matrix
count = 0
for i in range(adj.shape[0]):
    for j in range(adj.shape[1]):
        if adj[i,j] == 1:
            cora_Estart[count,i] = 1
            cora_Eend[count,j] = 1
            if i == j:
                cora_Eidentity.append(count)
            count += 1
cora_Estart = cora_Estart[:count]
cora_Eend = cora_Eend[:count]

In [177]:
def get_cora_dataset():
    E_start = Variable(torch.from_numpy(cora_Estart).float())
    E_end = Variable(torch.from_numpy(cora_Eend).float())
    
    return E_start.cuda(), E_end.cuda(), cora_Eidentity, new_edges

In [178]:
class OurConvNetcell(nn.Module):
    def __init__(self, dim_in, dim_out, dropout_fc=0, dropout_edge=0):
        super(OurConvNetcell, self).__init__()
    
        # conv1
        self.Ui1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Vi1 = nn.Linear(dim_in, dim_out, bias=False) 
        self.Vj1 = nn.Linear(dim_in, dim_out, bias=False)  
        self.bu1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        self.bv1 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        
        self.dropout_fc = dropout_fc
        self.dropout_edge = dropout_edge
        
        # conv2
        self.Ui2 = nn.Linear(dim_out, dim_out, bias=False) 
        self.Vi2 = nn.Linear(dim_out, dim_out, bias=False) 
        self.Vj2 = nn.Linear(dim_out, dim_out, bias=False)  
        self.bu2 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        self.bv2 = torch.nn.Parameter( torch.FloatTensor(dim_out), requires_grad=True )
        
        # bn1, bn2
        self.bn1 = torch.nn.BatchNorm1d(dim_out)
        self.bn2 = torch.nn.BatchNorm1d(dim_out)
        
        # resnet
        self.R = nn.Linear(dim_in, dim_out, bias=False) 
        
        # init
        self.init_weights_OurConvNetcell(dim_in, dim_out, 1)
        
         
    def init_weights_OurConvNetcell(self, dim_in, dim_out, gain):   
        # conv1
        scale = gain* np.sqrt( 2.0/ dim_in )
        self.Ui1.weight.data.uniform_(-scale, scale) 
        self.Vi1.weight.data.uniform_(-scale, scale) 
        self.Vj1.weight.data.uniform_(-scale, scale) 
        self.bu1.data.fill_(0)
        self.bv1.data.fill_(0)
        
        # conv2
        scale = gain* np.sqrt( 2.0/ dim_out )
        self.Ui2.weight.data.uniform_(-scale, scale) 
        self.Vi2.weight.data.uniform_(-scale, scale) 
        self.Vj2.weight.data.uniform_(-scale, scale) 
        self.bu2.data.fill_(0)
        self.bv2.data.fill_(0)
        
        # RN
        scale = gain* np.sqrt( 2.0/ dim_in )
        self.R.weight.data.uniform_(-scale, scale)  
        
        
    def forward(self, x, E_start, E_end):
        x = F.dropout(x, self.dropout_fc, training=self.training)
        xin = x
        
        # edge norm
        norm = torch.sum(E_end.t(), 1).reshape(-1,1)
#         norm = torch.max(norm, torch.ones(norm.shape).cuda())

        # conv1
        Uix = self.Ui1(x)  #  V x H_out
        Vix = self.Vi1(x)  #  V x H_out
        Vjx = self.Vj1(x)  #  V x H_out
        x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv1  # E x H_out
        x1 = torch.sigmoid(x1)

        x2 = torch.mm(E_start, Uix)  #  E x H_out
        x = torch.mm(E_end.t(), x1*x2) + self.bu1 #  V x H_out
        
        x = torch.div(x, norm)# norm
        x = self.bn1(x) # bn1
        x = torch.nn.LeakyReLU(0.1)(x) # relu1
        
        # conv2
        Uix = self.Ui2(x)  #  V x H_out
        Vix = self.Vi2(x)  #  V x H_out
        Vjx = self.Vj2(x)  #  V x H_out
        x1 = torch.mm(E_end,Vix) + torch.mm(E_start,Vjx) + self.bv2  # E x H_out
        x1 = torch.sigmoid(x1)
        
        x2 = torch.mm(E_start, Uix)  #  V x H_out        
        x = torch.mm(E_end.t(), x1*x2) + self.bu2 #  V x H_out
        
        x = torch.div(x, norm) # normalization
        
        x = self.bn2(x) # bn2
        x = x + self.R(xin) # addition
        x = torch.nn.LeakyReLU(0.1)(x) # relu2
        
        return x
        
class Graph_OurConvNet(nn.Module):
    def __init__(self, net_parameters, cora=False):
        super(Graph_OurConvNet, self).__init__()
        
        # parameters
        Voc = net_parameters['Voc']
        D = net_parameters['D']
        nb_clusters_target = net_parameters['nb_clusters_target']
        H = net_parameters['H']
        L = net_parameters['L']
        self.cora = cora
        self.dropout_fc = net_parameters['Dropout_fc']
        self.dropout_edge = net_parameters['Dropout_edge']
        self.drop_in = net_parameters['Dropout_in']
        
        # vector of hidden dimensions
        net_layers = []
        for layer in range(L):
            net_layers.append(H)
        
        # CL cells
        # NOTE: Each graph convnet cell uses *TWO* convolutional operations
        net_layers_extended = [net_parameters['features']] + net_layers 
        
        L = len(net_layers)
        list_of_gnn_cells = [] # list of NN cells
        for layer in range(L//2):
            Hin, Hout = net_layers_extended[2*layer], net_layers_extended[2*layer+2]
            list_of_gnn_cells.append(OurConvNetcell(Hin,Hout, self.dropout_fc, self.dropout_edge))
        
        # register the cells for pytorch
        self.gnn_cells = nn.ModuleList(list_of_gnn_cells)
            
        # fc
        Hfinal = net_layers_extended[-1]
        self.fc = nn.Linear(Hfinal,nb_clusters_target) 
        
        # init
        self.init_weights_Graph_OurConvNet(Voc,D,Hfinal,nb_clusters_target,1)
        
        # print
        print('\nnb of hidden layers=',L)
        print('dim of layers (w/ embed dim)=',net_layers_extended)      
        print('\n')
        
        # class variables
        self.D = D
        self.L = L
        self.net_layers_extended = net_layers_extended      
        
        
    def init_weights_Graph_OurConvNet(self, Fin_enc, Fout_enc, Fin_fc, Fout_fc, gain):
        scale = gain* np.sqrt(2.0/ (Fin_fc+Fout_fc))
        self.fc.weight.data.uniform_(-scale, scale)  
        self.fc.bias.data.fill_(0)  
        
    def forward(self, x, E_start, E_end, E_identity, E_dropin):
        if self.training:
            # Edge Start+End Dropout for all layers
            num_edges = E_start.shape[0]
            dropout_idx = np.array([i for i in range(num_edges) if i not in E_identity])
            np.random.shuffle(dropout_idx)
            E_start = E_start.clone()
            E_start[dropout_idx[:int(num_edges*self.dropout_edge)]] = 0
            E_end = E_end.clone()
            E_end[dropout_idx[:int(num_edges*self.dropout_edge)]] = 0
            
            # Dropin
            D_start, D_end = dropin(E_dropin, self.drop_in)
            E_start = torch.cat((E_start, D_start), 0)
            E_end = torch.cat((E_end, D_end), 0)
            
        # convnet cells  
        for layer in range(self.L//2):
            gnn_layer = self.gnn_cells[layer]            
            x = gnn_layer(x,E_start,E_end) # V x H
            
#         x = F.dropout(x, self.dropout_fc, training=self.training) #FC Dropout
#         x = self.fc(x) # FC
        return x
         
    def loss(self, y, y_target, weight):
        loss = nn.CrossEntropyLoss()(y,y_target)
        return loss
       
    def update(self, lr, l2):
        update = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=l2)
        return update
    
    def update_learning_rate(self, optimizer, lr):
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        return optimizer
    
    def nb_param(self):
        return self.nb_param

In [179]:
class Discriminator(nn.Module):
    def __init__(self, n_h):
        super(Discriminator, self).__init__()
        self.f_k = nn.Bilinear(n_h, n_h, 8)

        for m in self.modules():
            self.weights_init(m)

    def weights_init(self, m):
        if isinstance(m, nn.Bilinear):
            torch.nn.init.xavier_uniform_(m.weight.data)
            if m.bias is not None:
                m.bias.data.fill_(0.0)

    def forward(self, c, h_pl, h_mi, s_bias1=None, s_bias2=None):
        c_x = torch.unsqueeze(c, 1)
        c_x = c_x.expand_as(h_pl)

        sc_1 = torch.squeeze(self.f_k(h_pl, c_x))
        sc_2 = torch.squeeze(self.f_k(h_mi, c_x))

        if s_bias1 is not None:
            sc_1 += s_bias1
        if s_bias2 is not None:
            sc_2 += s_bias2

        logits = torch.cat((sc_1, sc_2), 0)

        return logits

In [180]:
from layers import GCN, AvgReadout

class DGI(nn.Module):
    def __init__(self, n_in, n_h, activation, net_parameters):
        super(DGI, self).__init__()
        self.gcn = Graph_OurConvNet(net_parameters, True)
        self.read = AvgReadout()
        self.drop_prob = net_parameters['Dropout_fc']

        self.sigm = nn.Sigmoid()

        self.disc = Discriminator(n_h)

    def forward(self, seq1, seq2, E_start, E_end, E_identity, E_dropin, adj, sparse, msk, samp_bias1, samp_bias2):
        h_1 = self.gcn(seq1, E_start, E_end, E_identity, E_dropin)

        c = self.read(h_1, msk)
        c = self.sigm(c)

        h_2 = self.gcn(seq2, E_start, E_end, E_identity, E_dropin)
        
#         h_1 = F.dropout(h_1, self.drop_prob, training=self.training)
#         h_2 = F.dropout(h_2, self.drop_prob, training=self.training)
        
        ret = self.disc(c, h_1, h_2, samp_bias1, samp_bias2)

        return ret

    # Detach the return variables
    def embed(self, seq,  E_start, E_end, E_identity, E_dropin, adj, sparse, msk):
        h_1 = self.gcn(seq,  E_start, E_end, E_identity, E_dropin)
        c = self.read(h_1, msk)

        return h_1.detach(), c.detach()

In [181]:
features = features.reshape(2708, -1)

net_parameters = {}
net_parameters['D'] = net_parameters['H'] = hid_units
net_parameters['features'] = features.shape[1]
net_parameters['Voc'] = 7+1 
net_parameters['nb_clusters_target'] = 7
net_parameters['L'] = 10
net_parameters['Dropout_fc'] = 0.0
net_parameters['Dropout_edge'] = 0.0
net_parameters['Dropout_in'] = 0.001

In [182]:
model = DGI(ft_size, hid_units, nonlinearity, net_parameters)
optimiser = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_coef)

if torch.cuda.is_available():
    print('Using CUDA')
    model.cuda()
    features = features.cuda()
#     if sparse:
#         sp_adj = sp_adj.cuda()
#     else:
#         adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()
    
b_xent = nn.BCEWithLogitsLoss()
xent = nn.CrossEntropyLoss()
nll = nn.NLLLoss()
cnt_wait = 0
best = 1e9
best_t = 0

train_lbls = torch.argmax(labels[0, idx_train], dim=1)
val_lbls = torch.argmax(labels[0, idx_val], dim=1)
test_lbls = torch.argmax(labels[0, idx_test], dim=1)


nb of hidden layers= 10
dim of layers (w/ embed dim)= [1433, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]


Using CUDA


In [183]:
E_start, E_end, E_identity, E_dropin = get_cora_dataset()

for epoch in range(nb_epochs):
    model.train()
    optimiser.zero_grad()

    idx = np.random.permutation(nb_nodes)
    shuf_fts = features[idx, :]

    
    lbl_1 = torch.zeros(nb_nodes, dtype=torch.long)
    lbl_2 = torch.ones(nb_nodes, dtype=torch.long)
    lbl = torch.cat((lbl_1, lbl_2), 0)

    if torch.cuda.is_available():
        shuf_fts = shuf_fts.cuda()
        lbl = lbl.cuda()
    
    logits = model(features, shuf_fts, E_start, E_end,E_identity, E_dropin, sp_adj if sparse else adj, sparse, None, None, None) 
    
    # SSL Loss
    sup_loss = xent(logits[idx_train], train_lbls)/5.

    log = nn.functional.log_softmax(logits, dim=1)
    ul_logits = torch.cat((torch.sum(log[:,:-1], dim=1).reshape(-1,1),log[:,-1].reshape(-1,1)),1)
    ul_loss = nll(ul_logits, lbl)

    loss = ul_loss
    
    print('Loss: %.4f, UL: %.4f, Sup: %.4f' % (loss.item(), ul_loss.item(), sup_loss.item()))

    if loss < best:
        best = loss
        best_t = epoch
        cnt_wait = 0
        torch.save(model.state_dict(), 'best_dgi.pkl')
    else:
        cnt_wait += 1

    if cnt_wait == patience:
        print('Early stopping!')
        break

    loss.backward()
    optimiser.step()
    
    model.eval()
    logits = model(features, shuf_fts, E_start, E_end, E_identity, E_dropin, sp_adj if sparse else adj, sparse, None, None, None) 
    log = nn.functional.log_softmax(logits, dim=1)
    preds = torch.argmax(log[:,:-1], dim=1)
    train_acc = torch.sum(preds[idx_train] == train_lbls).float() / train_lbls.shape[0]
    acc = torch.sum(preds[idx_test] == test_lbls).float() / test_lbls.shape[0]
    print("%d Test Acc: %.4f, Train Acc: %.4f\n" % (epoch, acc.item(), train_acc.item()))

Loss: 8.5886, UL: 8.5886, Sup: 0.4334
0 Test Acc: 0.1490, Train Acc: 0.1429

Loss: 8.4745, UL: 8.4745, Sup: 0.4207
1 Test Acc: 0.1490, Train Acc: 0.1429

Loss: 8.4133, UL: 8.4133, Sup: 0.4111
2 Test Acc: 0.1490, Train Acc: 0.1429

Loss: 8.3711, UL: 8.3711, Sup: 0.4129
3 Test Acc: 0.1490, Train Acc: 0.1429

Loss: 8.3314, UL: 8.3314, Sup: 0.4167
4 Test Acc: 0.1490, Train Acc: 0.1429

Loss: 8.3081, UL: 8.3081, Sup: 0.4199
5 Test Acc: 0.1490, Train Acc: 0.1429

Loss: 8.3051, UL: 8.3051, Sup: 0.4230
6 Test Acc: 0.1600, Train Acc: 0.1214

Loss: 8.2864, UL: 8.2864, Sup: 0.4217
7 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 8.2611, UL: 8.2611, Sup: 0.4199
8 Test Acc: 0.1650, Train Acc: 0.1286

Loss: 8.2387, UL: 8.2387, Sup: 0.4160
9 Test Acc: 0.1490, Train Acc: 0.1429

Loss: 8.1938, UL: 8.1938, Sup: 0.4144
10 Test Acc: 0.1490, Train Acc: 0.1429

Loss: 8.1624, UL: 8.1624, Sup: 0.4122
11 Test Acc: 0.1490, Train Acc: 0.1429

Loss: 8.1264, UL: 8.1264, Sup: 0.4112
12 Test Acc: 0.3140, Train Acc: 0.13

105 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8122, UL: 6.8122, Sup: 0.3894
106 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8127, UL: 6.8127, Sup: 0.3893
107 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8121, UL: 6.8121, Sup: 0.3893
108 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8122, UL: 6.8122, Sup: 0.3894
109 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8145, UL: 6.8145, Sup: 0.3893
110 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8121, UL: 6.8121, Sup: 0.3894
111 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8119, UL: 6.8119, Sup: 0.3894
112 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8119, UL: 6.8119, Sup: 0.3892
113 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8120, UL: 6.8120, Sup: 0.3893
114 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8118, UL: 6.8118, Sup: 0.3892
115 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8119, UL: 6.8119, Sup: 0.3893
116 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8119, UL: 6.8119, Sup: 0.3893
117 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.811

209 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8111, UL: 6.8111, Sup: 0.3894
210 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8113, UL: 6.8113, Sup: 0.3895
211 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8111, UL: 6.8111, Sup: 0.3894
212 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8111, UL: 6.8111, Sup: 0.3893
213 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8115, UL: 6.8115, Sup: 0.3897
214 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8110, UL: 6.8110, Sup: 0.3894
215 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8111, UL: 6.8111, Sup: 0.3893
216 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8111, UL: 6.8111, Sup: 0.3893
217 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8113, UL: 6.8113, Sup: 0.3893
218 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8112, UL: 6.8112, Sup: 0.3893
219 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8111, UL: 6.8111, Sup: 0.3894
220 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8112, UL: 6.8112, Sup: 0.3891
221 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.811

313 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8110, UL: 6.8110, Sup: 0.3891
314 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8111, UL: 6.8111, Sup: 0.3895
315 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8110, UL: 6.8110, Sup: 0.3895
316 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8110, UL: 6.8110, Sup: 0.3888
317 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8110, UL: 6.8110, Sup: 0.3894
318 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3895
319 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8110, UL: 6.8110, Sup: 0.3890
320 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3894
321 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3894
322 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3890
323 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3894
324 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8110, UL: 6.8110, Sup: 0.3891
325 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.811

417 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3893
418 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3891
419 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3893
420 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3893
421 Test Acc: 0.1450, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3892
422 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3893
423 Test Acc: 0.1450, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3892
424 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3892
425 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8112, UL: 6.8112, Sup: 0.3893
426 Test Acc: 0.1450, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3893
427 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3892
428 Test Acc: 0.1450, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3892
429 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.810

521 Test Acc: 0.1500, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3894
522 Test Acc: 0.1460, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3891
523 Test Acc: 0.1450, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3893
524 Test Acc: 0.1450, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3892
525 Test Acc: 0.1450, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3892
526 Test Acc: 0.1450, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3893
527 Test Acc: 0.1460, Train Acc: 0.1429

Loss: 6.8109, UL: 6.8109, Sup: 0.3892
528 Test Acc: 0.1480, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3893
529 Test Acc: 0.1460, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3891
530 Test Acc: 0.1500, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3892
531 Test Acc: 0.1450, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3892
532 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.8108, UL: 6.8108, Sup: 0.3891
533 Test Acc: 0.1440, Train Acc: 0.1429

Loss: 6.810

KeyboardInterrupt: 

In [190]:
print('Loading {}th epoch'.format(best_t))
model.load_state_dict(torch.load('best_dgi.pkl'))

model.eval()
logits = model(features, shuf_fts,  E_start, E_end, E_identity, E_dropin, sp_adj if sparse else adj, sparse, None, None, None) 
log = nn.functional.log_softmax(logits, dim=1)


Loading 613th epoch


In [191]:
preds = torch.argmax(log, dim=1)

In [192]:
count = np.zeros(8, dtype=int)
for i in range(2708):
    count[preds[i]] += 1
print(count)

count = np.zeros(8, dtype=int)
for i in range(2708,5416):
    count[preds[i]] += 1
print(count)

[   0    0   15    0    0    0    0 2693]
[   0    0    2    0    0    0    0 2706]


In [193]:
preds = torch.argmax(log[:,:-1], dim=1)
acc = torch.sum(preds[idx_test] == test_lbls).float() / test_lbls.shape[0]
print("Test Acc", acc)

Test Acc tensor(0.1450, device='cuda:0')


In [194]:
print('Loading {}th epoch'.format(best_t))
model.load_state_dict(torch.load('best_dgi.pkl'))

logits = model(features, shuf_fts,  E_start, E_end, E_identity, E_dropin, sp_adj if sparse else adj, sparse, None, None, None) 
log = nn.functional.log_softmax(logits, dim=1)
preds = torch.argmax(log[:,:-1], dim=1)
acc = torch.sum(preds[idx_test] == test_lbls).float() / test_lbls.shape[0]
acc

Loading 613th epoch


tensor(0.1450, device='cuda:0')

In [189]:
print('Loading {}th epoch'.format(best_t))
model.load_state_dict(torch.load('best_dgi.pkl'))

embeds, _ = model.embed(features,  E_start, E_end, E_identity, E_dropin, sp_adj if sparse else adj, sparse, None)
train_embs = embeds[idx_train]
val_embs = embeds[idx_val]
test_embs = embeds[idx_test]

train_lbls = torch.argmax(labels[0, idx_train], dim=1)
val_lbls = torch.argmax(labels[0, idx_val], dim=1)
test_lbls = torch.argmax(labels[0, idx_test], dim=1)

tot = torch.zeros(1)
tot = tot.cuda()

accs = []

for _ in range(50):
    log = LogReg(hid_units, nb_classes)
    opt = torch.optim.Adam(log.parameters(), lr=0.01, weight_decay=0.0)
    log.cuda()

    pat_steps = 0
    best_acc = torch.zeros(1)
    best_acc = best_acc.cuda()
    for _ in range(100):
        log.train()
        opt.zero_grad()

        logits = log(train_embs)
        loss = xent(logits, train_lbls)
        
        loss.backward()
        opt.step()

        
    tlogits = log(train_embs)
    tpreds = torch.argmax(tlogits, dim=1)
    train_acc = torch.sum(tpreds == train_lbls).float() / train_lbls.shape[0]

    logits = log(test_embs)
    preds = torch.argmax(logits, dim=1)
    acc = torch.sum(preds == test_lbls).float() / test_lbls.shape[0]
    print("Test Acc: %.4f, Train Acc: %.4f" % (acc.item(), train_acc.item()))
    accs.append(acc * 100)
    tot += acc

print('Average accuracy:', tot / 50)

accs = torch.stack(accs)
print(accs.mean())
print(accs.std())

Loading 613th epoch
Test Acc: 0.5450, Train Acc: 0.6643
Test Acc: 0.5400, Train Acc: 0.6571
Test Acc: 0.5460, Train Acc: 0.6571
Test Acc: 0.5250, Train Acc: 0.6429
Test Acc: 0.5270, Train Acc: 0.6643
Test Acc: 0.5540, Train Acc: 0.6714
Test Acc: 0.5550, Train Acc: 0.6500
Test Acc: 0.5300, Train Acc: 0.6643
Test Acc: 0.5470, Train Acc: 0.6571
Test Acc: 0.5390, Train Acc: 0.6429
Test Acc: 0.5490, Train Acc: 0.6500
Test Acc: 0.5590, Train Acc: 0.6786
Test Acc: 0.5560, Train Acc: 0.6857
Test Acc: 0.5430, Train Acc: 0.6643
Test Acc: 0.5430, Train Acc: 0.6643
Test Acc: 0.5260, Train Acc: 0.6429
Test Acc: 0.5540, Train Acc: 0.6857
Test Acc: 0.5460, Train Acc: 0.6429
Test Acc: 0.5280, Train Acc: 0.6500
Test Acc: 0.5400, Train Acc: 0.6429
Test Acc: 0.5480, Train Acc: 0.6500
Test Acc: 0.5380, Train Acc: 0.6571
Test Acc: 0.5330, Train Acc: 0.6429
Test Acc: 0.5540, Train Acc: 0.6643
Test Acc: 0.5510, Train Acc: 0.6643
Test Acc: 0.5500, Train Acc: 0.6643
Test Acc: 0.5580, Train Acc: 0.6429
Test Acc