In [1]:
import networkx as nx
import pandas as pd
import numpy as np
from utils import load_data,normalize,doublerelu
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
torch.set_printoptions(sci_mode=False)
import time

In [2]:
import warnings
# Silence every warning for the rest of the session so the training log stays
# readable. NOTE(review): this blanket filter also hides deprecation and
# numerical warnings raised by numpy/torch; consider narrowing it by category.
warnings.filterwarnings("ignore") 

In [3]:
# --- Run configuration -------------------------------------------------------
# Reproducibility
seed = 165

# Optimisation (consumed by train())
epochs = 10001        # number of optimisation steps per train() call
lr = 0.0001           # Adam learning rate
# NOTE(review): previously spelled 10e-4, which is the same value as 1e-3.
# Confirm that 1e-4 was not what was intended.
weight_decay = 1e-3   # Adam weight decay

# Model
hidden = 10           # `hidden` argument handed to the model constructor

# Hardware: run on the GPU whenever torch can see one
cuda = torch.cuda.is_available()

In [4]:
# Seed every random number generator in use so that repeated runs draw the
# same node masks (np.random) and the same torch random streams.
torch.manual_seed(seed)
np.random.seed(seed)
if cuda:
    # the CUDA generator is only seeded when a GPU was detected
    torch.cuda.manual_seed(seed)

In [5]:
class GNN1Layer(Module):
    """Batched graph layer computing ``X @ W1 + (A @ X) @ W2``.

    Each batch element owns its own pair of weight matrices. ``weight1`` is
    initialised to a (possibly rectangular) identity and ``weight2`` to zeros,
    so a freshly constructed layer with ``in_features == out_features``
    returns its input unchanged.
    """

    def __init__(self, batch_size, in_features, out_features):
        """Create the per-batch weights.

        Args:
            batch_size: number of weight-matrix pairs (first dim of the parameters).
            in_features: size of the last dimension of the input.
            out_features: size of the last dimension of the output.
        """
        super().__init__()
        self.batch_size = batch_size
        self.in_features = in_features
        self.out_features = out_features

        # (1, in, out) identity, replicated once per batch element
        identity = torch.eye(in_features, out_features, dtype=torch.float32).unsqueeze(0)
        self.weight1 = Parameter(identity.repeat(batch_size, 1, 1))
        self.weight2 = Parameter(torch.zeros(batch_size, in_features, out_features))

    def forward(self, input, adj):
        """Return ``bmm(input, weight1) + bmm(bmm(adj, input), weight2)``."""
        self_term = torch.bmm(input, self.weight1)
        # features aggregated through the adjacency before the second projection
        neighbour_term = torch.bmm(torch.bmm(adj, input), self.weight2)
        return self_term + neighbour_term

In [6]:
class GNN2Layer(Module):
    """Batched graph layer computing ``X @ W1 + (A @ X) @ W2``.

    Each batch element owns its own pair of weight matrices. ``weight1`` is
    initialised to a (possibly rectangular) identity and ``weight2`` to zeros.

    The constructor used to allocate and Xavier-initialise a scratch tensor of
    shape ``(batch_size, in_features, out_features - in_features)`` that was
    never used. It raised for ``out_features < in_features`` (negative
    dimension) and advanced the global torch RNG for no effect, so it has been
    removed; the parameters themselves are initialised exactly as before.
    """

    def __init__(self, batch_size, in_features, out_features):
        """Create the per-batch weights.

        Args:
            batch_size: number of weight-matrix pairs (first dim of the parameters).
            in_features: size of the last dimension of the input.
            out_features: size of the last dimension of the output; may be
                smaller than, equal to, or larger than ``in_features``.
        """
        super(GNN2Layer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.batch_size = batch_size

        # (1, in, out) identity, replicated once per batch element
        weight1_eye = torch.eye(in_features, out_features, dtype=torch.float32)
        weight1_eye = weight1_eye.reshape((1, in_features, out_features))
        weight1_eye = weight1_eye.repeat(batch_size, 1, 1)
        self.weight1 = Parameter(weight1_eye)
        self.weight2 = Parameter(torch.zeros(batch_size, in_features, out_features))

    def forward(self, input, adj):
        """Return ``bmm(input, weight1) + bmm(bmm(adj, input), weight2)``."""
        v1 = torch.bmm(input, self.weight1)
        v2 = torch.bmm(torch.bmm(adj, input), self.weight2)
        output = v1 + v2
        return output

In [7]:
class GNN1(nn.Module):
    """One-layer model that overwrites only the selected node rows.

    ``nfeat`` and ``hidden`` are accepted by the constructor but are not used
    by this class.
    """

    def __init__(self, batch_size, nfeat, ndim, hidden):
        super().__init__()
        self.gc1 = GNN1Layer(batch_size, ndim, ndim)

    def forward(self, x, adj, random_indices):
        """Return a copy of ``x`` whose ``random_indices`` rows are re-estimated.

        The layer output is passed through ``doublerelu`` and each node vector
        is divided by its sum over the last axis. Only rows ``random_indices``
        of batch element 0 are copied into the result; every other entry keeps
        its value from ``x``.
        """
        activated = doublerelu(self.gc1(x, adj))
        # rescale every node vector so that its entries sum to 1
        # NOTE(review): a row whose activations sum to zero divides by zero here
        predicted = activated / activated.sum(dim=2, keepdim=True)

        result = x.clone()
        result[0][random_indices, :] = predicted[0][random_indices, :]
        return result

In [8]:
class GNN2(nn.Module):
    """Two-layer model: ``ndim -> hidden -> ndim`` with sum-normalised output.

    ``nfeat`` is accepted by the constructor but is not used by this class.
    """

    def __init__(self, batch_size, nfeat, ndim, hidden):
        super().__init__()
        self.gc1 = GNN2Layer(batch_size, ndim, hidden)
        self.gc2 = GNN1Layer(batch_size, hidden, ndim)

    def forward(self, x, adj):
        """Apply both layers (each followed by ``doublerelu``) and normalise.

        Each node vector of the result is divided by its sum over the last axis.
        """
        hidden_repr = doublerelu(self.gc1(x, adj))
        scores = doublerelu(self.gc2(hidden_repr, adj))
        # rescale every node vector so that its entries sum to 1
        return scores / scores.sum(dim=2, keepdim=True)

In [9]:
def train(adj,features,labels,random_indices):
    """Fit a fresh ``GNN1`` and return the best (lowest-loss) step observed.

    Relies on the notebook-level globals ``nb_label``, ``hidden``, ``cuda``,
    ``lr``, ``weight_decay`` and ``epochs``.

    Args:
        adj: adjacency data; it is passed through ``normalize`` (from utils)
            and converted to a ``torch.FloatTensor``. ``adj.shape[0]`` is used
            as the model's batch size.
        features: node features, converted to a ``torch.FloatTensor`` and fed
            to the model on every step.
        labels: class labels; they are shifted by -1 here before being used as
            class indices for the cross-entropy loss.
        random_indices: row indices forwarded to ``GNN1.forward``, which only
            rewrites those rows of batch element 0.

    Returns:
        ``(best_loss, best_output, init_acc, best_acc)`` where ``best_*`` come
        from the step with the lowest loss and ``init_acc`` is the accuracy of
        the first step. All four are tensors detached from the autograd graph.
    """
    adj_norm = normalize(adj)

    # shift the labels so they can serve as class indices for the loss
    labels = labels - 1

    adj = torch.FloatTensor(adj)
    adj_norm = torch.FloatTensor(adj_norm)
    features = torch.FloatTensor(features)
    labels = torch.FloatTensor(labels)

    model = GNN1(batch_size=adj.shape[0],
                nfeat=adj.shape[1],
                ndim=nb_label,
                hidden=hidden)
    if cuda:
        model.cuda()
        features = features.cuda()
        adj = adj.cuda()
        adj_norm = adj_norm.cuda()
        labels = labels.cuda()

    # Train model
    t_total = time.time()

    optimizer = optim.Adam(model.parameters(),
                           lr=lr, weight_decay=weight_decay)

    criterion = nn.CrossEntropyLoss()

    # Loop-invariant views of the labels, computed once instead of every step.
    targets = labels.reshape(-1).long()
    label_row = labels.reshape(1, -1)
    n_labels = labels.shape[0]

    model.train()
    best_loss = best_output = best_acc = init_acc = None

    for epoch in range(epochs):

        t = time.time()
        optimizer.zero_grad()

        output = model(features, adj_norm, random_indices)
        loss = criterion(output[0], targets)

        # A new graph is built on every step, so there is nothing to retain.
        loss.backward()
        optimizer.step()

        # Accuracy of the pre-step output over all labelled rows.
        with torch.no_grad():
            accuracy = torch.sum(torch.argmax(output, axis=2) == label_row) / n_labels

        if epoch == 0:
            init_acc = accuracy

        if best_loss is None or loss < best_loss:
            # Detach so that the tracked best values do not keep this step's
            # autograd graph alive after the step has finished.
            best_loss = loss.detach()
            best_output = output.detach()
            best_acc = accuracy

        if epoch % 1000 == 0:
            # Note: the log reports the best-so-far values, not the current step's.
            print('Epoch: {:04d}'.format(epoch + 1),
                  'Accuracy: {:.4f}'.format(best_acc.item()),
                  'Loss: {:.8f}'.format(best_loss.item()),
                  'time: {:.4f}s'.format(time.time() - t))

    print("Optimization Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    return best_loss,best_output, init_acc, best_acc

In [10]:
# Load the dataset via utils.load_data.
# NOTE(review): presumably returns the adjacency, per-node feature values and
# per-node labels — confirm against utils. In the cells shown here, `feature`
# is only referenced by the disabled code below; the active pipeline builds
# its input features from `labels` instead.
adj,feature,labels = load_data()

# Disabled alternative: one-hot features built from `feature` rather than `labels`.
#feature = feature - 1
#nb_label = int(max(feature))+1
#featuress = np.eye(nb_label)[np.array(feature,dtype=int).reshape(1,-1)]

In [11]:
# Build one-hot node features directly from the class labels.
# np.copy yields a fresh ndarray, so `labels` itself is left untouched.
label = np.copy(labels) - 1
# ndarray.max() reduces over every entry in one vectorised call. The builtin
# max() iterated in Python and, for column-vector labels, produced a 1-element
# array whose int() conversion is deprecated in recent NumPy releases.
nb_label = int(label.max()) + 1
# Indexing the identity matrix with a (1, N) index array gives a
# (1, N, nb_label) stack of one-hot rows.
featuress = np.eye(nb_label)[np.array(label, dtype=int).reshape(1, -1)]

In [12]:
mask_percentage = [0.3,0.5,0.7]
init = []
final = []
# Upper bound checked against the refinement counter in the loop below.
max_rounds = 3
for m in mask_percentage:

    features = np.copy(featuress)
    # Masking: pick a random subset of node rows (without replacement) and
    # replace their one-hot vector by the uniform distribution over classes.
    # 1/nb_label equals the previously hard-coded 0.2 when there are 5 classes.
    number_of_rows = features[0].shape[0]
    random_indices = np.random.choice(number_of_rows, size=int(m*number_of_rows), replace=False)
    features[0][random_indices, :] = 1.0 / nb_label

    print("\nMasked {}% of nodes\n".format(int(m*100)))
    # First pass: its epoch-0 accuracy is recorded as the "initial" accuracy.
    prev_loss, op, acc, _ = train(adj,features,labels, random_indices)
    init.append(acc.item())
    # Refinement: retrain from scratch on the previous best output for as long
    # as the best loss keeps decreasing, bounded by max_rounds.
    loss, op, _, acc = train(adj,op.cpu().detach().numpy(),labels, random_indices)
    i = 0
    while loss < prev_loss :
        i += 1
        if i >= max_rounds:
            break
        prev_loss = loss
        loss, op, _, acc = train(adj,op.cpu().detach().numpy(),labels, random_indices)
    # NOTE(review): `acc` is taken from the last round that ran, even when that
    # round did not improve on the previous loss — confirm this is intended.
    final.append(acc.item())
    


Masked 30% of nodes

Epoch: 0001 Accuracy: 0.7395 Loss: 1.11618054 time: 0.3630s
Epoch: 1001 Accuracy: 0.8108 Loss: 1.09252489 time: 0.0020s
Epoch: 2001 Accuracy: 0.8108 Loss: 1.08271003 time: 0.0020s
Epoch: 3001 Accuracy: 0.8095 Loss: 1.08164322 time: 0.0020s
Epoch: 4001 Accuracy: 0.8122 Loss: 1.07989132 time: 0.0024s
Epoch: 5001 Accuracy: 0.8285 Loss: 1.06905651 time: 0.0020s
Epoch: 6001 Accuracy: 0.8433 Loss: 1.05804741 time: 0.0020s
Epoch: 7001 Accuracy: 0.8433 Loss: 1.05804741 time: 0.0020s
Epoch: 8001 Accuracy: 0.8433 Loss: 1.05804741 time: 0.0020s
Epoch: 9001 Accuracy: 0.8442 Loss: 1.05660546 time: 0.0020s
Epoch: 10001 Accuracy: 0.8442 Loss: 1.05660546 time: 0.0010s
Optimization Finished!
Total time elapsed: 19.7985s
Epoch: 0001 Accuracy: 0.8442 Loss: 1.05660546 time: 0.0030s
Epoch: 1001 Accuracy: 0.8447 Loss: 1.05622149 time: 0.0010s
Epoch: 2001 Accuracy: 0.8456 Loss: 1.05588710 time: 0.0010s
Epoch: 3001 Accuracy: 0.8456 Loss: 1.05588710 time: 0.0020s
Epoch: 4001 Accuracy: 0.8

Epoch: 5001 Accuracy: 0.6180 Loss: 1.26956975 time: 0.0020s
Epoch: 6001 Accuracy: 0.6180 Loss: 1.26956975 time: 0.0020s
Epoch: 7001 Accuracy: 0.6180 Loss: 1.26956975 time: 0.0020s
Epoch: 8001 Accuracy: 0.6180 Loss: 1.26956975 time: 0.0010s
Epoch: 9001 Accuracy: 0.6180 Loss: 1.26956975 time: 0.0020s
Epoch: 10001 Accuracy: 0.6180 Loss: 1.26956975 time: 0.0020s
Optimization Finished!
Total time elapsed: 17.4919s


In [13]:
# Derive the percentage column from `mask_percentage` so the table cannot drift
# from the sweep that actually ran; round() guards against float artefacts
# such as 0.3 * 100 == 30.000000000000004.
d = {
    'Mask Percentage': [int(round(m * 100)) for m in mask_percentage],
    'Initial Accuracy': init,
    'Final Accuracy': final,
}

In [14]:
# One row per mask percentage; the columns follow the key order of `d`.
df = pd.DataFrame(d)

In [15]:
# Bare expression: the notebook renders the summary table as this cell's output.
df

Unnamed: 0,Mask Percentage,Initial Accuracy,Final Accuracy
0,30,0.739453,0.849328
1,50,0.56421,0.744553
2,70,0.39592,0.617988
