In [1]:
##layer.py
from torch_geometric.nn import GCNConv
from torch_geometric.nn.pool.topk_pool import topk,filter_adj
from torch.nn import Parameter
import torch


class SAGPool(torch.nn.Module):
    def __init__(self,in_channels,ratio=0.8,Conv=GCNConv,non_linearity=torch.tanh):
        super(SAGPool,self).__init__()
        self.in_channels = in_channels
        self.ratio = ratio
        self.score_layer = Conv(in_channels,1)
        self.non_linearity = non_linearity
    def forward(self, x, edge_index, edge_attr=None, batch=None):
        if batch is None:
            batch = edge_index.new_zeros(x.size(0))
        #x = x.unsqueeze(-1) if x.dim() == 1 else x
        score = self.score_layer(x,edge_index).squeeze()
        #the following two lines are added to fix a bug (IndexError)
        if len(score.size()) == 0:
            score = score.unsqueeze(0)
        perm = topk(score, self.ratio, batch)
        x = x[perm] * self.non_linearity(score[perm]).view(-1, 1)
        batch = batch[perm]
        edge_index, edge_attr = filter_adj(
            edge_index, edge_attr, perm, num_nodes=score.size(0))

        return x, edge_index, edge_attr, batch, perm

In [2]:
##utils.py
import torch
from torch_geometric.loader import DataLoader
def get_adj(data):
    edge_index = data.edge_index
    n = data.x.shape[0]
    adj = torch.zeros([n,n])
    for i in range(edge_index.shape[1]):
        j = edge_index[0,i].item()
        k = edge_index[1,i].item()
    #     print(adj[j,k],adj[k,j])
    #     print(j,k)
        adj[j,k]=1
    #     print("edge_added")
    #     print(adj[j,k],adj[k,j])
    return adj
# def recon_loss(data,pred_adj):
#     criterion = torch.nn.BCEWithLogitsLoss()
#     return criterion(get_adj(data),pred_adj)


In [3]:
from platform import node
from numpy import std
import torch
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from torch_geometric.nn.models import InnerProductDecoder
import torch.nn.functional as F
from layers import SAGPool
from torch_geometric.loader import DataLoader

from utils import get_adj
class GraphEncoder(torch.nn.Module):
    def __init__(self,args):
        super(GraphEncoder, self).__init__()
        self.args = args
        self.num_features = args.num_features
        self.nhid = args.nhid
        self.pooling_ratio = args.pooling_ratio
        self.dropout_ratio = args.dropout_ratio
        self.variational = args.variational
        # Encoder Layers
        self.conv1 = GCNConv(self.num_features, self.nhid)
        self.pool1 = SAGPool(self.nhid, ratio=self.pooling_ratio)
        self.conv2 = GCNConv(self.nhid, self.nhid)
        self.pool2 = SAGPool(self.nhid, ratio=self.pooling_ratio)
        self.conv3 = GCNConv(self.nhid, self.nhid)
        self.pool3 = SAGPool(self.nhid, ratio=self.pooling_ratio)
        # #Latent Layer transformation for Graph VAE
        if self.variational:
            self.latent_embeding_size = args.latent_embeding_size
            self.mu_transform = torch.nn.Linear(self.nhid*2, self.latent_embeding_size)
            self.log_var_transform = torch.nn.Linear(self.nhid*2, self.latent_embeding_size)

    def forward(self,data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        x = F.relu(self.conv1(x, edge_index))
        x, edge_index, _, batch, _ = self.pool1(x, edge_index, None, batch)
        x1 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv2(x, edge_index))
        x, edge_index, _, batch, _ = self.pool2(x, edge_index, None, batch)
        x2 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv3(x, edge_index))
        x, edge_index, _, batch, _ = self.pool3(x, edge_index, None, batch)
        x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = x1 + x2 + x3
        # # Latent transformation layers for mu and sigma 
        if self.variational:
            mu = self.mu_transform(x)
            log_var = self.log_var_transform(x)
            return mu,log_var
        else:
            return x

class GraphDecoder(torch.nn.Module):
    def __init__(self,args) -> None:
        super(GraphDecoder,self).__init__()
        self.args = args
        self.num_node_features = args.num_node_features
        self.nhid = args.nhid
        self.decoder_hidden_size = args.decoder_hidden_size
        self.max_num_nodes = args.max_num_nodes
        self.output_dim = args.max_num_nodes*(args.max_num_nodes-1)//2
        self.latent_embeding_size = args.latent_embeding_size
        self.variational = args.variational
        self.linear1 = torch.nn.Linear(self.latent_embeding_size, self.decoder_hidden_size)
        self.linear2 = torch.nn.Linear(self.decoder_hidden_size, self.output_dim)
        self.linear3 = torch.nn.Linear(self.decoder_hidden_size,self.num_node_features*self.max_num_nodes)

    def forward(self,z):
        x = F.leaky_relu(self.linear1(z))
        adj_upper = self.linear2(x)
        adj_upper = torch.sigmoid(adj_upper)
        adj = self.recon_adj(adj_upper)
        node_features= F.leaky_relu(self.linear3(x))
        node_features = node_features.view(self.max_num_nodes,self.num_node_features)
        return adj,node_features
    
    def recon_adj(self,adj_upper):
        adj = torch.zeros(self.max_num_nodes,self.max_num_nodes)
        adj[torch.triu(torch.ones(self.max_num_nodes,self.max_num_nodes),diagonal=1)==1] = adj_upper
        diag = torch.diag(torch.diag(adj, 0))
        adj = adj + torch.transpose(adj, 0, 1) - diag
        return adj
    
    def recon_loss(self,data,adj_pred,node_features_pred):
        adj_recon_loss = F.mse_loss(get_adj(data),adj_pred)
        node_features_recon_loss = F.mse_loss(data.x,node_features_pred)
        loss = adj_recon_loss + node_features_recon_loss
        return loss

In [4]:
## train.py
import torch_geometric.nn as nn
from model import GraphEncoder,GraphDecoder
import torch
import numpy as np
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric import utils
import torch.nn.functional as F
import argparse
import os
from torch.utils.data import random_split
from torch.utils.data.dataset import Subset
from utils import get_adj
parser = argparse.ArgumentParser()

parser.add_argument('--seed', type=int, default=777,
                    help='seed')
parser.add_argument('--batch_size', type=int, default=128,
                    help='batch size')
parser.add_argument('--lr', type=float, default=0.0008,
                    help='learning rate')
parser.add_argument('--weight_decay', type=float, default=0.0001,
                    help='weight decay')
parser.add_argument('--pooling_ratio', type=float, default=0.5,
                    help='pooling ratio')
parser.add_argument('--dropout_ratio', type=float, default=0.5,
                    help='dropout ratio')
parser.add_argument('--dataset', type=str, default='BOTDS',
                    help='dataset sub-directory under dir: data. e.g. BOTDS')
parser.add_argument('--epochs', type=int, default=1000,
                    help='maximum number of epochs')
parser.add_argument('--val_epochs', type=int, default=25,
                    help='maximum number of validation epochs')
parser.add_argument('--patience', type=int, default=50,
                    help='patience for earlystopping')
parser.add_argument('--pooling_layer_type', type=str, default='GCNConv',
                    help='type of pooling layer')
parser.add_argument('--use_node_attr', type=bool, default=True,
                    help='node features')
parser.add_argument('--variational', type=bool, default=False,
                    help='Varitional Graph Auto Encoder')

#Load GPU (If present)
args = parser.parse_args()
# args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
args.device = torch.device("cpu")
dataset = TUDataset('data',name = args.dataset,use_node_attr=args.use_node_attr)
args.nhid = 128
args.num_features = dataset.num_features
args.num_node_features = dataset.num_node_features
loader = DataLoader(dataset, batch_size=1, shuffle=False)
args.max_num_nodes = 75
args.variational = True
args.latent_embeding_size = args.nhid*2
args.decoder_hidden_size = args.latent_embeding_size*4
if args.variational:
    model = nn.VGAE(GraphEncoder(args),GraphDecoder(args))
else:
    model = nn.GAE(GraphEncoder(args),GraphDecoder(args))
model.to(args.device)
optimizer = torch.optim.Adam(model.parameters(),lr = args.lr,weight_decay=args.weight_decay)
j =0
## Total 46 examples of sizes 75 nodes 40 for training remaining 6 for training
for i,data in enumerate(loader):
    if data.num_nodes == args.max_num_nodes:
        j=j+1
        optimizer.zero_grad()
        z = model.encode(data)
        adj,node_features= model.decode(z)
        loss = model.decoder.recon_loss(data,adj,node_features)
        if args.variational:
            loss = loss + model.kl_loss()
        print(loss)
        loss.backward()
        optimizer.step()
        if(j>40):
            break
j=0
for i,data in enumerate(loader):
    if data.num_nodes == args.max_num_nodes:
        j=j+1
        if j<=40:
            continue
        z = model.encode(data)
        adj,node_features= model.decode(z)
        loss = model.decoder.recon_loss(data,adj,node_features)
        if args.variational:
            loss = loss + model.kl_loss()
        print(f"loss:{loss}")
        # print(adj)
        # print(get_adj(data))
        # print(node_features)
        


# num_training = int(len(dataset)*0.6)
# num_val = int(len(dataset)*0.1)
# num_test = len(dataset) - (num_training+num_val)
# training_set,validation_set,test_set = random_split(dataset,[num_test,num_training,num_val])
# train_loader = DataLoader(training_set, batch_size=1, shuffle=True)
# val_loader = DataLoader(validation_set, batch_size=1, shuffle=False)
# test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

# it = test_loader._get_iterator()
# sample = it.next()
# # print(dataset.num_node_features)
# # print(sample.batch,sample.x)
# print(get_adj(sample))
# z= model.encode(sample)
# adj,node_features,loss= model.decode(sample,z)
# print(adj,adj.shape)
# print(node_features,node_features.shape)
# print(criterion(get_adj(sample),adj),loss)
# # loss = model.recon_loss(z.t(),sample.edge_index)
# # print(loss)
# # print(criterion(get_adj(sample),adj))
# loss = 0
# epoch_loss = []

# for epoch in range(2):
#     model.train()
#     for i,data in enumerate(train_loader):
#         optimizer.zero_grad()
#         z = model.encode(data)
#         adj,node_features,loss = model.decode(data,z)
#         loss.backward()
#         optimizer.step()
#     model.eval()
#     avg_loss =0
#     for i,data in enumerate(val_loader):
#         z = model.encode(data)
#         adj,node_features,loss = model.decode(data,z)
#         avg_loss += loss
#     epoch_loss.append(avg_loss/(i+1))
#     print(f'epoch: {epoch} avg loss: {epoch_loss[epoch]}')
# print(epoch_loss)
# z= model.encode(sample)
# adj,node_features,loss = model.decode(sample,z)
# print(adj,adj.shape)
# print(adj,node_features.shape)
# print(sample.x,sample.x.shape)
# print(loss)
# print(criterion(get_adj(sample),adj))
# print(criterion(sample.x,node_features))

usage: ipykernel_launcher.py [-h] [--seed SEED] [--batch_size BATCH_SIZE]
                             [--lr LR] [--weight_decay WEIGHT_DECAY]
                             [--pooling_ratio POOLING_RATIO]
                             [--dropout_ratio DROPOUT_RATIO]
                             [--dataset DATASET] [--epochs EPOCHS]
                             [--val_epochs VAL_EPOCHS] [--patience PATIENCE]
                             [--pooling_layer_type POOLING_LAYER_TYPE]
                             [--use_node_attr USE_NODE_ATTR]
                             [--variational VARIATIONAL]
ipykernel_launcher.py: error: unrecognized arguments: -f /home/soham/.local/share/jupyter/runtime/kernel-66fbff8f-50e5-4010-b20d-b71ca8cf3ddb.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


Graph Auto Encoder

In [8]:
!python3 train.py

Started Training
tensor(0.2635, grad_fn=<AddBackward0>)
tensor(0.2603, grad_fn=<AddBackward0>)
tensor(0.2578, grad_fn=<AddBackward0>)
tensor(0.2546, grad_fn=<AddBackward0>)
tensor(0.2515, grad_fn=<AddBackward0>)
tensor(0.2466, grad_fn=<AddBackward0>)
tensor(0.2392, grad_fn=<AddBackward0>)
tensor(0.2303, grad_fn=<AddBackward0>)
tensor(0.2203, grad_fn=<AddBackward0>)
tensor(0.2051, grad_fn=<AddBackward0>)
tensor(0.1929, grad_fn=<AddBackward0>)
tensor(0.1793, grad_fn=<AddBackward0>)
tensor(0.1630, grad_fn=<AddBackward0>)
tensor(0.1376, grad_fn=<AddBackward0>)
tensor(0.1243, grad_fn=<AddBackward0>)
tensor(0.1232, grad_fn=<AddBackward0>)
tensor(0.1049, grad_fn=<AddBackward0>)
tensor(0.0840, grad_fn=<AddBackward0>)
tensor(0.0815, grad_fn=<AddBackward0>)
tensor(0.0828, grad_fn=<AddBackward0>)
tensor(0.0801, grad_fn=<AddBackward0>)
tensor(0.0736, grad_fn=<AddBackward0>)
tensor(0.0786, grad_fn=<AddBackward0>)
tensor(0.0811, grad_fn=<AddBackward0>)
tensor(0.0661, grad_fn=<AddBackward0>)
tensor(0

Variational Graph Auto Encoder

In [7]:
!python3 train.py --variational True

Started Training
tensor(0.8057, grad_fn=<AddBackward0>)
tensor(0.7201, grad_fn=<AddBackward0>)
tensor(0.6673, grad_fn=<AddBackward0>)
tensor(0.6103, grad_fn=<AddBackward0>)
tensor(0.5631, grad_fn=<AddBackward0>)
tensor(0.5064, grad_fn=<AddBackward0>)
tensor(0.4466, grad_fn=<AddBackward0>)
tensor(0.3946, grad_fn=<AddBackward0>)
tensor(0.3319, grad_fn=<AddBackward0>)
tensor(0.2903, grad_fn=<AddBackward0>)
tensor(0.2439, grad_fn=<AddBackward0>)
tensor(0.2052, grad_fn=<AddBackward0>)
tensor(0.2271, grad_fn=<AddBackward0>)
tensor(0.2242, grad_fn=<AddBackward0>)
tensor(0.1667, grad_fn=<AddBackward0>)
tensor(0.2262, grad_fn=<AddBackward0>)
tensor(0.1309, grad_fn=<AddBackward0>)
tensor(0.1177, grad_fn=<AddBackward0>)
tensor(0.0994, grad_fn=<AddBackward0>)
tensor(0.1016, grad_fn=<AddBackward0>)
tensor(0.0995, grad_fn=<AddBackward0>)
tensor(0.0835, grad_fn=<AddBackward0>)
tensor(0.0983, grad_fn=<AddBackward0>)
tensor(0.1181, grad_fn=<AddBackward0>)
tensor(0.0903, grad_fn=<AddBackward0>)
tensor(0