In [150]:
import pandas as pd
import networkx as nx
import os.path as osp

import torch
import torch_geometric
from torch_geometric.data import Dataset, download_url
from torch_geometric.utils.convert import from_networkx
import numpy as np


import torch.nn.functional as F
from torch_geometric.nn import GCNConv,Linear
from torch_geometric.nn import GAE, Node2Vec,VGAE
from torch.utils.tensorboard import SummaryWriter
from torch_geometric.nn.models.autoencoder import ARGVA

from torch_geometric.transforms import RandomLinkSplit
from tqdm import tqdm

In [2]:
torch.manual_seed(0)

<torch._C.Generator at 0x2219b8545b0>

In [5]:
# import dataset
df=pd.read_csv('PPI.csv')
G=nx.from_pandas_edgelist(df,'Official Symbol Interactor A','Official Symbol Interactor B' )
#Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
#G0 = G.subgraph(Gcc[0])
G=nx.convert_node_labels_to_integers(G)
pyg_graph = from_networkx(G)

node_embedding=Node2Vec(pyg_graph.edge_index,20,16,10)

#embedding del nodo 0
#node_embedding.forward().data[0]# cambia ogni volta anche con il seed 

#aggiungiamo gli embedding come features dei nodi
for n in G.nodes():
    G.nodes[n]['x']=np.array(node_embedding.forward().data[n])
    
pyg_graph = from_networkx(G)

  data[key] = torch.tensor(value)


In [10]:
transform = RandomLinkSplit(is_undirected=False,split_labels=True,
                      neg_sampling_ratio=1.0,
                      key = "edge_label",
                      disjoint_train_ratio=0,
                      num_val =0)
train_data, val_data, test_data = transform(pyg_graph)

## GAE

In [14]:
class GCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GCNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True) # cached only for transductive learning
        self.conv2 = GCNConv(2 * out_channels, out_channels, cached=True) # cached only for transductive learning

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x

In [40]:
def train(data):
    model.train()
    optimizer.zero_grad()
    z = model.encode(data.x, data.edge_index)
    pos_edge_index=data.pos_edge_label_index
    neg_edge_index=data.neg_edge_label_index
    loss = model.recon_loss(z, pos_edge_index,neg_edge_index) 
    loss.backward()
    optimizer.step()
    return float(loss)


def test(data):
    model.eval()
    with torch.no_grad():
        z = model.encode(data.x,data.edge_index)
        pos_edge_index=data.pos_edge_label_index
        neg_edge_index=data.neg_edge_label_index
    return model.test(z, pos_edge_index, neg_edge_index)

In [41]:
# parameters
out_channels = 20   #embedding 
num_features = train_data.x.shape[1] 
epochs = 100


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model
model = GAE(GCNEncoder(num_features, out_channels))
model = model.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [38]:
writer = SummaryWriter('runs_3/GAE_experiment'+'20d_100_epochs')

In [39]:
for epoch in range(1, epochs + 1):
    loss = train(train_data)
    auc, ap = test(test_data)
    print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    writer.add_scalar('loss train',loss,epoch)
    writer.add_scalar('auc train',auc,epoch) 
    writer.add_scalar('ap train',ap,epoch)  

Epoch: 001, AUC: 0.7485, AP: 0.6955
Epoch: 002, AUC: 0.7605, AP: 0.7116
Epoch: 003, AUC: 0.7772, AP: 0.7329
Epoch: 004, AUC: 0.7943, AP: 0.7550
Epoch: 005, AUC: 0.8109, AP: 0.7766
Epoch: 006, AUC: 0.8262, AP: 0.7965
Epoch: 007, AUC: 0.8386, AP: 0.8128
Epoch: 008, AUC: 0.8479, AP: 0.8255
Epoch: 009, AUC: 0.8553, AP: 0.8358
Epoch: 010, AUC: 0.8618, AP: 0.8449
Epoch: 011, AUC: 0.8676, AP: 0.8530
Epoch: 012, AUC: 0.8722, AP: 0.8599
Epoch: 013, AUC: 0.8757, AP: 0.8653
Epoch: 014, AUC: 0.8785, AP: 0.8696
Epoch: 015, AUC: 0.8810, AP: 0.8733
Epoch: 016, AUC: 0.8826, AP: 0.8763
Epoch: 017, AUC: 0.8832, AP: 0.8784
Epoch: 018, AUC: 0.8835, AP: 0.8800
Epoch: 019, AUC: 0.8847, AP: 0.8818
Epoch: 020, AUC: 0.8859, AP: 0.8834
Epoch: 021, AUC: 0.8854, AP: 0.8836
Epoch: 022, AUC: 0.8842, AP: 0.8832
Epoch: 023, AUC: 0.8849, AP: 0.8838
Epoch: 024, AUC: 0.8858, AP: 0.8844
Epoch: 025, AUC: 0.8840, AP: 0.8831
Epoch: 026, AUC: 0.8818, AP: 0.8813
Epoch: 027, AUC: 0.8822, AP: 0.8815
Epoch: 028, AUC: 0.8824, AP:

## DeepGAE

In [42]:
class DeepGCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DeepGCNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True)
        self.conv2 = GCNConv(2 * out_channels, 2 * out_channels, cached=True)
        self.conv3 = GCNConv(2 * out_channels, out_channels, cached=True)

    def forward(self, x, edge_index,edge_weight=None):
        x = self.conv1(x, edge_index,edge_weight=edge_weight).relu()
        x = self.conv2(x, edge_index,edge_weight=edge_weight).relu()
        return self.conv3(x, edge_index,edge_weight=edge_weight)

In [48]:
# parameters
out_channels = 20   #embedding 
num_features = train_data.x.shape[1] 
epochs = 100


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model
model = GAE(DeepGCNEncoder(num_features, out_channels))
model = model.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [49]:
writer = SummaryWriter('runs_3/DeepGAE_experiment'+'20d_100_epochs')

for epoch in tqdm(range(1, epochs + 1)):
    loss = train(train_data)
    auc, ap = test(test_data)
    #print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    writer.add_scalar('loss train',loss,epoch)
    writer.add_scalar('auc train',auc,epoch) 
    writer.add_scalar('ap train',ap,epoch) 

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [08:21<00:00,  5.02s/it]


## VGAE

In [50]:
class VariationalGCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(VariationalGCNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True)
        self.conv_mu = GCNConv(2 * out_channels, out_channels, cached=True)
        self.conv_logstd = GCNConv(2 * out_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv_mu(x, edge_index), self.conv_logstd(x, edge_index)

In [54]:
out_channels = 20   #embedding 
num_features = train_data.x.shape[1] 
epochs = 100

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = VGAE(VariationalGCNEncoder(num_features, out_channels)) 
model = model.to(device)


optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [52]:
def train_VGAE(data):
    model.train()
    optimizer.zero_grad()
    z = model.encode(data.x, data.edge_index)
    pos_edge_index=data.pos_edge_label_index
    neg_edge_index=data.neg_edge_label_index
    loss = model.recon_loss(z, pos_edge_index,neg_edge_index) 
    loss = loss + (1 / data.x.shape[0]) * model.kl_loss()
    loss.backward()
    optimizer.step()
    return float(loss)

In [55]:
writer = SummaryWriter('runs_3/VGAE_experiment'+'20d_100_epochs')

for epoch in tqdm(range(1, epochs + 1)):
    loss = train_VGAE(train_data)
    auc, ap = test(test_data)
    #print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    writer.add_scalar('loss train',loss,epoch)
    writer.add_scalar('auc train',auc,epoch) 
    writer.add_scalar('ap train',ap,epoch) 

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [06:25<00:00,  3.85s/it]


## ARGVA

In [59]:
class VariationalGCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(VariationalGCNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, 2 * out_channels, cached=True)
        self.conv_mu = GCNConv(2 * out_channels, out_channels, cached=True)
        self.conv_logstd = GCNConv(2 * out_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv_mu(x, edge_index), self.conv_logstd(x, edge_index)
    
class Discriminator(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.lin1 = Linear(in_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, hidden_channels)
        self.lin3 = Linear(hidden_channels, out_channels)

    def forward(self, x):
        x = self.lin1(x).relu()
        x = self.lin2(x).relu()
        return self.lin3(x)

In [60]:
def train_ARGVA(data):
    model.train()
    encoder_optimizer.zero_grad()
    z = model.encode(data.x, data.edge_index)
    pos_edge_index=data.pos_edge_label_index
    neg_edge_index=data.neg_edge_label_index
    
    for i in range(5):  
        #discriminator.train()
        discriminator_optimizer.zero_grad()
        discriminator_loss = model.discriminator_loss(z)
        discriminator_loss.backward()
        discriminator_optimizer.step()
 
    loss = model.recon_loss(z, pos_edge_index,neg_edge_index) 
    loss = loss + model.reg_loss(z)
    loss = loss + (1 / data.x.shape[0]) * model.kl_loss()
    
    loss.backward()
    encoder_optimizer.step()
    return float(loss)

In [63]:
embedding = 20   #embedding 
num_features = train_data.x.shape[1] 
epochs = 100

encoder = VariationalGCNEncoder(num_features, embedding)

discriminator = Discriminator(in_channels=embedding, hidden_channels=embedding//2, 
                              out_channels=1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = ARGVA(encoder, discriminator)
model = model.to(device)

discriminator_optimizer = torch.optim.Adam(discriminator.parameters(), lr=0.001)
encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=0.005)

In [64]:
writer = SummaryWriter('runs_3/ARGVA_experiment'+'20d_100_epochs')

for epoch in tqdm(range(1, epochs + 1)):
    loss = train_ARGVA(train_data)
    auc, ap = test(test_data)
    #print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    writer.add_scalar('loss train',loss,epoch)
    writer.add_scalar('auc train',auc,epoch) 
    writer.add_scalar('ap train',ap,epoch) 

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [05:46<00:00,  3.46s/it]


## GAE with Linear Decoder

In [119]:
class GCNDecoder(torch.nn.Module):
    def __init__(self, latent_dim):
        super(GCNDecoder, self).__init__()
        self.lin1 = Linear(latent_dim,latent_dim)
        self.lin2 = Linear(latent_dim,latent_dim//2)
        self.lin3 = Linear(latent_dim//2,1)

    def forward(self, z, edge_index, sigmoid=True):

        z = (z[edge_index[0]] * z[edge_index[1]])#.sum(dim=1)
        z = self.lin1(z).relu()
        z = self.lin2(z).relu()
        z = self.lin3(z)
        z=z.squeeze()
        
        return torch.sigmoid(z) if sigmoid else value    

In [120]:
# parameters
out_channels = 20   #embedding 
num_features = train_data.x.shape[1] 
epochs = 100

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = GAE(GCNEncoder(num_features, out_channels),GCNDecoder(out_channels))
model = model.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [121]:
writer = SummaryWriter('runs_3/GAE+dec_experiment'+'20d_100_epochs')

for epoch in tqdm(range(1, epochs + 1)):
    loss = train(train_data)
    auc, ap = test(test_data)
    #print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    writer.add_scalar('loss train',loss,epoch)
    writer.add_scalar('auc train',auc,epoch) 
    writer.add_scalar('ap train',ap,epoch) 

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [05:24<00:00,  3.24s/it]


## DeepGAE with Linear Decoder

In [125]:
# parameters
out_channels = 20   #embedding 
num_features = train_data.x.shape[1] 
epochs = 100

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = GAE(DeepGCNEncoder(num_features, out_channels),GCNDecoder(out_channels))
model = model.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [126]:
writer = SummaryWriter('runs_3/DeepGAE+dec_experiment'+'20d_100_epochs')

for epoch in tqdm(range(1, epochs + 1)):
    loss = train(train_data)
    auc, ap = test(test_data)
    #print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    writer.add_scalar('loss train',loss,epoch)
    writer.add_scalar('auc train',auc,epoch) 
    writer.add_scalar('ap train',ap,epoch) 

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [07:59<00:00,  4.80s/it]


## VGAE with Linear Decoder

In [129]:
out_channels = 20   #embedding 
num_features = train_data.x.shape[1] 
epochs = 100

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = VGAE(VariationalGCNEncoder(num_features, out_channels),GCNDecoder(out_channels)) 
model = model.to(device)


optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [130]:
writer = SummaryWriter('runs_3/VGAE+dec_experiment'+'20d_100_epochs')

for epoch in tqdm(range(1, epochs + 1)):
    loss = train_VGAE(train_data)
    auc, ap = test(test_data)
    #print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    writer.add_scalar('loss train',loss,epoch)
    writer.add_scalar('auc train',auc,epoch) 
    writer.add_scalar('ap train',ap,epoch) 

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [06:33<00:00,  3.94s/it]


## ARGVA with linear Decoder (AUC e AP basse)

In [147]:
class Discriminator_sig(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.lin1 = Linear(in_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, hidden_channels)
        self.lin3 = Linear(hidden_channels, out_channels)

    def forward(self, x):
        x = self.lin1(x).relu()
        x = self.lin2(x).relu()
        return torch.sigmoid(self.lin3(x))#added sigmoid

In [145]:
embedding = 10   #embedding 
num_features = train_data.x.shape[1] 
epochs = 100

encoder = VariationalGCNEncoder(num_features, embedding)

#discriminator = Discriminator(in_channels=embedding, hidden_channels=embedding//2, 
#                              out_channels=1)
    
discriminator = Discriminator_sig(in_channels=embedding, hidden_channels=embedding//2, 
                              out_channels=1)
decoder=GCNDecoder(embedding)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = ARGVA(encoder, discriminator,decoder)
model = model.to(device)

discriminator_optimizer = torch.optim.Adam(discriminator.parameters(), lr=0.001)#0.001
encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=0.005)#0.005

In [None]:
writer = SummaryWriter('runs_3/ARGVAsig+dec_experiment'+'10d_100_epochs')

for epoch in tqdm(range(1, epochs + 1)):
    loss = train_ARGVA(train_data)
    auc, ap = test(test_data)
    #print('Epoch: {:03d}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, auc, ap))
    
    writer.add_scalar('loss train',loss,epoch)
    writer.add_scalar('auc train',auc,epoch) 
    writer.add_scalar('ap train',ap,epoch) 
writer.close()