In [42]:
import numpy as np
import torch
import torch_geometric
from torch_geometric.nn import VGAE, GCNConv
from torch_geometric.utils import from_networkx, erdos_renyi_graph
import networkx as nx
from scipy.stats import spearmanr

In [43]:
# Exploratory peek at the VGAE class namespace (its docstring and method signatures).
VGAE.__dict__

mappingproxy({'__module__': 'torch_geometric.nn.models.autoencoder',
              '__doc__': 'The Variational Graph Auto-Encoder model from the\n    `"Variational Graph Auto-Encoders" <https://arxiv.org/abs/1611.07308>`_\n    paper.\n\n    Args:\n        encoder (torch.nn.Module): The encoder module to compute :math:`\\mu`\n            and :math:`\\log\\sigma^2`.\n        decoder (torch.nn.Module, optional): The decoder module. If set to\n            :obj:`None`, will default to the\n            :class:`torch_geometric.nn.models.InnerProductDecoder`.\n            (default: :obj:`None`)\n    ',
              '__init__': <function torch_geometric.nn.models.autoencoder.VGAE.__init__(self, encoder: torch.nn.modules.module.Module, decoder: Optional[torch.nn.modules.module.Module] = None)>,
              'reparametrize': <function torch_geometric.nn.models.autoencoder.VGAE.reparametrize(self, mu: torch.Tensor, logstd: torch.Tensor) -> torch.Tensor>,
              'encode': <function torch_g

In [44]:
class Encoder(torch.nn.Module):
    """Two-layer GCN encoder that yields the two outputs a VGAE encoder must provide.

    A shared first convolution feeds two parallel convolutions, one producing
    the latent mean and one producing the latent log-std.
    """

    def __init__(self, num_features, hidden_channels, out_channels):
        """Build the shared layer and the two output heads.

        Args:
            num_features: Size of each input node feature vector.
            hidden_channels: Width of the shared hidden layer.
            out_channels: Size of the latent space (both heads).
        """
        super().__init__()
        # Shared trunk.
        self.conv1 = GCNConv(num_features, hidden_channels)
        # Parallel heads on top of the trunk.
        self.conv_mu = GCNConv(hidden_channels, out_channels)
        self.conv_logstd = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return the pair ``(mu, logstd)`` computed for every node."""
        hidden = torch.relu(self.conv1(x, edge_index))
        mu = self.conv_mu(hidden, edge_index)
        logstd = self.conv_logstd(hidden, edge_index)
        return mu, logstd

def generate_correlated_params(n, correlation, rng=None):
    """Draw ``n`` pairs from a standard bivariate normal with the given correlation.

    Args:
        n: Number of pairs to draw.
        correlation: Pearson correlation between the two coordinates; must lie
            in ``[-1, 1]``.
        rng: Optional ``numpy.random.Generator`` to draw from. When omitted the
            legacy global ``np.random`` state is used, exactly as before.

    Returns:
        ``numpy.ndarray`` of shape ``(n, 2)``.

    Raises:
        ValueError: If ``correlation`` is outside ``[-1, 1]`` (or NaN). Such a
            value makes the covariance matrix non positive-semidefinite, and
            NumPy would otherwise only warn and return meaningless samples.
    """
    # Written as a negated chained comparison so that NaN is rejected as well.
    if not (-1 <= correlation <= 1):
        raise ValueError(
            f"correlation must be within [-1, 1], got {correlation!r}"
        )

    mean = [0, 0]
    cov = [[1, correlation], [correlation, 1]]
    sampler = np.random if rng is None else rng
    return sampler.multivariate_normal(mean, cov, n)

def generate_graph(n, p):
    """Create an Erdős–Rényi random graph with ``n`` nodes and edge probability ``p``.

    Args:
        n: Number of nodes.
        p: Probability of creating each possible edge. Scalar-like values such
            as 0-d torch tensors or NumPy scalars are accepted.

    Returns:
        The ``networkx`` graph produced by ``nx.erdos_renyi_graph``.
    """
    # Coerce to built-in scalars so tensor / NumPy scalar objects are not
    # compared against the random draw for every candidate edge inside networkx.
    return nx.erdos_renyi_graph(int(n), float(p))

def get_embedding(model, data):
    """Return a graph-level embedding: the mean over nodes of the encoder output.

    The model is switched to evaluation mode (and left there) and the encoding
    is computed without gradient tracking.

    Args:
        model: Object exposing ``eval()`` and ``encode(x, edge_index)``.
        data: Object exposing ``x`` and ``edge_index`` attributes.

    Returns:
        ``numpy.ndarray`` holding the per-dimension mean of the node encodings.
    """
    model.eval()
    with torch.no_grad():
        node_latents = model.encode(data.x, data.edge_index)
    graph_vector = torch.mean(node_latents, dim=0)
    return graph_vector.numpy()


In [46]:
from tqdm import tqdm

def _vgae_train_step(model, optimizer, data):
    """Run one optimisation step of ``model`` on a single graph; return the loss as a float."""
    optimizer.zero_grad()
    z = model.encode(data.x, data.edge_index)
    loss = model.recon_loss(z, data.edge_index)
    # KL term is weighted by 1 / number of nodes in this graph.
    loss = loss + (1 / data.num_nodes) * model.kl_loss()
    loss.backward()
    optimizer.step()
    return loss.item()


def _degree_features(graph):
    """Return an ``(N, 1)`` float tensor holding each node's degree."""
    degrees = [deg for _, deg in graph.degree()]
    return torch.tensor(degrees, dtype=torch.float).view(-1, 1)


def experiment(num_graphs=200, nodes=50, hidden_channels=32, out_channels=16, correlation=0.5,
               epochs=100, lr=0.01, seed=None):
    """Compare the correlation of graph-generation parameters with that of VGAE embeddings.

    Steps:
      1. Draw ``num_graphs`` correlated parameter pairs and squash them into
         (0, 1) with a sigmoid so they can serve as edge probabilities.
      2. For every pair build two Erdős–Rényi graphs (one per probability) with
         node degree as the only node feature.
      3. Train two independently initialised VGAE models on all graphs.
      4. Embed every graph with both models and compute Spearman correlations.

    Args:
        num_graphs: Number of parameter pairs (twice as many graphs are built).
        nodes: Number of nodes in every graph.
        hidden_channels: Hidden width of each encoder.
        out_channels: Latent dimension of each encoder.
        correlation: Correlation of the underlying bivariate normal draw.
        epochs: Number of passes over the dataset (previously hard-coded to 100).
        lr: Adam learning rate for both models (previously hard-coded to 0.01).
        seed: Optional integer. When given, Python's ``random``, NumPy and
            torch global generators are seeded so the run is repeatable. When
            ``None`` (default) no global state is touched.

    Returns:
        Tuple ``(param_corr, emb_corr)``: the Spearman correlation between the
        two probability columns, and the Spearman correlation between the
        flattened embeddings produced by the two models.
    """
    if seed is not None:
        # Function-scope import: only needed on this optional path.
        # networkx is expected to draw from Python's global `random` when no
        # seed is passed to the generator, hence seeding it too.
        import random
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

    print("Starting experiment...")
    # Generate correlated parameters
    raw_params = generate_correlated_params(num_graphs, correlation)
    # Normalize params with sigmoid, then go back to NumPy so that neither
    # networkx nor scipy ever receives torch tensors.
    params = torch.sigmoid(torch.tensor(raw_params)).numpy()
    print(f"Generated {num_graphs} pairs of correlated parameters")

    # Generate graphs and prepare dataset
    all_data = []
    for p1, p2 in tqdm(params, desc="Generating graph pairs"):
        g1 = generate_graph(nodes, float(p1))
        g2 = generate_graph(nodes, float(p2))

        data1 = from_networkx(g1)
        data2 = from_networkx(g2)

        data1.x = _degree_features(g1)
        data2.x = _degree_features(g2)

        # Graphs are stored interleaved: [g1_0, g2_0, g1_1, g2_1, ...]
        all_data.extend([data1, data2])

    print(f"Generated {len(all_data)} graphs")

    # Create VGAE models
    model1 = VGAE(Encoder(1, hidden_channels, out_channels))
    model2 = VGAE(Encoder(1, hidden_channels, out_channels))

    optimizer1 = torch.optim.Adam(model1.parameters(), lr=lr)
    optimizer2 = torch.optim.Adam(model2.parameters(), lr=lr)

    # Train VGAE models on the entire dataset
    print("Training VGAE models...")
    for epoch in tqdm(range(epochs), desc="Training epochs"):
        model1.train()
        model2.train()
        total_loss1 = 0
        total_loss2 = 0

        for data in all_data:
            # Same order as before (model1 then model2 per graph) so the
            # sequence of random draws is unchanged.
            total_loss1 += _vgae_train_step(model1, optimizer1, data)
            total_loss2 += _vgae_train_step(model2, optimizer2, data)

        if (epoch + 1) % 5 == 0:
            print(f"Epoch {epoch+1}, Avg Loss1: {total_loss1/len(all_data):.4f}, Avg Loss2: {total_loss2/len(all_data):.4f}")

    print("VGAE models trained")

    # Get embeddings for all graphs using the trained models
    embeddings1 = []
    embeddings2 = []

    print("Calculating embeddings...")
    for data in tqdm(all_data, desc="Processing graphs"):
        embeddings1.append(get_embedding(model1, data))
        embeddings2.append(get_embedding(model2, data))

    print("Calculating correlations...")
    # Calculate correlations
    param_corr, _ = spearmanr(params[:, 0], params[:, 1])
    # NOTE(review): this correlates model1-vs-model2 embeddings of the *same*
    # graphs (flattened across latent dimensions), not the first-vs-second
    # graph of each pair — confirm this is the comparison that was intended.
    emb_corr, _ = spearmanr(np.array(embeddings1).flatten(), np.array(embeddings2).flatten())

    return param_corr, emb_corr

# Run the experiment with its default arguments and keep both correlations
# for the reporting cell below.
print("Running experiment...")
param_corr, emb_corr = experiment()

Running experiment...
Starting experiment...
Generated 200 pairs of correlated parameters


Generating graph pairs:  20%|██        | 40/200 [00:00<00:04, 37.90it/s]

In [41]:
# Report the two Spearman correlations returned by experiment().
# NOTE(review): this cell's execution count (41) is lower than that of the
# cells above it, so the output shown beneath it may come from an earlier
# run — re-run it after the experiment cell has finished.
print(f"Parameter correlation: {param_corr}")
print(f"Embedding correlation: {emb_corr}")
print("Experiment completed.")

Parameter correlation: -1.0
Embedding correlation: -0.020076036018167577
Experiment completed.
