In [24]:
import pathpyG as pp

print('Running on', pp.config['torch']['device'])

Running on cuda


# Load the synthetic dataset

In [25]:
# read the paths
paths = pp.PathData.from_csv('../data/temporal_clusters.ngram')

In [26]:
# Create the graph corresponding to paths
g = pp.HigherOrderGraph(paths, order=1)

# Plotting the time-aggregated network (first-order graph)
pp.plot(g)

In [27]:
# Create the second-order graph corresponding to paths
g2 = pp.HigherOrderGraph(paths, order=2)

# Plotting the second-order graph
pp.plot(g2)

# Prepare the data

In [28]:
# Define edge indices for first and second-order graphs
edge_index_g1 = g['edge_index']
edge_index_g2 = g2['edge_index']

In [29]:
# Define edge weights 
edge_weights = g['edge_weight']
edge_weights_higher_order = g2['edge_weight']

In [30]:
# Define bipartite mapping
import torch

def generate_bipatite_edge_index(mapping = 'last'):

    if mapping == 'last':
        bipartide_edge_index = torch.tensor([list(g2.node_index_to_id.keys()), 
                                     [i[1] for i in g2.node_index_to_id.values()]])
        
    elif mapping == 'first':
        bipartide_edge_index = torch.tensor([list(g2.node_index_to_id.keys()), 
                                     [i[0] for i in g2.node_index_to_id.values()]])
        
    else: 
        bipartide_edge_index = torch.tensor([list(g2.node_index_to_id.keys()) + list(g2.node_index_to_id.keys()), 
                                     [i[0] for i in g2.node_index_to_id.values()] + [i[1] for i in g2.node_index_to_id.values()]])
        
    return bipartide_edge_index


# Original DBGNN implementation mapping = 'last'
bipatite_edge_index = generate_bipatite_edge_index(mapping='last')

In [31]:
# Define the PyG data object
from torch_geometric.data import Data

num_nodes = max(max(g['edge_index'][0]), max(g['edge_index'][1])).item() + 1 # since indexing starts from 0
num_ho_nodes = max(max(g2['edge_index'][0]), max(g2['edge_index'][1])).item() + 1 # since indexing starts from 0

data = Data(
    num_nodes = num_nodes,
    num_ho_nodes = num_ho_nodes,
    x = torch.eye(num_nodes, num_nodes),
    x_h = torch.eye(num_ho_nodes, num_ho_nodes),
    edge_index = edge_index_g1,
    edge_index_higher_order = edge_index_g2,
    edge_weights = edge_weights.float(),
    edge_weights_higher_order = edge_weights_higher_order.float(),
    bipartite_edge_index = bipatite_edge_index,
    y = torch.tensor([0]*10 + [1]*10 + [2]*10)#torch.tensor([0 if int(i)<10  else (1 if int(i) <20 else 2) for i in g['node_id']])
)

# DBGNN 

In [32]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [33]:
from sklearn.metrics import balanced_accuracy_score

def test(model, data):
    model.eval()
    
    _, pred = model(data).max(dim=1)
    
    metrics_train = balanced_accuracy_score(
        data.y[data.train_mask].cpu(),
        pred[data.train_mask].cpu().numpy()
        )
    
    metrics_test = balanced_accuracy_score(
        data.y[data.test_mask].cpu(),
        pred[data.test_mask].cpu().numpy()
        )
    
    return metrics_train, metrics_test

In [34]:
from pathpyG.nn.dbgnn import DBGNN
from torch_geometric.transforms import RandomNodeSplit
from tqdm.notebook import tqdm

RandomNodeSplit(num_val=0, num_test=0.3)(data)

model = DBGNN(
        num_features =[num_nodes, num_ho_nodes],
        num_classes = len(data.y.unique()),
        hidden_dims = [16, 32, 8],
        p_dropout = 0.4
        ).to(device)

optimizer = torch.optim.Adam(model.parameters(),  lr=0.005)
loss_function = torch.nn.CrossEntropyLoss()

data = data.to(device)

losses = []
for epoch in tqdm(range(101)):
        output = model(data) 
        loss = loss_function(output[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        losses.append(loss)

        if epoch % 10 == 0:
                train_ba, test_ba  = test(model, data)
                print(f'Epoch: {epoch}, Loss: {loss}, Train balanced accuracy: {train_ba}, Test balanced accuracy: {test_ba}')

  0%|          | 0/101 [00:00<?, ?it/s]

Epoch: 0, Loss: 1.6261380910873413, Test balanced accuracy: 0.3333333333333333, Train balanced accuracy: 0.3333333333333333
Epoch: 10, Loss: 1.0365346670150757, Test balanced accuracy: 0.3333333333333333, Train balanced accuracy: 0.4351851851851852
Epoch: 20, Loss: 0.997630774974823, Test balanced accuracy: 0.3333333333333333, Train balanced accuracy: 0.4351851851851852
Epoch: 30, Loss: 0.9333955645561218, Test balanced accuracy: 0.3333333333333333, Train balanced accuracy: 0.47685185185185186
Epoch: 40, Loss: 0.8046084046363831, Test balanced accuracy: 0.3333333333333333, Train balanced accuracy: 0.5185185185185185
Epoch: 50, Loss: 0.5292219519615173, Test balanced accuracy: 0.0, Train balanced accuracy: 0.6018518518518519
Epoch: 60, Loss: 0.29320260882377625, Test balanced accuracy: 0.3333333333333333, Train balanced accuracy: 0.9583333333333334
Epoch: 70, Loss: 0.11550352722406387, Test balanced accuracy: 0.3333333333333333, Train balanced accuracy: 1.0
Epoch: 80, Loss: 0.0330780707