In [2]:
from typing import List, Tuple
import tqdm
import os
import os.path as osp

import torch
from torch.functional import Tensor
from torch_geometric.loader import DataLoader
import numpy as np
from torch_geometric.utils import to_dense_batch

from sgmatch.models.SimGNN import SimGNN
from tests.utils.dataset import load_dataset
from tests.utils.parser import parser
from tests.utils.data import PairData

In [3]:
def create_graph_pairs(train_dataset, test_dataset) -> Tuple[List]:
    train_graph_pairs = []
    with tqdm.tqdm(total=len(train_dataset)**2, desc='Train graph pairs completed: ') as bar:
        for idx1, graph1 in enumerate(train_dataset):
            for idx2, graph2 in enumerate(train_dataset):
                if idx1 == idx2:
                    continue
                # Initializing Data
                edge_index_s = graph1.edge_index
                x_s = graph1.x

                edge_index_t = graph2.edge_index
                x_t = graph2.x

                norm_ged = train_dataset.norm_ged[graph1.i, graph2.i]
                graph_sim = torch.exp(-norm_ged).unsqueeze(-1)
                
                # Making Graph Pair
                if isinstance(x_s, Tensor) and isinstance(x_t, Tensor):
                    graph_pair = PairData(edge_index_s=edge_index_s, x_s=x_s,
                                        edge_index_t=edge_index_t, x_t=x_t,
                                        y=graph_sim)
                    
                    # Saving all the Graph Pairs to the List for Batching and Data Loading
                    train_graph_pairs.append(graph_pair)
            bar.update(len(train_dataset))
    
    test_graph_pairs = []
    with tqdm.tqdm(total=len(test_dataset)*len(train_dataset), desc='Test graph pairs completed: ') as bar:
        for graph1 in test_dataset:
            for graph2 in train_dataset:
                # Initializing Data
                edge_index_s = graph1.edge_index
                x_s = graph1.x
                edge_index_t = graph2.edge_index
                x_t = graph2.x

                norm_ged = train_dataset.norm_ged[graph1.i, graph2.i]
                graph_sim = torch.exp(-norm_ged).unsqueeze(-1)
                
                # Making Graph Pair
                if isinstance(x_s, Tensor) and isinstance(x_t, Tensor):
                    graph_pair = PairData(edge_index_s=edge_index_s, x_s=x_s,
                                        edge_index_t=edge_index_t, x_t=x_t,
                                        y=graph_sim)
                
                    # Saving all the Graph Pairs to the List for Batching and Data Loading
                    test_graph_pairs.append(graph_pair)
            bar.update(len(train_dataset))
    
    return train_graph_pairs, test_graph_pairs


In [4]:
def train(train_loader, val_loader, model, loss_criterion, optimizer, device, num_epochs=10):
    batch_train_loss_sum = 0
    batch_val_loss_sum = 0

    for epoch in range(num_epochs):
        with tqdm.tqdm(total=len(train_loader), desc='Train batches completed: ') as bar:
            for batch_idx, train_batch in enumerate(train_loader):
                model.train()
                train_batch = train_batch.to(device)
                optimizer.zero_grad()

                pred_sim = model(train_batch.x_s, train_batch.edge_index_s, train_batch.x_t, 
                                train_batch.edge_index_t, train_batch.x_s_batch, train_batch.x_t_batch)
                mean_batch_loss = loss_criterion(pred_sim, train_batch.y)
                # Compute Gradients via Backpropagation
                mean_batch_loss.backward()
                # Update Parameters
                optimizer.step()
                batch_train_loss_sum += mean_batch_loss.item()*len(train_batch)
                
                bar.update(1)

        with tqdm.tqdm(total=len(val_loader), desc='Validation batches completed: ') as bar:
            for batch_idx, val_batch in enumerate(val_loader):
                model.eval()
                with torch.no_grad():
                    val_batch = val_batch.to(device)
                    pred_sim = model(val_batch.x_s, val_batch.edge_index_s, 
                            val_batch.x_t, val_batch.edge_index_t, val_batch.x_s_batch, val_batch.x_t_batch)
                    mean_val_loss = loss_criterion(pred_sim, val_batch.y)
                    batch_val_loss_sum += mean_val_loss.item()*len(val_batch)

                bar.update(1)
        
        if torch.cuda.is_available():
            torch.cuda.empty_cache() 
    
        # Printing Epoch Summary
        print(f"Epoch: {epoch+1}/{num_epochs} | Per Graph Train MSE: {batch_train_loss_sum / len(train_loader.dataset)} | Mean batch loss :{mean_batch_loss} \n   |Per Graph Validation MSE: {batch_val_loss_sum / len(val_loader.dataset)}| Mean_val_loss: {mean_val_loss}")


In [5]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

data_path="./data"
train_batch_size=128
val_batch_size=64
test_batch_size=256
learning_rate=0.01

train_dataset = load_dataset(dpath=data_path+"/aids/", name="GED", category="AIDS700nef", train=True)
test_dataset = load_dataset(dpath=data_path+"/aids/", name="GED", category="AIDS700nef", train=False)

train_ged_table = train_dataset.ged[:train_dataset.data.i[-1]+1, :train_dataset.data.i[-1]+1]
test_ged_table = test_dataset.ged[train_dataset.data.i[-1]+1:, train_dataset.data.i[-1]+1:]


train_graph_pairs, test_graph_pairs = torch.load(data_path+"/aids/graph_pairs/train_graph_pairs.pt"),\
                                              torch.load(data_path+"/aids/graph_pairs/test_graph_pairs.pt")

val_idxs = np.random.randint(len(train_graph_pairs), size=len(test_graph_pairs))
val_graph_pairs = [train_graph_pairs[idx] for idx in val_idxs]
train_idxs = set(range(len(train_graph_pairs))) - set(val_idxs)
train_graph_pairs = [train_graph_pairs[idx] for idx in train_idxs]
del val_idxs, train_idxs

train_loader = DataLoader(train_graph_pairs, batch_size = 128, follow_batch = ["x_s", "x_t"], shuffle = True)
val_loader = DataLoader(val_graph_pairs, batch_size = 64, follow_batch = ["x_s", "x_t"], shuffle = True)
test_loader = DataLoader(test_graph_pairs, batch_size = 256, follow_batch = ["x_s", "x_t"], shuffle = True)

model = SimGNN(input_dim=train_loader.dataset[0].x_s.shape[-1]).to(device)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),learning_rate)




In [13]:
train_dataset
train_data1=torch.load("..\..\downloaded\processed\AIDS700nef_training.pt")
train_data1

(Data(edge_index=[2, 9898], i=[560], num_nodes=4991, x=[4991, 29]),
 defaultdict(dict,
             {'edge_index': tensor([   0,   18,   34,   54,   76,   90,   96,  116,  134,  152,  164,  180,
                       198,  216,  238,  250,  262,  272,  292,  306,  316,  332,  350,  372,
                       386,  392,  404,  422,  436,  450,  470,  488,  502,  514,  532,  552,
                       564,  582,  602,  624,  638,  652,  664,  678,  698,  718,  732,  752,
                       768,  778,  798,  818,  834,  852,  866,  880,  904,  922,  934,  950,
                       956,  962,  978,  992, 1010, 1024, 1038, 1056, 1066, 1082, 1104, 1116,
                      1136, 1150, 1164, 1180, 1200, 1220, 1234, 1256, 1274, 1294, 1314, 1330,
                      1352, 1360, 1378, 1400, 1416, 1436, 1454, 1472, 1486, 1504, 1522, 1540,
                      1550, 1568, 1590, 1604, 1620, 1636, 1656, 1676, 1698, 1720, 1742, 1752,
                      1768, 1784, 1798, 1814, 1830, 1

In [6]:
train(train_loader, val_loader, model, criterion, optimizer, device)

Train batches completed:   7%|▋         | 126/1904 [00:03<00:46, 37.91it/s]


KeyboardInterrupt: 

In [None]:
model.eval()

SimGNN(
  (convs): ModuleList(
    (0): GCNConv(29, 64)
    (1): GCNConv(64, 32)
    (2): GCNConv(32, 16)
  )
  (attention_layer): GlobalContextAttention(input_dim=16)
  (ntn_layer): NeuralTensorNetwork()
  (mlp): ModuleList(
    (0): Linear(in_features=32, out_features=32, bias=True)
    (1): Linear(in_features=32, out_features=16, bias=True)
    (2): Linear(in_features=16, out_features=8, bias=True)
    (3): Linear(in_features=8, out_features=4, bias=True)
  )
  (scoring_layer): Linear(in_features=4, out_features=1, bias=True)
)

In [None]:
batch_test_loss_sum=0
with tqdm.tqdm(total=len(test_loader), desc='testing batches completed: ') as bar:
    for batch_idx, test_batch in enumerate(test_loader):
        model.eval()
        with torch.no_grad():
            test_batch = test_batch.to(device)
            pred_sim = model(test_batch.x_s, test_batch.edge_index_s, 
                    test_batch.x_t, test_batch.edge_index_t, test_batch.x_s_batch, test_batch.x_t_batch)
            mean_test_loss = criterion(pred_sim, test_batch.y)
            batch_test_loss_sum += mean_test_loss.item()*len(test_batch)

        bar.update(1)

batch_test_loss_sum/len(test_loader.dataset)

testing batches completed: 100%|██████████| 307/307 [00:08<00:00, 34.36it/s]


0.009314317466805175

LINUX 

In [None]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

data_path="./data"
train_batch_size=128
val_batch_size=64
test_batch_size=256
learning_rate=0.01

train_dataset = load_dataset(dpath=data_path+"/linux/", name="GED", category="LINUX", train=True)
test_dataset = load_dataset(dpath=data_path+"/linux/", name="GED", category="LINUX", train=False)

train_ged_table = train_dataset.ged[:train_dataset.data.i[-1]+1, :train_dataset.data.i[-1]+1]
test_ged_table = test_dataset.ged[train_dataset.data.i[-1]+1:, train_dataset.data.i[-1]+1:]


train_graph_pairs, test_graph_pairs = create_graph_pairs(train_dataset, test_dataset)
if not osp.exists(data_path+"/linux/graph_pairs"):
    os.makedirs(data_path+"/linux/graph_pairs")
torch.save(train_graph_pairs, data_path+"/linux/graph_pairs/train_graph_pairs.pt")
torch.save(test_graph_pairs, data_path+"/linux/graph_pairs/test_graph_pairs.pt")

val_idxs = np.random.randint(len(train_graph_pairs), size=len(test_graph_pairs))
val_graph_pairs = [train_graph_pairs[idx] for idx in val_idxs]
train_idxs = set(range(len(train_graph_pairs))) - set(val_idxs)
train_graph_pairs = [train_graph_pairs[idx] for idx in train_idxs]
del val_idxs, train_idxs

train_loader = DataLoader(train_graph_pairs, batch_size = 128, follow_batch = ["x_s", "x_t"], shuffle = True)
val_loader = DataLoader(val_graph_pairs, batch_size = 64, follow_batch = ["x_s", "x_t"], shuffle = True)
test_loader = DataLoader(test_graph_pairs, batch_size = 256, follow_batch = ["x_s", "x_t"], shuffle = True)

model_linux = SimGNN(input_dim=train_loader.dataset[0].x_s.shape[-1]).to(device)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model_linux.parameters(),learning_rate)

NameError: name 'torch' is not defined

In [None]:
data_path="./data"

train_dataset_aids = load_dataset(dpath=data_path+"/aids/", name="GED", category="AIDS700nef", train=True)
test_dataset_aids = load_dataset(dpath=data_path+"/aids/", name="GED", category="AIDS700nef", train=False)
train_dataset_linux=load_dataset(dpath=data_path+"/linux/",name="GED",category="LINUX",train=True)
test_dataset_linux=load_dataset(dpath=data_path+"/linux/",name="GED",category="LINUX",train=False)

train_dataset_aids[0]

Data(edge_index=[2, 18], i=[1], x=[10, 29], num_nodes=10)