# Synthetic Experiments using Stochastic Block Models

In [1]:
import pickle as pkl
from torch_geometric.data import DataLoader
from itertools import combinations
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from torch.optim.lr_scheduler import StepLR
from src.utils.CreateFeatures import CreateFeatures
from src.pygcn.GCN_synthetic import SiameseGNN
from torch_geometric.data import DataLoader

import torch
import torch.nn as nn
import torch
import torch_geometric.data as data

In [12]:
def create_synthetic_pairs(data, cp_time):
    all_loader = DataLoader(data, batch_size=32)

    all_pairs = list(combinations(range(200), 2))
    random_pairs = random.sample(all_pairs, 1000)

    graph_pairs = []
    for i in random_pairs:
        first, second = i[0], i[1]

        if first < cp_time and second < cp_time:
            y_label = 1
        elif first >= cp_time and second >= cp_time:
            y_label = 1
        else:
            y_label = 0

        graph_pairs.append((data[first], data[second], y_label))

    flattened_train, flattened_test = train_test_split(graph_pairs, test_size=0.40, random_state=42)
    flattened_test, flattened_val = train_test_split(graph_pairs, test_size=0.5, random_state=42)

    return flattened_train, flattened_test, flattened_val

In [13]:
def run_model(model, train_data, val_data):
    torch.manual_seed(42)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)  # Adjust step_size and gamma as needed
    criterion = nn.BCELoss()

    for epoch in tqdm(range(5)):
        model.train()
        train_losses = []
        for data1, data2, label in train_data:

            optimizer.zero_grad()
            out = model(data1, data2)
            label = torch.tensor(label).view(1).float()
            loss = criterion(out.squeeze(0), label)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        scheduler.step()  # Add this line to update the learning rate

        model.eval()
        with torch.no_grad():
            val_losses = []

            val_pred = []
            val_truth = []

            correct = 0
            total = 0
            for data1, data2, label in val_data:
                out = model(data1, data2)
                label = torch.tensor(label).view(1).float()
                val_loss = criterion(out.squeeze(0), label)
                val_losses.append(val_loss.item())

                predictions = torch.round(out.squeeze())

                val_pred.append(predictions)
                val_truth.append(label)

                correct += (predictions == label).sum().item()
                total += 1

            val_loss = sum(val_losses) / len(val_losses)
            val_accuracy = correct / total

        print(f'Epoch: {epoch+1}, Training Loss: {sum(train_losses)/len(train_losses)}, Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}, Validation F1 Score: {f1_score(val_truth, val_pred)}')

## Merge Data

In [14]:
with open("results/synthetic/06_04_10:47:54_merge_T_200_n_400_k1_4_k2_2_p_0.5_q_0.2_0/data.p", "rb") as f:
    merge_data = pkl.load(f)

cp_time = 80
flattened_train, flattened_test, flattened_val = create_synthetic_pairs(merge_data, cp_time)
model = SiameseGNN()
run_model(model, flattened_train, flattened_val)

 20%|██        | 1/5 [00:30<02:02, 30.56s/it]

Epoch: 1, Training Loss: 0.7030626838902633, Validation Loss: 0.6927091188430786, Validation Accuracy: 0.524, Validation F1 Score: 0.6876640419947506


 40%|████      | 2/5 [01:00<01:29, 29.98s/it]

Epoch: 2, Training Loss: 0.697512743473053, Validation Loss: 0.6922233791351319, Validation Accuracy: 0.524, Validation F1 Score: 0.6876640419947506


 60%|██████    | 3/5 [01:27<00:57, 29.00s/it]

Epoch: 3, Training Loss: 0.696107418636481, Validation Loss: 0.6923881580829621, Validation Accuracy: 0.524, Validation F1 Score: 0.6876640419947506


 80%|████████  | 4/5 [01:57<00:29, 29.19s/it]

Epoch: 4, Training Loss: 0.6961809026201566, Validation Loss: 0.6927824656963348, Validation Accuracy: 0.524, Validation F1 Score: 0.6876640419947506


100%|██████████| 5/5 [02:26<00:00, 29.34s/it]

Epoch: 5, Training Loss: 0.6956245772043864, Validation Loss: 0.6925743868350983, Validation Accuracy: 0.524, Validation F1 Score: 0.6876640419947506





## Clique Data

In [15]:
with open("results/synthetic/06_04_11:13:29_clique_cp_1_T_200_n_400_p_0.2_q_0.05_20_0/data.p", "rb") as f:
    clique_data = pkl.load(f)

cp_time = 133
flattened_train, flattened_test, flattened_val = create_synthetic_pairs(clique_data, cp_time)
model = SiameseGNN()
run_model(model, flattened_train, flattened_val)

 20%|██        | 1/5 [00:14<00:58, 14.65s/it]

Epoch: 1, Training Loss: 0.7027358784526586, Validation Loss: 0.6949385805130005, Validation Accuracy: 0.524, Validation F1 Score: 0.6876640419947506


 40%|████      | 2/5 [00:28<00:42, 14.29s/it]

Epoch: 2, Training Loss: 0.6930854049821694, Validation Loss: 0.6904357953071594, Validation Accuracy: 0.524, Validation F1 Score: 0.6876640419947506


 60%|██████    | 3/5 [00:42<00:28, 14.21s/it]

Epoch: 3, Training Loss: 0.6918158697585265, Validation Loss: 0.6914281842708587, Validation Accuracy: 0.524, Validation F1 Score: 0.6876640419947506


 80%|████████  | 4/5 [00:56<00:14, 14.20s/it]

Epoch: 4, Training Loss: 0.6835549014310042, Validation Loss: 0.6739447337388992, Validation Accuracy: 0.622, Validation F1 Score: 0.46458923512747874


100%|██████████| 5/5 [01:11<00:00, 14.32s/it]

Epoch: 5, Training Loss: 0.6490362334251404, Validation Loss: 0.6374697284698486, Validation Accuracy: 0.614, Validation F1 Score: 0.4469914040114613



