In [1]:
import gzip
import pickle
import networkx as nx
import pandas as pd
import numpy as np
import random
import os

## Data Loading

### Writing PyG Dataset

In [2]:
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from torch_geometric.data import InMemoryDataset
from torch.utils.data import DataLoader

In [4]:
from torch_geometric.utils import from_networkx, to_networkx

In [5]:
def generate_feature_vector(G):
    x = torch.randn(G.number_of_nodes(), 1)
    ind = 0
    for node in G.nodes():
        x[ind][0] = G.degree[node]
        ind += 1
    return x

In [6]:
class LamanDataset(InMemoryDataset):
    def __init__(self, root, data_dir, transform=None, pre_transform=None, pre_filter=None):
        self.data_dir = data_dir
        super().__init__(root, transform, pre_transform, pre_filter)
        self.data, self.slices = torch.load(self.processed_paths[0])
        
    @property
    def processed_file_names(self):
        return ['data.pt']
        
    def process(self):
        # processing code here
        total_laman_data = None
        with gzip.open(self.data_dir, 'r') as f:
            total_laman_data = pickle.load(f)
            
        data_list = []
        ind = 0
        # convert from graph to Data object
        for graph in total_laman_data[0]:
#             print(ind)
            ind += 1
            num_nodes = nx.number_of_nodes(graph)
#             x = torch.randn(num_nodes, 1)
            x = generate_feature_vector(graph)
            graph_as_data = from_networkx(graph)
            graph_as_data.x = x
            graph_as_data.label = 0
            data_list.append(graph_as_data)
            
        ind = 0
        for graph in total_laman_data[1]:
#             print(ind)
            ind += 1
            num_nodes = nx.number_of_nodes(graph)
#             x = torch.randn(num_nodes, 64)
            x = generate_feature_vector(graph)
            graph_as_data = from_networkx(graph)
            graph_as_data.x = x
            graph_as_data.label = 1
            data_list.append(graph_as_data)
            
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

In [7]:
DATA_PATH = "../data-2d/data/4096-20-4-entries-med.pkl.gz"

In [8]:
laman_data = LamanDataset("", DATA_PATH)

## Split into Train / Test

In [9]:
from torch.utils.data import random_split

proportions = [.8, .2]
lengths = [int(p * len(laman_data)) for p in proportions]
lengths[-1] = len(laman_data) - sum(lengths[:-1])

generator1 = torch.Generator().manual_seed(42)
train_data, test_data = random_split(laman_data, lengths, generator=generator1)

In [10]:
from torch_geometric.loader import DataLoader
train_loader = DataLoader(train_data, batch_size = 256, shuffle=True)
test_loader = DataLoader(test_data, batch_size = 256, shuffle=True)

In [11]:
print("Number of train batches: ", len(train_loader))
print("Number of test batches: ", len(test_loader))

Number of train batches:  13
Number of test batches:  4


In [12]:
for data in train_loader:
    print(data)
    break

DataBatch(edge_index=[2, 19530], x=[5215, 1], label=[256], num_nodes=5215, batch=[5215], ptr=[257])


## Model Architecture & Training

In [13]:
from gin.gin import GIN

In [69]:
model = GIN(num_features=1)
print(model)
print("Number of parameters: ", sum(p.numel() for p in model.parameters()))

GIN(
  (conv1): GINConv(nn=Sequential(
    (0): Linear(in_features=1, out_features=16, bias=True)
    (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=16, out_features=16, bias=True)
    (4): ReLU()
  ))
  (conv2): GINConv(nn=Sequential(
    (0): Linear(in_features=16, out_features=16, bias=True)
    (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=16, out_features=16, bias=True)
    (4): ReLU()
  ))
  (conv3): GINConv(nn=Sequential(
    (0): Linear(in_features=16, out_features=16, bias=True)
    (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=16, out_features=16, bias=True)
    (4): ReLU()
  ))
  (conv4): GINConv(nn=Sequential(
    (0): Linear(in_features=16, out_features=16, bias=True)
    (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True,

In [70]:
from torch_geometric.data import DataLoader
from torch.nn import BCELoss
import warnings
warnings.filterwarnings("ignore")

loss_fn = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(),
                                      lr=0.01)

In [71]:
def train(data):
    ind = 0
    for batch in data:
        optimizer.zero_grad()
        pred, embedding = model(batch.x, batch.edge_index, batch.batch)
        pred = torch.squeeze(pred)
        loss = loss_fn(pred.float(), batch.label.float())
        loss.backward()
        optimizer.step()
        ind += 1

    return loss, None

In [72]:
def check_accuracy(model, loader):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for batch in loader:
            pred, embedding = model(batch.x, batch.edge_index, batch.batch)
            pred = torch.squeeze(pred)
            y = batch.label
            predictions = (pred > 0.5).long() 
            num_correct += (predictions == y).sum() 
            num_samples += predictions.size(0)
            
    return float(num_correct)/float(num_samples)*100

In [73]:
print("Starting training...")
losses = []

for epoch in range(500):
    loss, h = train(train_loader)
    losses.append(loss)
    print(f"Epoch {epoch} | Train loss {loss}")
    train_acc, test_acc = check_accuracy(model, train_loader), check_accuracy(model, test_loader)
    print(f"Train Accuracy {train_acc} | Test Accuracy {test_acc}")
    

Starting training...
Epoch 0 | Train loss 0.5209265947341919
Train Accuracy 65.35409035409036 | Test Accuracy 67.92682926829269
Epoch 1 | Train loss 0.5815510153770447
Train Accuracy 73.26007326007326 | Test Accuracy 76.82926829268293
Epoch 2 | Train loss 0.5442524552345276
Train Accuracy 70.26862026862027 | Test Accuracy 70.73170731707317
Epoch 3 | Train loss 0.4728721082210541
Train Accuracy 75.54945054945054 | Test Accuracy 77.07317073170732
Epoch 4 | Train loss 0.4501432776451111
Train Accuracy 76.12942612942612 | Test Accuracy 77.5609756097561
Epoch 5 | Train loss 0.44835999608039856
Train Accuracy 78.08302808302808 | Test Accuracy 79.39024390243902
Epoch 6 | Train loss 0.47201594710350037
Train Accuracy 78.75457875457876 | Test Accuracy 79.75609756097562
Epoch 7 | Train loss 0.3701326251029968
Train Accuracy 78.32722832722833 | Test Accuracy 79.8780487804878
Epoch 8 | Train loss 0.4398428201675415
Train Accuracy 78.41880341880342 | Test Accuracy 81.70731707317073
Epoch 9 | Train 

Train Accuracy 99.81684981684981 | Test Accuracy 99.8780487804878
Epoch 77 | Train loss 0.04423908144235611
Train Accuracy 99.9084249084249 | Test Accuracy 100.0
Epoch 78 | Train loss 0.04294076934456825
Train Accuracy 99.63369963369964 | Test Accuracy 99.7560975609756
Epoch 79 | Train loss 0.052222054451704025
Train Accuracy 100.0 | Test Accuracy 100.0
Epoch 80 | Train loss 0.048643626272678375
Train Accuracy 100.0 | Test Accuracy 100.0
Epoch 81 | Train loss 0.03910990059375763
Train Accuracy 100.0 | Test Accuracy 100.0
Epoch 82 | Train loss 0.03098950907588005
Train Accuracy 99.38949938949939 | Test Accuracy 99.39024390243902
Epoch 83 | Train loss 0.029945749789476395
Train Accuracy 100.0 | Test Accuracy 100.0
Epoch 84 | Train loss 0.03203766793012619
Train Accuracy 100.0 | Test Accuracy 100.0
Epoch 85 | Train loss 0.03246258944272995
Train Accuracy 100.0 | Test Accuracy 100.0
Epoch 86 | Train loss 0.030716385692358017
Train Accuracy 100.0 | Test Accuracy 100.0
Epoch 87 | Train loss 

Epoch 155 | Train loss 0.01872183009982109
Train Accuracy 99.93894993894995 | Test Accuracy 99.8780487804878
Epoch 156 | Train loss 0.028369497507810593
Train Accuracy 95.66544566544567 | Test Accuracy 96.34146341463415
Epoch 157 | Train loss 1.912736177444458
Train Accuracy 73.80952380952381 | Test Accuracy 78.53658536585367
Epoch 158 | Train loss 0.24430511891841888
Train Accuracy 74.60317460317461 | Test Accuracy 79.75609756097562
Epoch 159 | Train loss 0.12999315559864044
Train Accuracy 88.24786324786325 | Test Accuracy 90.97560975609757
Epoch 160 | Train loss 0.1041780561208725
Train Accuracy 98.07692307692307 | Test Accuracy 98.41463414634146
Epoch 161 | Train loss 0.09229019284248352
Train Accuracy 97.89377289377289 | Test Accuracy 98.53658536585365
Epoch 162 | Train loss 0.0621691457927227
Train Accuracy 98.96214896214897 | Test Accuracy 99.51219512195122
Epoch 163 | Train loss 0.053386591374874115
Train Accuracy 98.74847374847376 | Test Accuracy 99.02439024390245
Epoch 164 | T

KeyboardInterrupt: 

In [41]:
# sanity check

In [74]:
# square
import networkx as nx
square = nx.Graph()
square.add_edge(0, 1)
square.add_edge(1, 3)
square.add_edge(0, 2)
square.add_edge(2, 3)

In [75]:
# square with cross bar (rigid)
import networkx as nx
square_bar = nx.Graph()
square_bar.add_edge(0, 1)
square_bar.add_edge(1, 3)
square_bar.add_edge(0, 2)
square_bar.add_edge(2, 3)
square_bar.add_edge(0, 3)
square_bar.add_edge(2, 1)

In [76]:
# triangle
import networkx as nx
triangle = nx.Graph()
triangle.add_edge(0, 1)
triangle.add_edge(0, 2)
triangle.add_edge(1, 2)

In [77]:
# pentagon
import networkx as nx
pentagon = nx.Graph()
pentagon.add_edge(0, 1)
pentagon.add_edge(1, 3)
pentagon.add_edge(3, 4)
pentagon.add_edge(4, 2)
pentagon.add_edge(2, 0)

In [86]:
graph_as_data = from_networkx(pentagon)
graph_as_data.x = generate_feature_vector(pentagon)
graph_as_data.label = 0

In [87]:
graph_as_data

Data(edge_index=[2, 10], num_nodes=5, x=[5, 1], label=0)

In [88]:
validation_set = DataLoader([graph_as_data], batch_size = 1, shuffle=True)

In [89]:
for batch in validation_set:
    pred = model(batch.x, batch.edge_index, batch.batch)
    print(pred[0])
    print(pred[1])

tensor([[1.]], grad_fn=<SigmoidBackward0>)
tensor([[28.9286]], grad_fn=<AddmmBackward0>)


In [None]:
train_acc, test_acc = check_accuracy(model, train_loader), check_accuracy(model, test_loader)
print(f"Train Accuracy {train_acc} | Test Accuracy {test_acc}")