In [35]:
import gzip
import pickle
import networkx as nx
import pandas as pd
import numpy as np
import random
import os

## Data Loading

### Writing PyG Dataset

In [36]:
import torch

In [37]:
from torch_geometric.data import InMemoryDataset
from torch.utils.data import DataLoader

In [38]:
from torch_geometric.utils import from_networkx, to_networkx

In [39]:
def generate_feature_vector(G):
    x = torch.randn(G.number_of_nodes(), 1)
    ind = 0
    for node in G.nodes():
        x[ind] = G.degree[node]
        ind += 1
    return x

In [42]:
class LamanDataset(InMemoryDataset):
    def __init__(self, root, data_dir, transform=None, pre_transform=None, pre_filter=None):
        self.data_dir = data_dir
        super().__init__(root, transform, pre_transform, pre_filter)
        self.data, self.slices = torch.load(self.processed_paths[0])
        
    @property
    def processed_file_names(self):
        return ['data.pt']
        
    def process(self):
        # processing code here
        total_laman_data = None
        with gzip.open(self.data_dir, 'r') as f:
            total_laman_data = pickle.load(f)
            
        data_list = []
        ind = 0
        # convert from graph to Data object
        for graph in total_laman_data[0]:
#             print(ind)
            ind += 1
            num_nodes = nx.number_of_nodes(graph)
#             x = torch.randn(num_nodes, 1)
            x = generate_feature_vector(graph)
            graph_as_data = from_networkx(graph)
            graph_as_data.x = x
            graph_as_data.label = 0
            data_list.append(graph_as_data)
            
        ind = 0
        for graph in total_laman_data[1]:
#             print(ind)
            ind += 1
            num_nodes = nx.number_of_nodes(graph)
#             x = torch.randn(num_nodes, 64)
            x = generate_feature_vector(graph)
            graph_as_data = from_networkx(graph)
            graph_as_data.x = x
            graph_as_data.label = 1
            data_list.append(graph_as_data)
            
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

In [43]:
DATA_PATH = "../data-2d/data/low_decomp_sahil_dataset.pkl.gz"

In [44]:
laman_data = LamanDataset("", DATA_PATH)

Processing...
Done!


## Split into Train / Test

In [45]:
from torch.utils.data import random_split

proportions = [.8, .2]
lengths = [int(p * len(laman_data)) for p in proportions]
lengths[-1] = len(laman_data) - sum(lengths[:-1])

generator1 = torch.Generator().manual_seed(42)
train_data, test_data = random_split(laman_data, lengths, generator=generator1)

In [46]:
from torch_geometric.loader import DataLoader
train_loader = DataLoader(train_data, batch_size = 512, shuffle=True)
test_loader = DataLoader(test_data, batch_size = 512, shuffle=True)

In [47]:
print("Number of train batches: ", len(train_loader))
print("Number of train batches: ", len(test_loader))

Number of train batches:  32
Number of train batches:  8


In [48]:
for data in train_loader:
    print(data)
    break

DataBatch(edge_index=[2, 76834], x=[21017, 1], label=[512], num_nodes=21017, batch=[21017], ptr=[513])


## Model Architecture & Training

In [49]:
from basic_gcn.gcn import GCN

In [67]:
model = GCN(1)
print(model)
print("Number of parameters: ", sum(p.numel() for p in model.parameters()))

GCN(
  (initial_conv): GCNConv(1, 64)
  (conv1): GCNConv(64, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (out): Linear(in_features=128, out_features=1, bias=True)
)
Number of parameters:  12737


In [68]:
from torch_geometric.data import DataLoader
from torch.nn import BCELoss
import warnings
warnings.filterwarnings("ignore")

loss_fn = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [69]:
def train(data):
    ind = 0
    for batch in data:
        optimizer.zero_grad()
        pred, embedding = model(batch.x, batch.edge_index, batch.batch)
        pred = torch.squeeze(pred)
        loss = loss_fn(pred.float(), batch.label.float())
        loss.backward()
        optimizer.step()
        ind += 1

    return loss, None

In [53]:
def check_accuracy(model, loader):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for batch in loader:
            pred, embedding = model(batch.x, batch.edge_index, batch.batch)
            pred = torch.squeeze(pred)
            y = batch.label
            predictions = (pred > 0.5).long() 
            num_correct += (predictions == y).sum() 
            num_samples += predictions.size(0)
            
    return float(num_correct)/float(num_samples)*100

In [54]:
print("Starting training...")
losses = []

for epoch in range(10):
    loss, h = train(train_loader)
    losses.append(loss)
    print(f"Epoch {epoch} | Train loss {loss}")
    train_acc, test_acc = check_accuracy(model, train_loader), check_accuracy(model, test_loader)
    print(f"Train Accuracy {train_acc} | Test Accuracy {test_acc}")
    

Starting training...
Epoch 0 | Train loss 0.4671815037727356
Train Accuracy 49.6375 | Test Accuracy 49.75
Epoch 1 | Train loss 0.2666562795639038
Train Accuracy 87.5625 | Test Accuracy 87.8
Epoch 2 | Train loss 0.2470957338809967
Train Accuracy 90.20625 | Test Accuracy 90.325
Epoch 3 | Train loss 0.222549170255661
Train Accuracy 89.49375 | Test Accuracy 89.97500000000001
Epoch 4 | Train loss 0.1858639419078827
Train Accuracy 90.69375 | Test Accuracy 90.9
Epoch 5 | Train loss 0.2059520184993744
Train Accuracy 91.19375 | Test Accuracy 91.27499999999999
Epoch 6 | Train loss 0.17264482378959656
Train Accuracy 91.23125 | Test Accuracy 91.625
Epoch 7 | Train loss 0.1342594027519226
Train Accuracy 92.0625 | Test Accuracy 92.2
Epoch 8 | Train loss 0.3715443015098572
Train Accuracy 84.20625000000001 | Test Accuracy 83.75
Epoch 9 | Train loss 0.23607076704502106
Train Accuracy 91.77499999999999 | Test Accuracy 91.875


In [55]:
# sanity check

In [119]:
# square
import networkx as nx
square = nx.Graph()
square.add_edge(0, 1)
square.add_edge(1, 3)
square.add_edge(0, 2)
square.add_edge(2, 3)

In [120]:
# triangle
import networkx as nx
triangle = nx.Graph()
triangle.add_edge(0, 1)
triangle.add_edge(0, 2)
triangle.add_edge(1, 2)

In [121]:
# pentagon
import networkx as nx
pentagon = nx.Graph()
pentagon.add_edge(0, 1)
pentagon.add_edge(1, 3)
pentagon.add_edge(3, 4)
pentagon.add_edge(4, 2)
pentagon.add_edge(2, 0)

In [139]:
graph_as_data = from_networkx(square)
graph_as_data.x = generate_feature_vector(square)
graph_as_data.label = 1

In [140]:
graph_as_data

Data(edge_index=[2, 8], num_nodes=4, x=[4, 1], label=1)

In [141]:
print(x)

tensor([[2.],
        [2.],
        [2.],
        [2.]])


In [142]:
validation_set = DataLoader([graph_as_data], batch_size = 1, shuffle=True)

In [143]:
for batch in validation_set:
    pred = model(batch.x, batch.edge_index, batch.batch)
    print(pred[0])
    print(pred[1])

tensor([[0.3664]], grad_fn=<SigmoidBackward0>)
tensor([[-0.0810, -0.0530,  0.0640,  0.0714,  0.3386,  0.0830,  0.2225,  0.0301,
          0.2901,  0.1743, -0.1755,  0.5706, -0.3748, -0.0577, -0.1254, -0.1634,
         -0.1954,  0.3294,  0.6957, -0.3543,  0.0617, -0.5722,  0.1475, -0.6627,
         -0.0863,  0.4758,  0.2110, -0.5136, -0.1718, -0.1396, -0.6790,  0.0805,
         -0.1189,  0.3113,  0.2414,  0.4347, -0.1025, -0.3450, -0.1707,  0.3018,
         -0.0838,  0.0625, -0.5034, -0.2126, -0.0278,  0.3532,  0.1729,  0.1823,
         -0.1498,  0.6166, -0.3159,  0.2840,  0.3516, -0.2224, -0.4679, -0.8201,
         -0.2863,  0.0591, -0.1729,  0.1267, -0.3737, -0.1431,  0.2923,  0.2112,
         -0.0810, -0.0530,  0.0640,  0.0714,  0.3386,  0.0830,  0.2225,  0.0301,
          0.2901,  0.1743, -0.1755,  0.5706, -0.3748, -0.0577, -0.1254, -0.1634,
         -0.1954,  0.3294,  0.6957, -0.3543,  0.0617, -0.5722,  0.1475, -0.6627,
         -0.0863,  0.4758,  0.2110, -0.5136, -0.1718, -0.1396,