In [1]:
import gzip
import pickle
import networkx as nx
import pandas as pd
import numpy as np
import random
import os
from pebble import lattice

from torch.optim import SGD
from torch.optim.lr_scheduler import ReduceLROnPlateau

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
from torch_geometric.data import InMemoryDataset
from torch.utils.data import DataLoader
from torch_geometric.utils import from_networkx, to_networkx

In [3]:
def clustering_coefficient(G, node):
    """Return the local clustering coefficient of ``node`` in ``G``.

    The coefficient is the fraction of unordered pairs of ``node``'s
    neighbours that are themselves joined by an edge.

    Args:
        G: graph object exposing ``neighbors(node)`` and ``has_edge(u, v)``.
        node: the node whose neighbourhood is inspected.

    Returns:
        ``0`` when ``node`` has fewer than two neighbours (no pair exists),
        otherwise the ratio ``linked pairs / possible pairs`` as a float.
    """
    neighbours = list(G.neighbors(node))
    degree = len(neighbours)
    if degree < 2:
        return 0

    # Number of unordered neighbour pairs: degree choose 2.
    possible_pairs = degree * (degree - 1) / 2
    # has_edge yields a bool, so summing counts the connected pairs.
    linked_pairs = sum(
        G.has_edge(neighbours[a], neighbours[b])
        for a in range(degree)
        for b in range(a + 1, degree)
    )
    return linked_pairs / possible_pairs
                

In [4]:
class LamanDataset(InMemoryDataset):
    """In-memory PyG dataset built from a gzipped pickle of two graph collections.

    The pickle is expected to be indexable with ``0`` and ``1``; every graph
    found under index ``0`` is stored with ``label = 0`` and every graph under
    index ``1`` with ``label = 1``. Node features come from
    ``generate_feature_vector(graph)``.

    Args:
        root: dataset root directory passed to ``InMemoryDataset``.
        data_dir: path to the gzip-compressed pickle file read by ``process``.
        transform: forwarded to ``InMemoryDataset``.
        pre_transform: forwarded to ``InMemoryDataset`` and applied to every
            sample in ``process`` before it is saved.
        pre_filter: forwarded to ``InMemoryDataset``; samples for which it
            returns a falsy value are dropped in ``process``.
    """

    def __init__(self, root, data_dir, transform=None, pre_transform=None, pre_filter=None):
        # Assigned before super().__init__() because process() reads it and
        # the base-class constructor is what triggers processing in PyG.
        self.data_dir = data_dir
        super().__init__(root, transform, pre_transform, pre_filter)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def processed_file_names(self):
        """Name of the single cached file holding the collated dataset."""
        return ['data.pt']

    def process(self):
        """Read the pickle, convert each graph to a PyG sample, and cache the result."""
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # data_dir at files from a trusted source.
        with gzip.open(self.data_dir, 'r') as f:
            total_laman_data = pickle.load(f)

        data_list = []
        # The position of a collection in the pickle (0 or 1) is its class label.
        for label in (0, 1):
            for graph in total_laman_data[label]:
                x = generate_feature_vector(graph)
                graph_as_data = from_networkx(graph)
                graph_as_data.x = x
                graph_as_data.label = label
                data_list.append(graph_as_data)

        # Honour the hooks accepted by __init__; both are no-ops when None.
        if self.pre_filter is not None:
            data_list = [sample for sample in data_list if self.pre_filter(sample)]
        if self.pre_transform is not None:
            data_list = [self.pre_transform(sample) for sample in data_list]

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

In [5]:
# Gzip-compressed pickle that LamanDataset reads when it has to (re)build its cache.
DATA_PATH = "data/custom-generated.pkl.gz"
# An empty root is used as given; presumably the processed cache ends up
# relative to the current working directory — TODO confirm.
laman_data = LamanDataset(root="", data_dir=DATA_PATH)

In [6]:
from torch.utils.data import random_split
# Rebinds the name `DataLoader`, which an earlier cell imported from
# torch.utils.data; the loaders below are built with the PyG loader.
from torch_geometric.loader import DataLoader

# 70/30 train/test split. The last length absorbs the rounding remainder so
# the lengths always sum to len(laman_data), as random_split requires.
proportions = [.7, .3]
lengths = [int(p * len(laman_data)) for p in proportions]
lengths[-1] = len(laman_data) - sum(lengths[:-1])

# Fixed seed so the split is identical on every run.
generator1 = torch.Generator().manual_seed(42)
train_data, test_data = random_split(laman_data, lengths, generator=generator1)

BATCH_SIZE = 64
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps test
# batches reproducible from run to run.
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [13]:
from deepsets.deepset import DeepSets

In [14]:
# num_features=4 presumably matches the per-node feature width produced for
# the dataset — TODO confirm against generate_feature_vector.
model = DeepSets(num_features=4)
print(model)

# Total number of scalar entries across all parameter tensors.
param_count = sum(tensor.numel() for tensor in model.parameters())
print("Number of parameters: ", param_count)

DeepSets(
  (lin1): Linear(in_features=4, out_features=8, bias=True)
  (lin2): Linear(in_features=8, out_features=8, bias=True)
  (lin3): Linear(in_features=8, out_features=1, bias=True)
)
Number of parameters:  121
