In [8]:
%%capture
# NOTE(review): TORCH is exported here but the pip command below never references it — confirm it is still needed.
%env TORCH=2.0.1+cu118
# torch-geometric is installed without a version pin, so the resolved version can differ between runs.
!pip install torch-geometric
# Prints the installed torch version; the output is swallowed by %%capture at the top of this cell.
!python -c "import torch; print(torch.__version__)"

In [9]:
import gc
import os
import pathlib
import time
from enum import Enum
from itertools import product
from math import log
from random import randint

import torch
import torch_geometric.utils as utils
from torch_geometric.data import Data

# Use the GPU when one is available, otherwise fall back to the CPU.
# Every graph generated in this notebook is moved to this device before it is saved.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [10]:
# Root folder under which every generated graph is stored.
GRAPHS_PATH = "drive/MyDrive/GRL_Mini_Project/data"
# Scale factor used to encode an edge probability as an integer inside ER file
# names; probabilities below 1 / MIN_P are rejected by graph_file_name.
MIN_P = 100000

class GenerationMode(Enum):
  """Random-graph families; each value is the family's folder name under GRAPHS_PATH."""
  ER = "erdos_renyi"
  SBM = "stochastic_block_model"
  BA = "barabasi_albert"

def graph_file_name(mode: GenerationMode, **kwargs):
  """Build the path of the .pt file that stores one generated graph.

  Keyword arguments depend on ``mode``:

  * ``GenerationMode.ER``: ``num_nodes``, ``p_edge``, ``num_features`` (required),
    ``idx`` (default ``"0"``) and ``subtype`` (default ``"regular"``).
  * ``GenerationMode.SBM``: ``num_nodes``, ``block_type``, ``edge_type``, ``idx``.
  * ``GenerationMode.BA``: ``num_nodes``, ``subtype``, ``idx``.

  Returns:
    The path as a string, joined with ``os.sep`` and rooted at ``GRAPHS_PATH``.

  Raises:
    ValueError: if ``mode`` is not a known GenerationMode, or (ER only) if
      ``p_edge`` is smaller than ``1 / MIN_P``.
    KeyError: if a required keyword argument for the chosen mode is missing.
  """
  if mode == GenerationMode.ER:
    num_nodes = kwargs["num_nodes"]
    p_edge = kwargs["p_edge"]
    num_features = kwargs["num_features"]
    idx = kwargs.get("idx", "0")
    subtype = kwargs.get("subtype", "regular")

    if p_edge < 1 / MIN_P:
      # In hindsight, p_edge does not need to be encoded at all.
      # Report the offending argument itself (the message used to reference an
      # unrelated name that is not defined inside this function).
      raise ValueError(f"Cannot assign file name, probability {p_edge} too small.")

    # The probability is stored as a truncated integer multiple of 1 / MIN_P.
    # Truncation (not rounding) must be kept: existing files are named this way.
    stem = "_".join([
        "graph", str(num_nodes), str(int(p_edge * MIN_P)), str(num_features), str(idx)
    ])
    return os.sep.join([GRAPHS_PATH, mode.value, subtype, stem + ".pt"])

  elif mode == GenerationMode.SBM:
    num_nodes = kwargs["num_nodes"]
    block_type = kwargs["block_type"]
    edge_type = kwargs["edge_type"]
    idx = kwargs["idx"]

    return os.sep.join([
        GRAPHS_PATH, mode.value, block_type + "_" + edge_type, str(num_nodes), str(idx) + ".pt"
    ])

  elif mode == GenerationMode.BA:
    num_nodes = kwargs["num_nodes"]
    subtype = kwargs["subtype"]
    idx = kwargs["idx"]

    return os.sep.join([
        GRAPHS_PATH, mode.value, subtype, str(num_nodes), str(idx) + ".pt"
    ])

  else:
    raise ValueError("Cannot identify generation mode.")

# Quick visual check of the three naming schemes.
print(graph_file_name(mode=GenerationMode.ER, num_nodes=10, p_edge=0.5, num_features=10))
print(graph_file_name(mode=GenerationMode.SBM, num_nodes=10, block_type="few", edge_type="dense_in", idx=1))
print(graph_file_name(mode=GenerationMode.BA, num_nodes=5, subtype="sparse", idx=2))

drive/MyDrive/GRL_Mini_Project/data/erdos_renyi/regular/graph_10_50000_10_0.pt
drive/MyDrive/GRL_Mini_Project/data/stochastic_block_model/few_dense_in/10/1.pt
drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/sparse/5/2.pt


In [11]:
def generate_erdos_renyi_graph(num_nodes, p_edge, num_features):
    """Create one random graph with random node features on ``device``.

    The edge index comes from ``utils.erdos_renyi_graph(num_nodes, p_edge)`` and
    the feature matrix from ``torch.rand(num_nodes, num_features)``.

    Returns:
        A ``Data`` object holding ``x`` and ``edge_index``, moved to ``device``.
    """
    # Edges are sampled before features so the order of random draws is unchanged.
    topology = utils.erdos_renyi_graph(num_nodes, p_edge)
    features = torch.rand(num_nodes, num_features)
    sample = Data(x=features, edge_index=topology)
    return sample.to(device)

# Smoke test: the feature matrix should be (num_nodes, num_features).
generate_erdos_renyi_graph(10, 0.5, 10).x.shape

torch.Size([10, 10])

In [12]:
# Grid of Erdős–Rényi settings. NOTE: nums_nodes, subtypes and num_graphs are
# plain module-level names that other cells in this notebook also assign.
nums_nodes = [10, 50, 100, 500, 1000, 5000]
nums_features = [10, 100, 1000, 10000]
subtypes = ["regular", "sparse", "very_sparse"]
p = 0.5
num_graphs = 32

def graph_property_iterator():
  """Yield one ``(num_nodes, num_features, idx, p_edge, subtype)`` tuple per ER graph.

  Iteration order is nodes -> features -> graph index -> subtype. The edge
  probability paired with each subtype is, in the order of ``subtypes``:
  ``p``, ``log(n) / (10 * n)`` and ``1 / n`` for ``n`` nodes.
  """
  for n in nums_nodes:
    for width in nums_features:
      for graph_idx in range(num_graphs):
        densities = (p, log(n) / (10 * n), 1 / n)
        yield from (
            (n, width, graph_idx, prob, kind)
            for prob, kind in zip(densities, subtypes)
        )

def generate_all_erdos_renyi_graphs(print_existing=True):
  """Generate and save every ER graph from graph_property_iterator() that is not on disk yet.

  For each setting the target path is computed with graph_file_name(); existing
  files are skipped, missing ones are created with generate_erdos_renyi_graph()
  and written with torch.save().

  Args:
    print_existing: when true, print a line for every file that is skipped
      because it already exists.

  Progress is printed after a graph is generated if its running index is a
  multiple of 50 or more than 5 seconds have passed since the last progress line.
  """
  # The three standard subtype folders are created up front, as before, so they
  # exist even when every file is skipped.
  for subtype_dir in ("regular", "sparse", "very_sparse"):
    pathlib.Path(os.sep.join([GRAPHS_PATH, "erdos_renyi", subtype_dir])).mkdir(parents=True, exist_ok=True)

  i, last_time = 0, time.time()
  # Count the settings without materialising them in a list.
  max_i = sum(1 for _ in graph_property_iterator())
  for num_nodes, num_features, graph_idx, p_edge, subtype in graph_property_iterator():
    i += 1

    file_name = graph_file_name(
        mode=GenerationMode.ER,
        num_nodes=num_nodes,
        p_edge=p_edge,
        num_features=num_features,
        subtype=subtype,
        idx=graph_idx,
    )

    if os.path.isfile(file_name):
      if print_existing:
        print("Already exists, skipping", file_name)
      continue

    graph = generate_erdos_renyi_graph(num_nodes, p_edge, num_features)
    # Make sure the destination folder exists for whatever subtype the iterator
    # produced (the folder used to be computed here but never used).
    pathlib.Path(file_name).parent.mkdir(parents=True, exist_ok=True)
    # NOTE(review): the graph lives on `device` when saved; if that is CUDA the
    # file may need map_location to load on a CPU-only runtime — confirm.
    torch.save(graph, file_name)

    if i % 50 == 0 or time.time() - last_time > 5:
      last_time = time.time()
      print(f"{i}/{max_i} Generated", file_name)

  gc.collect()
  print("Done!")

generate_all_erdos_renyi_graphs()

Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/erdos_renyi/regular/graph_10_50000_10_0.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/erdos_renyi/sparse/graph_10_2302_10_0.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/erdos_renyi/very_sparse/graph_10_10000_10_0.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/erdos_renyi/regular/graph_10_50000_10_1.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/erdos_renyi/sparse/graph_10_2302_10_1.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/erdos_renyi/very_sparse/graph_10_10000_10_1.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/erdos_renyi/regular/graph_10_50000_10_2.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/erdos_renyi/sparse/graph_10_2302_10_2.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/erdos_renyi/very_sparse/graph_10_10000_10_2.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/da

In [16]:
from itertools import product
from random import randint

# Settings for the stochastic-block-model graphs.
# NOTE: nums_nodes, num_features and num_graphs are module-level names that
# other cells in this notebook also assign.
nums_nodes = [10, 50, 100, 500, 1000, 5000]
num_features = 10
block_types = ["few", "many"]
edge_types = ["dense_in", "dense_out"]
num_graphs = 32

# Number of blocks as a function of the node count n.
block_nums = {
    "few": lambda n: 5,
    "many": lambda n: int(n / 5),
}
# Block-to-block edge-probability matrix as a function of the block count c.
edge_probs = {
    # Identity matrix: probability 1 inside a block, 0 between blocks.
    "dense_in": lambda c: torch.eye(c),
    # torch.ones(c) is 1-D and broadcasts against the c x c identity, giving
    # 0 on the diagonal and 1 / c everywhere else.
    "dense_out": lambda c: (torch.ones(c) - torch.eye(c)) / c
}

def partition(n, c):
  """Randomly split ``n`` items into ``c`` non-empty buckets.

  Every bucket starts with one item; each of the remaining ``n - c`` items is
  added to a bucket chosen uniformly with ``randint``.

  Returns:
    A list of ``c`` positive integers that sum to ``n``.

  Raises:
    ValueError: if ``n`` items cannot fill ``c`` non-empty buckets. Previously
      ``n < c`` silently returned ``c`` ones, whose sum is not ``n``.
  """
  if c < 0 or n < c or (c == 0 and n > 0):
    raise ValueError(f"Cannot partition {n} items into {c} non-empty buckets.")

  buckets = [1] * c
  for _ in range(n - c):
    buckets[randint(0, c - 1)] += 1
  return buckets

def generate_all_stochastic_blockmodel_graphs(print_existing=True):
  """Generate and save every stochastic-block-model graph that is not on disk yet.

  Iterates over every (num_nodes, block_type, edge_type) combination and, for
  each, over ``range(num_graphs)``. The block count comes from ``block_nums``
  and the edge-probability matrix from ``edge_probs``. Missing graphs are
  sampled with ``utils.stochastic_blockmodel_graph`` using random block sizes
  from ``partition``, given ``torch.rand`` features of width ``num_features``,
  moved to ``device`` and written with ``torch.save``.

  Args:
    print_existing: when true (the default, matching the previous behaviour),
      print a line for every file skipped because it already exists. Mirrors
      the flag of the Erdős–Rényi generator.

  Progress is printed after a graph is generated if its running index is a
  multiple of 50 or more than 5 seconds have passed since the last progress line.
  """
  i, last_time = 0, time.time()
  key = list(product(nums_nodes, block_types, edge_types))
  max_i = len(key) * num_graphs
  for num_nodes, block_type, edge_type in key:
    block_num = block_nums[block_type](num_nodes)
    edge_prob = edge_probs[edge_type](block_num)
    for idx in range(num_graphs):
      file_name = graph_file_name(
          mode=GenerationMode.SBM,
          num_nodes=num_nodes,
          block_type=block_type,
          edge_type=edge_type,
          idx=idx
      )
      folder_name = os.path.dirname(file_name)
      i += 1
      if not os.path.isdir(folder_name):
        pathlib.Path(folder_name).mkdir(parents=True, exist_ok=True)
        print("Created folder", folder_name)

      if os.path.isfile(file_name):
        if print_existing:
          print("Already exists, skipping", file_name)
        continue

      # Block sizes are re-drawn for every graph; the probability matrix is shared.
      edge_index = utils.stochastic_blockmodel_graph(partition(num_nodes, block_num), edge_prob)
      x = torch.rand(num_nodes, num_features)
      # NOTE(review): the graph lives on `device` when saved; if that is CUDA the
      # file may need map_location to load on a CPU-only runtime — confirm.
      graph = Data(x=x, edge_index=edge_index).to(device)
      torch.save(graph, file_name)

      if i % 50 == 0 or time.time() - last_time > 5:
        last_time = time.time()
        print(f"{i}/{max_i} Generated", file_name)

  gc.collect()
  print("Done!")

generate_all_stochastic_blockmodel_graphs()

Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/stochastic_block_model/few_dense_in/10/0.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/stochastic_block_model/few_dense_in/10/1.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/stochastic_block_model/few_dense_in/10/2.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/stochastic_block_model/few_dense_in/10/3.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/stochastic_block_model/few_dense_in/10/4.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/stochastic_block_model/few_dense_in/10/5.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/stochastic_block_model/few_dense_in/10/6.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/stochastic_block_model/few_dense_in/10/7.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/stochastic_block_model/few_dense_in/10/8.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project

In [15]:
# Settings for the Barabási–Albert graphs.
# NOTE: nums_nodes, num_features, subtypes and num_graphs are module-level
# names that other cells in this notebook also assign.
nums_nodes = [10, 50, 100, 500, 1000, 5000]
num_features = 10
# These strings become folder names via graph_file_name; "very sparse" contains
# a space, unlike the Erdős–Rényi subtype "very_sparse".
subtypes = ["dense", "sparse", "very sparse"]
num_graphs = 32

# Second argument passed to utils.barabasi_albert_graph, as a function of the node count n.
edge_nums = {
    "dense": lambda n: int(n / 2),
    "sparse": lambda n: int(log(n)),
    "very sparse": lambda n: 1,
}

def generate_all_barabasi_albert_graphs(print_existing=True):
  """Generate and save every Barabási–Albert graph that is not on disk yet.

  Iterates over every (num_nodes, subtype) combination and, for each, over
  ``range(num_graphs)``. Missing graphs are sampled with
  ``utils.barabasi_albert_graph(num_nodes, edge_nums[subtype](num_nodes))``,
  given ``torch.rand`` features of width ``num_features``, moved to ``device``
  and written with ``torch.save``.

  Args:
    print_existing: when true (the default, matching the previous behaviour),
      print a line for every file skipped because it already exists. Mirrors
      the flag of the Erdős–Rényi generator.

  Progress is printed after a graph is generated if its running index is a
  multiple of 50 or more than 5 seconds have passed since the last progress line.
  """
  i, last_time = 0, time.time()
  key = list(product(nums_nodes, subtypes))
  max_i = len(key) * num_graphs
  for num_nodes, subtype in key:
    for idx in range(num_graphs):
      file_name = graph_file_name(
          mode=GenerationMode.BA,
          num_nodes=num_nodes,
          subtype=subtype,
          idx=idx
      )
      folder_name = os.path.dirname(file_name)
      i += 1
      if not os.path.isdir(folder_name):
        pathlib.Path(folder_name).mkdir(parents=True, exist_ok=True)
        print("Created folder", folder_name)

      if os.path.isfile(file_name):
        if print_existing:
          print("Already exists, skipping", file_name)
        continue

      edge_index = utils.barabasi_albert_graph(num_nodes, edge_nums[subtype](num_nodes))
      x = torch.rand(num_nodes, num_features)
      # NOTE(review): the graph lives on `device` when saved; if that is CUDA the
      # file may need map_location to load on a CPU-only runtime — confirm.
      graph = Data(x=x, edge_index=edge_index).to(device)
      torch.save(graph, file_name)

      if i % 50 == 0 or time.time() - last_time > 5:
        last_time = time.time()
        print(f"{i}/{max_i} Generated", file_name)

  gc.collect()
  print("Done!")

generate_all_barabasi_albert_graphs()

Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/dense/10/0.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/dense/10/1.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/dense/10/2.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/dense/10/3.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/dense/10/4.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/dense/10/5.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/dense/10/6.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/dense/10/7.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/dense/10/8.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/dense/10/9.pt
Already exists, skipping drive/MyDrive/GRL_Mini_Project/data/barabasi_albert/dense/10/10.p