In [53]:
import random
import io
import networkx as nx
import scipy as sp
import numpy as np
rng = np.random.default_rng()
from collections import deque
from pathlib import Path

In [51]:
def monte_carlo_diffusion(G: nx.DiGraph, k: int):
  seed_nodes = [i.item() for i in rng.choice(list(G.nodes()), k, replace=False)]
  activated_nodes = set(seed_nodes)
  queue = deque(seed_nodes)
  diffusion_edges = []
  diffusion_timestamp_map = {}
  for node in seed_nodes:
    diffusion_timestamp_map[node] = 0
  while queue:
    node = queue.popleft()
    for neighbor in G.neighbors(node):
      if neighbor not in activated_nodes:
        if rng.random() < G[node][neighbor]['weight']:
          activated_nodes.add(neighbor)
          queue.append(neighbor)
          diffusion_edges.append((node, neighbor))
          diffusion_timestamp_map[neighbor] = diffusion_timestamp_map[node] + 1
  
  n = G.number_of_nodes()
  diffusion_timestamp_temp = [[] for _ in range(n)]
  diffusion_timestamp = []
  for k, v in diffusion_timestamp_map.items():
    diffusion_timestamp_temp[v].append(k)

  for t in range(len(diffusion_timestamp_temp)):
    if len(diffusion_timestamp_temp[t]) > 0:
      diffusion_timestamp.append(diffusion_timestamp_temp[t])

  return diffusion_edges, diffusion_timestamp

Erdos-Renyi

In [65]:
n = 10000
p = 0.2
G = nx.fast_gnp_random_graph(n, p, directed=True)
print(G.number_of_edges())

for (u,v,w) in G.edges(data=True):
    w['weight'] = random.uniform(0, 0.3)

fname = f"er_{n}_{str(p).replace('.', '')}"
fh = open(f"./datasets/synthetic/{fname}/{fname}.mtx", "wb")
m = nx.to_scipy_sparse_array(G)
sp.io.mmwrite(fh, m, precision=5)

19997118


In [66]:
n = 10000
p = 0.2

fname = f"er_{n}_{str(p).replace('.', '')}"
pdir = Path(f"./datasets/synthetic/{fname}")
pdir_graph = Path(f"./datasets/synthetic/{fname}/{fname}.mtx")
pdir_diffusion = Path(f"./datasets/synthetic/{fname}/diffusions")
pdir_timestamps = pdir_diffusion / "timestamps/"
pdir_edges = pdir_diffusion / "edges/"

pdir_timestamps.mkdir(parents=True, exist_ok=True)
pdir_edges.mkdir(parents=True, exist_ok=True)

with pdir_graph.open("rb") as fh:
  G = nx.from_scipy_sparse_array(sp.io.mmread(fh), create_using=nx.DiGraph)

for i in range(200):
  d_edges, d_timestamp = monte_carlo_diffusion(G, 1)
  p_ts = pdir_timestamps / f"{i}.txt"
  p_edges = pdir_edges / f"{i}.edgelist"
  with p_ts.open("w") as fh:
    for ts in d_timestamp:
      fh.write(" ".join(map(str, ts)) + "\n")
  with p_edges.open("w") as fh:
    fh.write(f"#Source Target\n")
    for e in d_edges:
      fh.write(" ".join(map(str, e)) + "\n")

Scale Free

In [72]:
n = 10000
a = 0.41
b = 0.54
c = 0.05
G = nx.scale_free_graph(n, a, b, c)
print(G.number_of_edges())

for (u,v,w) in G.edges(data=True):
    w['weight'] = random.uniform(0, 0.3)

fname = f"sf_{n}_{str(a).replace('.', '')}_{str(b).replace('.', '')}_{str(c).replace('.', '')}"
fh = open(f"./datasets/synthetic/{fname}/{fname}.mtx", "wb")
m = nx.to_scipy_sparse_array(G)
sp.io.mmwrite(fh, m, precision=5)

21449


In [74]:
n = 1000
a = 0.41
b = 0.54
c = 0.05

fname = f"sf_{n}_{str(a).replace('.', '')}_{str(b).replace('.', '')}_{str(c).replace('.', '')}"
pdir = Path(f"./datasets/synthetic/{fname}")
pdir_graph = Path(f"./datasets/synthetic/{fname}/{fname}.mtx")
pdir_diffusion = Path(f"./datasets/synthetic/{fname}/diffusions")
pdir_timestamps = pdir_diffusion / "timestamps/"
pdir_edges = pdir_diffusion / "edges/"

pdir_timestamps.mkdir(parents=True, exist_ok=True)
pdir_edges.mkdir(parents=True, exist_ok=True)

with pdir_graph.open("rb") as fh:
  G = nx.from_scipy_sparse_array(sp.io.mmread(fh), create_using=nx.DiGraph)

for i in range(200):
  d_edges, d_timestamp = monte_carlo_diffusion(G, 1)
  p_ts = pdir_timestamps / f"{i}.txt"
  p_edges = pdir_edges / f"{i}.edgelist"
  with p_ts.open("w") as fh:
    for ts in d_timestamp:
      fh.write(" ".join(map(str, ts)) + "\n")
  with p_edges.open("w") as fh:
    fh.write(f"#Source Target\n")
    for e in d_edges:
      fh.write(" ".join(map(str, e)) + "\n")

Real World Networks

In [75]:
fh = open("./graphs/higgs-social_network.edgelist", "rb")
G = nx.read_edgelist(fh, create_using=nx.DiGraph)
fh.close()

for (u,v,w) in G.edges(data=True):
    w['weight'] = random.uniform(0, 0.3)

fname = "higgs"
fh = open(f"./datasets/real/{fname}/{fname}.mtx", "wb")
m = nx.to_scipy_sparse_array(G)
sp.io.mmwrite(fh, m, precision=5)

In [76]:
fname = "higgs"
pdir = Path(f"./datasets/real/{fname}")
pdir_graph = Path(f"./datasets/real/{fname}/{fname}.mtx")
pdir_diffusion = Path(f"./datasets/real/{fname}/diffusions")
pdir_timestamps = pdir_diffusion / "timestamps/"
pdir_edges = pdir_diffusion / "edges/"

pdir_timestamps.mkdir(parents=True, exist_ok=True)
pdir_edges.mkdir(parents=True, exist_ok=True)

with pdir_graph.open("rb") as fh:
  G = nx.from_scipy_sparse_array(sp.io.mmread(fh), create_using=nx.DiGraph)

for i in range(200):
  d_edges, d_timestamp = monte_carlo_diffusion(G, 1)
  p_ts = pdir_timestamps / f"{i}.txt"
  p_edges = pdir_edges / f"{i}.edgelist"
  with p_ts.open("w") as fh:
    for ts in d_timestamp:
      fh.write(" ".join(map(str, ts)) + "\n")
  with p_edges.open("w") as fh:
    fh.write(f"#Source Target\n")
    for e in d_edges:
      fh.write(" ".join(map(str, e)) + "\n")

In [78]:
fh = open("./graphs/twitter_combined.txt", "rb")
G = nx.read_edgelist(fh, create_using=nx.DiGraph, nodetype=int)

for (u,v,w) in G.edges(data=True):
    w['weight'] = random.uniform(0, 0.3)

fname = "ego-twitter"
fh = open(f"./datasets/real/{fname}/{fname}.mtx", "wb")
m = nx.to_scipy_sparse_array(G)
sp.io.mmwrite(fh, m, precision=5)

In [79]:
fname = "ego-twitter"
pdir = Path(f"./datasets/real/{fname}")
pdir_graph = Path(f"./datasets/real/{fname}/{fname}.mtx")
pdir_diffusion = Path(f"./datasets/real/{fname}/diffusions")
pdir_timestamps = pdir_diffusion / "timestamps/"
pdir_edges = pdir_diffusion / "edges/"

pdir_timestamps.mkdir(parents=True, exist_ok=True)
pdir_edges.mkdir(parents=True, exist_ok=True)

with pdir_graph.open("rb") as fh:
  G = nx.from_scipy_sparse_array(sp.io.mmread(fh), create_using=nx.DiGraph)

for i in range(200):
  d_edges, d_timestamp = monte_carlo_diffusion(G, 1)
  p_ts = pdir_timestamps / f"{i}.txt"
  p_edges = pdir_edges / f"{i}.edgelist"
  with p_ts.open("w") as fh:
    for ts in d_timestamp:
      fh.write(" ".join(map(str, ts)) + "\n")
  with p_edges.open("w") as fh:
    fh.write(f"#Source Target\n")
    for e in d_edges:
      fh.write(" ".join(map(str, e)) + "\n")

In [80]:
fh = open("./graphs/facebook_combined.txt", "rb")
G = nx.read_edgelist(fh, create_using=nx.Graph, nodetype=int)

G = G.to_directed()
for (u,v,w) in G.edges(data=True):
    w['weight'] = random.uniform(0, 0.3)

fname = "ego-facebook"
fh = open(f"./datasets/real/{fname}/{fname}.mtx", "wb")
m = nx.to_scipy_sparse_array(G)
sp.io.mmwrite(fh, m, precision=5)

In [81]:
fname = "ego-facebook"
pdir = Path(f"./datasets/real/{fname}")
pdir_graph = Path(f"./datasets/real/{fname}/{fname}.mtx")
pdir_diffusion = Path(f"./datasets/real/{fname}/diffusions")
pdir_timestamps = pdir_diffusion / "timestamps/"
pdir_edges = pdir_diffusion / "edges/"

pdir_timestamps.mkdir(parents=True, exist_ok=True)
pdir_edges.mkdir(parents=True, exist_ok=True)

with pdir_graph.open("rb") as fh:
  G = nx.from_scipy_sparse_array(sp.io.mmread(fh), create_using=nx.DiGraph)

for i in range(200):
  d_edges, d_timestamp = monte_carlo_diffusion(G, 1)
  p_ts = pdir_timestamps / f"{i}.txt"
  p_edges = pdir_edges / f"{i}.edgelist"
  with p_ts.open("w") as fh:
    for ts in d_timestamp:
      fh.write(" ".join(map(str, ts)) + "\n")
  with p_edges.open("w") as fh:
    fh.write(f"#Source Target\n")
    for e in d_edges:
      fh.write(" ".join(map(str, e)) + "\n")