In [10]:
import time
import networkx as nx
import numpy as np
from pathlib import Path
import scipy as sp
# Fixed seed so that the Monte Carlo spread estimates computed further down
# come out the same on every "Restart Kernel -> Run All".
SEED = 42
rng = np.random.default_rng(SEED)
from collections import deque

In [14]:
# Name of the dataset; every path below is derived from it.
gname = "ego-facebook"

# Input graphs (.mtx files read with scipy's Matrix Market reader).
gnn_path = Path(f"results/gnn/{gname}_50.mtx")
slicer_path = Path(f"results/slicer/{gname}_50.mtx")
actual_path = Path(f"datasets/real/{gname}/graph.mtx")

# Per-graph directories that the export cell writes its files into.
imm_gnn = Path(f"IMM/{gname}-gnn/")
imm_slicer= Path(f"IMM/{gname}-slicer/")
imm_actual = Path(f"IMM/{gname}-actual/")


def _read_mtx_digraph(mtx_path):
    """Read the sparse matrix stored at ``mtx_path`` and return it as an ``nx.DiGraph``."""
    with open(mtx_path, "rb") as stream:
        sparse_matrix = sp.io.mmread(stream)
    return nx.from_scipy_sparse_array(sparse_matrix, create_using=nx.DiGraph)


G_gnn = _read_mtx_digraph(gnn_path)
G_slicer = _read_mtx_digraph(slicer_path)
G_actual = _read_mtx_digraph(actual_path)

In [15]:
def _export_imm_inputs(graph, out_dir):
    """Write the three per-graph files into ``out_dir``.

    Files produced:
      * ``attribute.txt``  - two lines, ``n=<node count>`` and ``m=<edge count>``
      * ``graph.txt``      - edge list without any edge data
      * ``graph_ic.inf``   - edge list with each edge's ``weight`` appended

    NOTE(review): presumably this is the input layout expected by the IMM
    tool the directories are named after - confirm against that tool's docs.

    Args:
        graph: networkx graph to export.
        out_dir: ``pathlib.Path`` of the target directory; created if missing.
    """
    # open() does not create parent directories, so a fresh checkout would
    # otherwise fail with FileNotFoundError on the first write.
    out_dir.mkdir(parents=True, exist_ok=True)

    with open(out_dir / "attribute.txt", "w") as fh:
        fh.write(f"n={graph.number_of_nodes()}\n")
        fh.write(f"m={graph.number_of_edges()}\n")
    with open(out_dir / "graph.txt", "wb") as fh:
        nx.write_edgelist(graph, fh, data=False)
    with open(out_dir / "graph_ic.inf", "wb") as fh:
        nx.write_edgelist(graph, fh, data=['weight'])


# Same export for each of the three graphs, in the original order.
for graph, out_dir in (
    (G_gnn, imm_gnn),
    (G_slicer, imm_slicer),
    (G_actual, imm_actual),
):
    _export_imm_inputs(graph, out_dir)

In [24]:
def monte_carlo_trial(G: "nx.DiGraph", seed_nodes: list[int], *, generator=None):
    """Simulate one independent-cascade diffusion and return the number of activated nodes.

    Starting from ``seed_nodes``, nodes are processed in FIFO order. When a
    node is processed, each of its not-yet-activated out-neighbors is
    activated with probability equal to the ``'weight'`` attribute of the
    connecting edge; newly activated nodes are queued and processed in turn.

    Args:
        G: Directed graph whose edges carry a ``'weight'`` attribute that is
            used as the activation probability.
        seed_nodes: Initially active node ids. Duplicates are ignored so that
            a repeated seed does not get a second attempt on its out-edges.
        generator: Optional object with a ``random()`` method returning floats
            in [0, 1) (e.g. ``np.random.default_rng(seed)``). When omitted, the
            notebook-level ``rng`` is used.

    Returns:
        Count of activated nodes, seeds included.
    """
    draw = (rng if generator is None else generator).random

    # dict.fromkeys drops duplicate seeds while preserving their order.
    unique_seeds = list(dict.fromkeys(seed_nodes))
    activated_nodes = set(unique_seeds)
    queue = deque(unique_seeds)

    while queue:
        node = queue.popleft()
        successors = G.neighbors(node)
        out_edges = G[node]  # looked up once per node instead of once per edge
        for neighbor in successors:
            if neighbor in activated_nodes:
                continue
            # Strict '<': draws lie in [0, 1), so a weight of 0 never fires
            # and a weight of 1 always does.
            if draw() < out_edges[neighbor]['weight']:
                activated_nodes.add(neighbor)
                queue.append(neighbor)
    return len(activated_nodes)


def monto_carlo_spread(G: "nx.DiGraph", seed_nodes: list[int], T: int = 5000, *, generator=None):
    """Estimate the expected spread of ``seed_nodes`` as the mean of ``T`` independent trials.

    Args:
        G: Directed graph with ``'weight'`` edge attributes (see ``monte_carlo_trial``).
        seed_nodes: Initially active node ids.
        T: Number of Monte Carlo trials; must be at least 1.
        generator: Optional random generator forwarded to every trial; the
            notebook-level ``rng`` is used when omitted.

    Returns:
        Average number of activated nodes per trial, as a float.

    Raises:
        ValueError: If ``T`` is smaller than 1.
    """
    if T < 1:
        raise ValueError(f"T must be a positive number of trials, got {T}")
    total_spread = sum(
        monte_carlo_trial(G, seed_nodes, generator=generator) for _ in range(T)
    )
    return total_spread / T


In [25]:
# Hard-coded seed sets, one list per budget; entry i holds 2**i node ids
# (sizes 1, 2, 4, 8, 16, 32). Long lists are wrapped at eight ids per line.
# NOTE(review): presumably these were selected on the GNN / Slicer / actual
# graphs respectively - the notebook does not record how; confirm provenance.
gnn_seeds = [
    [0],
    [136, 3974],
    [107, 3974, 3410, 3266],
    [351, 3833, 3434, 3266, 342, 3917, 3815, 3786],
    [
        107, 3833, 3522, 3266, 186, 3836, 1374, 3779,
        4011, 3790, 3265, 2335, 1513, 3329, 3267, 1198,
    ],
    [
        107, 3841, 3766, 3266, 186, 3858, 3818, 3267,
        1513, 488, 3785, 2037, 1562, 3812, 3860, 640,
        4038, 3921, 1793, 3790, 1374, 4029, 3786, 756,
        3979, 3791, 4035, 3692, 2202, 112, 3987, 3978,
    ],
]

slicer_seeds = [
    [134],
    [0, 1884],
    [223, 1993, 1827, 2282],
    [72, 1854, 1827, 2282, 3882, 4021, 1759, 2425],
    [
        104, 1983, 1827, 2282, 1792, 4000, 1572, 3655,
        1072, 89, 259, 2031, 1857, 597, 2864, 1820,
    ],
    [
        297, 1851, 1827, 2282, 1742, 4031, 1836, 3682,
        4038, 597, 1512, 2596, 91, 2018, 310, 1830,
        2528, 2012, 624, 1943, 1572, 102, 1947, 2028,
        2344, 1339, 166, 2004, 83, 2030, 190, 591,
    ],
]

actual_seeds = [
    [107],
    [107, 1858],
    [107, 1864, 3517, 186],
    [351, 1973, 3492, 25, 1827, 2642, 3652, 1513],
    [
        136, 1831, 3851, 277, 1827, 312, 3879, 4031,
        2425, 1794, 2016, 3179, 1381, 2442, 2834, 2307,
    ],
    [
        136, 1831, 3626, 109, 1827, 3761, 162, 2489,
        3996, 1513, 2642, 175, 4, 3687, 2016, 4027,
        2831, 3717, 1913, 3122, 2010, 3303, 337, 1793,
        2033, 2657, 1339, 3905, 3722, 2596, 115, 3882,
    ],
]

In [None]:
# One row per seed budget: [GNN, Slicer, ground-truth] estimated spread.
# Every seed set is evaluated on G_actual, whichever graph it was picked on.
spreads = []
for idx in range(len(actual_seeds)):
    # The generator is consumed left to right, so the three estimates are
    # computed in GNN -> Slicer -> actual order.
    gnn_spread, slicer_spread, actual_spread = (
        monto_carlo_spread(G_actual, seed_sets[idx])
        for seed_sets in (gnn_seeds, slicer_seeds, actual_seeds)
    )
    spreads.append([gnn_spread, slicer_spread, actual_spread])

In [22]:
from tabulate import tabulate

# Header row followed by one row per seed budget; the budget for row `exp`
# is labelled 2**exp.
headers = ['k', 'GNN', 'Slicer', 'Ground Truth']
table = [headers] + [[str(2**exp), *row] for exp, row in enumerate(spreads)]
print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

╒═════╤═════════╤══════════╤════════════════╕
│   k │     GNN │   Slicer │   Ground Truth │
╞═════╪═════════╪══════════╪════════════════╡
│   1 │ 2744.39 │  2450.47 │        2928.41 │
├─────┼─────────┼──────────┼────────────────┤
│   2 │ 2941.84 │  2920.99 │        3030.8  │
├─────┼─────────┼──────────┼────────────────┤
│   4 │ 2941.41 │  2942.73 │        3071.72 │
├─────┼─────────┼──────────┼────────────────┤
│   8 │ 2983.83 │  3053.37 │        3091.14 │
├─────┼─────────┼──────────┼────────────────┤
│  16 │ 2993.74 │  3093.97 │        3107    │
├─────┼─────────┼──────────┼────────────────┤
│  32 │ 3024.28 │  3113.64 │        3118.72 │
╘═════╧═════════╧══════════╧════════════════╛
