In [1]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
import random
import time
%matplotlib inline

In [29]:
# データの読み込み
# 枝確率を計算済みのネットワークを読み込む
network = pd.read_csv("data.csv")
network.head()

Unnamed: 0,# FromNodeId,ToNodeId,p
0,0,4,0.008
1,0,5,0.005682
2,0,7,0.033333
3,0,8,0.009615
4,0,9,0.066667


In [17]:
# numpy型に変換
network_np = network.values

In [18]:
# 空の有向グラフを作成
G = nx.DiGraph()

In [19]:
# 重み付きの枝を加える
G.add_weighted_edges_from(network_np)

In [20]:
# 入力は隣接リスト(numpy)[[from_node, to_node, edge_prob],...]
def live_edge_graph_edges(p, p_len):
    rand = np.random.uniform(0, 1, p_len)
    return np.array([[p[i][0], p[i][1]] for i in range(p_len) if rand[i] > p[i][2]])

In [21]:
%time simulation = live_edge_graph_edges(network_np, len(network_np))

CPU times: user 2.08 s, sys: 51.4 ms, total: 2.13 s
Wall time: 2.13 s


In [22]:
simulation

array([[0.0000e+00, 4.0000e+00],
       [0.0000e+00, 5.0000e+00],
       [0.0000e+00, 7.0000e+00],
       ...,
       [7.5885e+04, 7.9000e+03],
       [7.5885e+04, 1.6086e+04],
       [7.5886e+04, 5.1414e+04]])

In [23]:
def reach_node_size(live_edge, seed):
    # グラフ作成
    H = nx.DiGraph()
    H.add_edges_from(live_edge)
    
    # 到達可能な頂点集合をシードごとに和集合して求める
    reach_set = set([])
    for s in seed:
        # たどり着いているノードがシードでない場合だけ計算する
        if s not in reach_set:
            reach_set |= set(nx.dfs_preorder_nodes(H,source=s))
    return len(reach_set)

In [24]:
%time reach_node_size(simulation, [i for i in range(50)])

CPU times: user 2.95 s, sys: 137 ms, total: 3.09 s
Wall time: 3.11 s


23520

In [25]:
# 影響数の期待値(近似)
# approxim_expected_influence_num(枝確率, シード, ε, δ)
def approx_expect_inf_size(p, seed, epsi, delta):
    # グラフの作成
    G = nx.DiGraph()
    G.add_weighted_edges_from(p)
    
    # 頂点数
    n = G.number_of_nodes()
    
    # 試行回数を算出する
    T = int(((n**2) / (epsi**2)) * np.log(1/delta)) + 1
        
    # 各回のシュミレーションの結果の和が格納される
    X = 0
    len_p = len(p)
    # T回シュミレーションしていく
    for i in tqdm(range(T)):
        # live_edgeグラフを作る
        live_edge = live_edge_graph_edges(p, len_p)
        X += reach_node_size(live_edge, seed)
    expected_num = X / T
    return expected_num

In [26]:
# シュミレーションの結果的に変化していない場合は、回数を減らす関数
# 影響数の期待値(近似)
# approxim_expected_influence_num(枝確率, シード, ε, δ, 変化数)
def approx_expect_inf_size_heuris(p, seed, epsi, delta, change):
    # グラフの作成
    G = nx.DiGraph()
    G.add_weighted_edges_from(p)
    
    # 頂点数
    n = G.number_of_nodes()
    
    # 試行回数を算出する
    T = int(((n**2) / (epsi**2)) * np.log(1/delta)) + 1
        
    # 各回のシュミレーションの結果の和が格納される
    X = 0
    len_p = len(p)
    # T回シュミレーションしていく
    for i in tqdm(range(T)):
        # live_edgeグラフを作る
        live_edge = live_edge_graph_edges(p, len_p)
        X += reach_node_size(live_edge, seed)
        
        # 前回と比較して変化なし
        # if (i != 0) and (abs(((X / (i+1)) - expected_num) / expected_num) < rate_change):
        if (i != 0) and (abs(((X / (i+1)) - expected_num) < change)):    
            return expected_num
        
        expected_num = X / (i+1)
    return expected_num

In [27]:
epsi = 0.999999999
delta = 0.99999999
n = 70000
int(((n**2) / (epsi**2)) * np.log(1/delta)) + 1

50

In [28]:
%time approx_expect_inf_size_heuris(network_np, [0,3], epsi, delta, 1)

HBox(children=(IntProgress(value=0, max=58), HTML(value='')))

CPU times: user 24.3 s, sys: 1.08 s, total: 25.4 s
Wall time: 26.1 s


23428.333333333332