# Pruned Monte-Carlo

* 強連結成分分解の関数を作る
* GAINの再帰部分をどうするか？
* Cでシュミレーションだけやってしまう

In [3]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm
import random
import time
from collections import deque

%matplotlib inline

In [4]:
# データの読み込み
# 枝確率を計算済みのネットワークを読み込む
network = pd.read_csv("data.csv")
network.head()

Unnamed: 0,# FromNodeId,ToNodeId,p
0,0,4,0.008
1,0,5,0.005682
2,0,7,0.033333
3,0,8,0.009615
4,0,9,0.066667


In [6]:
# numpy型に変換
network_np = network.values

In [11]:
# 入力は隣接リスト(numpy)[[from_node, to_node, edge_prob],...]
def live_edge_graph_edges(p, p_len):
    rand = np.random.uniform(0, 1, p_len)
    return np.array([[p[i][0], p[i][1]] for i in range(p_len) if rand[i] < p[i][2]])

In [17]:
# 幅優先探索によって、Sから到達可能なノードを返す
def bfs(G, S):
    visited = {s:None for s in S}
    queue = deque(S)
    while queue:
        v = queue.popleft()
        out_node = G[v]
        for u in out_node:
            if not (u in visited):
                queue.append(u)
                visited[u] = v
    return visited

In [45]:
# 幅優先探索によって、Sから到達可能なノードを返す
def bfs_reverse(G, S):
    visited = {s:None for s in S}
    queue = deque(S)
    while queue:
        v = queue.popleft()
        in_node = G.predecessors(v)
        for u in in_node:
            if not (u in visited):
                queue.append(u)
                visited[u] = v
    return visited

In [14]:
%time simulation = live_edge_graph_edges(network_np, len(network_np))

CPU times: user 697 ms, sys: 21.3 ms, total: 718 ms
Wall time: 761 ms


In [15]:
len(simulation)

51954

In [None]:
# i回目のシュミレーションでv_Vが到達できる頂点数
def GAIN(i, v, V, latest, A, h, D, E):
    # v:i回目のシュミレーションで作成されたグラフのv_Vを含む強連結成分
    
    # v_Vが必要ないため、
    #v = comp[i][v_V]["members"]
    
    # V[i]にvがない場合0(後に消されていくため)
    if v not in V[i]:
        return 0
    
    # ? delta
    if latest[i][v]:
        delta[i][v]
        
    latest[i][v] = True
    
    # len(S)==0の理由は初回のみ行えば良いため(h(ハブ)以降の到達頂点数は一回行えば十分であるため)
    # vがhのacestorだった場合、hの到達頂点数を計算して、他のacestorの時にも使い回す
    if (v in A[i]) and (len(S) == 0):
        # GAINの引数は後で変える
        # ?下手に再帰にしなくても、hのGAINが分かればいい
        # hのGAINをはじめから足しておく
        delta[i][v] = GAIN(i, h_V)
    else:
        delta[i][v] = 0
    
    # bfs
    visited = {s:None for s in S}
    Q = deque(v)
    # Xは探索済みの強連結成分
    X = set([v])
    while Q:
        u = Q.popleft()
        
        # 意味不明
        if (v in A[i]) and (u in D[i]) and (len(S) == 0):
            continue
        
        delta[i][v] += weight[i][u]
        
        
        for u_, w in E[i]:
            # uから出ている任意の枝について探索する
            if u_ == u:
                # 探索済みの強連結成分は探索しなくていいので、w not in X
                # w in V[i]はのちのupdateでV[i]が変化するため
                if (w not in X) and (w in V[i]):
                    Q.append(w)
                    X.add(w)
    return delta[i][v]

In [80]:
s = set([3])
s.add(4)
s

{3, 4}

In [2]:
def PMC_greedy(network_np, k, R):
    E_ = dict([])
    G_ = dict([])
    G = dict([])
    V = dict([])
    E = dict([])
    comp = dict([])
    weight = dict([])
    h = dict([])
    D = dict([])
    A = dict([])
    latest =dict([])
    
    for i in range(R):
        # 各辺 e を確率 pe で残すことで得られる辺集合
        E_[i] = live_edge_graph_edges(network_np, len(network_np))
        
        G_[i] = nx.DiGraph()
        G_[i].add_weighted_edges_from(E_[i])
        # G_i_ = (V, E_i_) の強連結成分を計算
        # G_i_より構築された頂点重み付き DAG
        G[i] = nx.condensation(G_[i])
        comp[i] = dict(G[i].node)
        V[i] = G[i].node()
        E[i] = G[i].edges()
        
        # 縮約後の各強連結成分のノード数
        G_i_item = dict(G[i].nodes).items()
        weight[i] = {node:len(list(w.values())[0]) for node, w in G_i_item}
        
        # h_i ← V_i において最大の次数をもつ頂点
        G_i_deg = dict(G[i].degree())
        h[i] = max(G_i_deg, key=G_i_deg.get)
        
        # h_iから到達可能な頂点集合
        # ? 型は未定
        # D:Descendant
        D[i] = set(bfs(G[i], [h[i]]))
        
        # h_iに到達可能な頂点集合
        # ? 型は未定
        # A:Ancestor
        A[i] = set(bfs_reverse(G[i], [h[i]]))
        
        latest[i] = [False for v in V[i]]
        
    
    S = []
    for i in range(k):
        t = argmax()
        S.append(t)
    return S

In [28]:
set({3:1, 4:2, 5:6})

{3, 4, 5}

In [49]:
%%time

network_np = network.values
k = 3
R = 1


E_ = dict([])
G_ = dict([])
G = dict([])
V = dict([])
E = dict([])
comp = dict([])
weight = dict([])
h = dict([])
D = dict([])
A = dict([])
latest =dict([])

for i in range(R):
    # 各辺 e を確率 pe で残すことで得られる辺集合
    start = time.time()
    E_[i] = live_edge_graph_edges(network_np, len(network_np))
    print("1",time.time() - start)

    start = time.time()
    G_[i] = nx.DiGraph()
    G_[i].add_edges_from(E_[i])
    print("2",time.time() - start)
    
    # G_i_ = (V, E_i_) の強連結成分を計算
    # G_i_より構築された頂点重み付き DAG
    start = time.time()
    G[i] = nx.condensation(G_[i])
    print("3",time.time() - start)
    
    start = time.time()
    comp[i] = dict(G[i].node)
    V[i] = G[i].node()
    E[i] = G[i].edges()
    print("4",time.time() - start)

    # 縮約後の各強連結成分のノード数
    start = time.time()
    G_i_item = dict(G[i].nodes).items()
    weight[i] = {node:len(list(w.values())[0]) for node, w in G_i_item}
    print("5",time.time() - start)

    # h_i ← V_i において最大の次数をもつ頂点
    start = time.time()
    G_i_deg = dict(G[i].degree())
    h[i] = max(G_i_deg, key=G_i_deg.get)
    print("6",time.time() - start)

    # h_iから到達可能な頂点集合
    # ? 型は未定
    # D:Descendant
    start = time.time()
    D[i] = set(bfs(G[i], [h[i]]))
    print("7",time.time() - start)

    # h_iに到達可能な頂点集合
    # ? 型は未定
    # A:Ancestor
    start = time.time()
    A[i] = set(bfs_reverse(G[i], [h[i]]))
    print("8",time.time() - start)

    start = time.time()
    latest[i] = [False for v in V[i]]
    print("9",time.time() - start)

1 0.5074639320373535
2 0.24335122108459473
3 0.9596779346466064
4 0.02229785919189453
5 0.07672572135925293
6 0.037210941314697266
7 0.003609180450439453
8 1.5020370483398438e-05
9 0.003695964813232422
CPU times: user 2.06 s, sys: 51.4 ms, total: 2.11 s
Wall time: 1.94 s


In [58]:
for i in range(10000):
    if len(comp[0][i]["members"]) >= 2:
        print(i)
        break

81


In [59]:
comp[0][81]["members"]

{2379.0, 26963.0}

In [66]:
weight[0][81]

2

In [None]:
# シュミレーションはC++かCで計算させて読み込んだ方がいいのでは?
# 強連結成分分解は自作...

In [72]:
for u_,w in E[0]:
    if u_ == 4:
        print(u_ ,w)

4 0
4 1
4 2
4 3
