In [3]:
import itertools
import random
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from scipy.sparse.csgraph import shortest_path
import math
from tqdm import tqdm_notebook as tqdm
import pandas as pd

%matplotlib inline

# Epinions

https://snap.stanford.edu/data/soc-Epinions1.html

## データ準備

In [4]:
# Load the tab-separated Epinions edge list; header=3 takes the column names
# from 0-based row 3 of the file.
network = pd.read_csv("data/Epinions/soc-Epinions1.txt", sep="\t", header=3)
network.head()

Unnamed: 0,# FromNodeId,ToNodeId
0,0,4
1,0,5
2,0,7
3,0,8
4,0,9


## WeightedCascade

### 各ノードの入次数を計算する

In [10]:
# Distinct edge targets, in order of first appearance.
pd.unique(network["ToNodeId"])

array([    4,     5,     7, ..., 75880, 75881, 52098])

In [11]:
# Keep the distinct target-node ids for later use.
node_id = pd.unique(network["ToNodeId"])

In [12]:
# Count incoming edges per target node: one row per ToNodeId.
in_degree = (
    network.groupby("ToNodeId")
    .size()
    .reset_index(name="in_degree")
)
in_degree.head()

Unnamed: 0,ToNodeId,in_degree
0,0,636
1,1,802
2,2,237
3,3,40
4,4,125


### 枝確率の計算

枝$e$の終点を頂点$v$、  
頂点$v$の入次数を$d(v)$としたとき、枝確率$p(e)$を次のように定める。
$$ p(e)=\frac{1}{d(v)} $$

In [13]:
# Weighted-cascade probability: the reciprocal of each node's in-degree.
in_degree = in_degree.assign(WC=lambda counts: 1 / counts["in_degree"])
in_degree.head()

Unnamed: 0,ToNodeId,in_degree,WC
0,0,636,0.001572
1,1,802,0.001247
2,2,237,0.004219
3,3,40,0.025
4,4,125,0.008


In [14]:
# Attach a WC probability to every edge by looking up its target node.
# Only the key and the WC column are merged, so there is nothing to drop afterwards.
network_p = network.merge(in_degree[["ToNodeId", "WC"]], on="ToNodeId", how="left")
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,WC
0,0,4,0.008
1,0,5,0.005682
2,0,7,0.033333
3,0,8,0.009615
4,0,9,0.066667


### データの出力 

In [16]:
# Export the WC-weighted edge list without the row index.
network_p.to_csv(path_or_buf="data/Epinions/WC.csv", index=False)

## TRIVALENCY

### {0.1,0.01,0.001}をランダムに割り当てる

In [17]:
# Trivalency model: each edge independently receives one of three probabilities,
# drawn uniformly at random.
P = [0.1, 0.01, 0.001]
# Fixed seed so the exported TR.csv is identical on every run; a local
# RandomState keeps NumPy's global random state untouched.
TR_SEED = 0
# Size the draw from network_p, the frame the column is attached to.
TR = np.random.RandomState(TR_SEED).choice(P, len(network_p))
network_p["TR"] = TR

# Keep only the edge endpoints and the trivalency probability.
network_p = network_p[["# FromNodeId", "ToNodeId", "TR"]]
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,TR
0,0,4,0.1
1,0,5,0.01
2,0,7,0.1
3,0,8,0.01
4,0,9,0.1


In [18]:
# Export the trivalency edge list without the row index.
network_p.to_csv(path_or_buf="data/Epinions/TR.csv", index=False)

## P=0.1

In [19]:
# Uniform model: every edge carries the same probability P.
P = 0.1
# Take the edge endpoints and attach the constant column in one step.
network_p = network_p[["# FromNodeId", "ToNodeId"]].assign(**{"P=0.1": P})
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,P=0.1
0,0,4,0.1
1,0,5,0.1
2,0,7,0.1
3,0,8,0.1
4,0,9,0.1


In [20]:
# Export the P=0.1 edge list without the row index.
network_p.to_csv(path_or_buf="data/Epinions/P=0_1.csv", index=False)

## P=0.01

In [21]:
# Uniform model: every edge carries the same probability P.
P = 0.01
# Take the edge endpoints and attach the constant column in one step.
network_p = network_p[["# FromNodeId", "ToNodeId"]].assign(**{"P=0.01": P})
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,P=0.01
0,0,4,0.01
1,0,5,0.01
2,0,7,0.01
3,0,8,0.01
4,0,9,0.01


In [22]:
# Export the P=0.01 edge list without the row index.
network_p.to_csv(path_or_buf="data/Epinions/P=0_01.csv", index=False)

## P=0.001

In [23]:
# Uniform model: every edge carries the same probability P.
P = 0.001
# Take the edge endpoints and attach the constant column in one step.
network_p = network_p[["# FromNodeId", "ToNodeId"]].assign(**{"P=0.001": P})
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,P=0.001
0,0,4,0.001
1,0,5,0.001
2,0,7,0.001
3,0,8,0.001
4,0,9,0.001


In [24]:
# Export the P=0.001 edge list without the row index.
network_p.to_csv(path_or_buf="data/Epinions/P=0_001.csv", index=False)

# データ解析用(縮小ver)

In [28]:
# Reduced graph: keep only edges whose two endpoints both have an id below 30.
# Note that this rebinds `network`, so the full edge list is no longer available
# under that name afterwards.
network = network.loc[network[["# FromNodeId", "ToNodeId"]].lt(30).all(axis=1)]

## WeightedCascade

### 各ノードの入次数を計算する

In [29]:
# Distinct edge targets of the reduced graph, in order of first appearance.
pd.unique(network["ToNodeId"])

array([ 4,  5,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
       22, 23, 24, 25, 26, 27, 28, 29,  1,  0,  2,  6,  3])

In [30]:
# Keep the distinct target-node ids of the reduced graph.
node_id = pd.unique(network["ToNodeId"])

In [31]:
# Count incoming edges per target node: one row per ToNodeId.
in_degree = (
    network.groupby("ToNodeId")
    .size()
    .reset_index(name="in_degree")
)
in_degree.head()

Unnamed: 0,ToNodeId,in_degree
0,0,14
1,1,12
2,2,7
3,3,2
4,4,9


### 枝確率の計算

枝$e$の終点を頂点$v$、  
頂点$v$の入次数を$d(v)$としたとき、枝確率$p(e)$を次のように定める。
$$ p(e)=\frac{1}{d(v)} $$

In [32]:
# Weighted-cascade probability: the reciprocal of each node's in-degree.
in_degree = in_degree.assign(WC=lambda counts: 1 / counts["in_degree"])
in_degree.head()

Unnamed: 0,ToNodeId,in_degree,WC
0,0,14,0.071429
1,1,12,0.083333
2,2,7,0.142857
3,3,2,0.5
4,4,9,0.111111


In [33]:
# Attach a WC probability to every edge by looking up its target node.
# Only the key and the WC column are merged, so there is nothing to drop afterwards.
network_p = network.merge(in_degree[["ToNodeId", "WC"]], on="ToNodeId", how="left")
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,WC
0,0,4,0.111111
1,0,5,0.090909
2,0,7,0.333333
3,0,8,0.25
4,0,9,0.333333


### データの出力 

In [34]:
# Export the WC-weighted edge list of the reduced graph without the row index.
network_p.to_csv(path_or_buf="data/Epinions/WC_small.csv", index=False)

# Pokec

https://snap.stanford.edu/data/soc-Pokec.html

## データ準備

In [26]:
# Load the tab-separated Pokec edge list; the file is read without a header row,
# so the columns come back labelled 0 and 1.
network = pd.read_csv("data/Pokec/soc-pokec-relationships.txt", sep="\t", header=None)
network.head()

Unnamed: 0,0,1
0,1,13
1,1,11
2,1,6
3,1,3
4,1,4


In [27]:
# Name the two positional columns like the Epinions edge list (source, target).
network = network.rename(columns={0: "# FromNodeId", 1: "ToNodeId"})
network.head()

Unnamed: 0,# FromNodeId,ToNodeId
0,1,13
1,1,11
2,1,6
3,1,3
4,1,4


## WeightedCascade

### 各ノードの入次数を計算する

In [28]:
# Distinct edge targets, in order of first appearance.
pd.unique(network["ToNodeId"])

array([     13,      11,       6, ..., 1632771, 1623923, 1632736])

In [29]:
# Keep the distinct target-node ids for later use.
node_id = pd.unique(network["ToNodeId"])

In [30]:
# Count incoming edges per target node: one row per ToNodeId.
in_degree = (
    network.groupby("ToNodeId")
    .size()
    .reset_index(name="in_degree")
)
in_degree.head()

Unnamed: 0,ToNodeId,in_degree
0,1,14
1,2,42
2,3,3
3,4,1
4,5,14


### 枝確率の計算

枝$e$の終点を頂点$v$、  
頂点$v$の入次数を$d(v)$としたとき、枝確率$p(e)$を次のように定める。
$$ p(e)=\frac{1}{d(v)} $$

In [31]:
# Weighted-cascade probability: the reciprocal of each node's in-degree.
in_degree = in_degree.assign(WC=lambda counts: 1 / counts["in_degree"])
in_degree.head()

Unnamed: 0,ToNodeId,in_degree,WC
0,1,14,0.071429
1,2,42,0.02381
2,3,3,0.333333
3,4,1,1.0
4,5,14,0.071429


In [32]:
# Attach a WC probability to every edge by looking up its target node.
# Only the key and the WC column are merged, so there is nothing to drop afterwards.
network_p = network.merge(in_degree[["ToNodeId", "WC"]], on="ToNodeId", how="left")
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,WC
0,1,13,0.037037
1,1,11,0.027027
2,1,6,0.071429
3,1,3,0.333333
4,1,4,1.0


### データの出力 

In [33]:
# Export the WC-weighted edge list without the row index.
network_p.to_csv(path_or_buf="data/Pokec/WC.csv", index=False)

## TRIVALENCY

### {0.1,0.01,0.001}をランダムに割り当てる

In [34]:
# Trivalency model: each edge independently receives one of three probabilities,
# drawn uniformly at random.
P = [0.1, 0.01, 0.001]
# Fixed seed so the exported TR.csv is identical on every run; a local
# RandomState keeps NumPy's global random state untouched.
TR_SEED = 0
# Size the draw from network_p, the frame the column is attached to.
TR = np.random.RandomState(TR_SEED).choice(P, len(network_p))
network_p["TR"] = TR

# Keep only the edge endpoints and the trivalency probability.
network_p = network_p[["# FromNodeId", "ToNodeId", "TR"]]
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,TR
0,1,13,0.1
1,1,11,0.01
2,1,6,0.1
3,1,3,0.001
4,1,4,0.01


In [35]:
# Export the trivalency edge list without the row index.
network_p.to_csv(path_or_buf="data/Pokec/TR.csv", index=False)

## P=0.1

In [36]:
# Uniform model: every edge carries the same probability P.
P = 0.1
# Take the edge endpoints and attach the constant column in one step.
network_p = network_p[["# FromNodeId", "ToNodeId"]].assign(**{"P=0.1": P})
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,P=0.1
0,1,13,0.1
1,1,11,0.1
2,1,6,0.1
3,1,3,0.1
4,1,4,0.1


In [37]:
# Export the P=0.1 edge list without the row index.
network_p.to_csv(path_or_buf="data/Pokec/P=0_1.csv", index=False)

## P=0.01

In [38]:
# Uniform model: every edge carries the same probability P.
P = 0.01
# Take the edge endpoints and attach the constant column in one step.
network_p = network_p[["# FromNodeId", "ToNodeId"]].assign(**{"P=0.01": P})
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,P=0.01
0,1,13,0.01
1,1,11,0.01
2,1,6,0.01
3,1,3,0.01
4,1,4,0.01


In [39]:
# Export the P=0.01 edge list without the row index.
network_p.to_csv(path_or_buf="data/Pokec/P=0_01.csv", index=False)

## P=0.001

In [40]:
# Uniform model: every edge carries the same probability P.
P = 0.001
# Take the edge endpoints and attach the constant column in one step.
network_p = network_p[["# FromNodeId", "ToNodeId"]].assign(**{"P=0.001": P})
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,P=0.001
0,1,13,0.001
1,1,11,0.001
2,1,6,0.001
3,1,3,0.001
4,1,4,0.001


In [41]:
# Export the P=0.001 edge list without the row index.
network_p.to_csv(path_or_buf="data/Pokec/P=0_001.csv", index=False)

# Facebook

https://snap.stanford.edu/data/ego-Facebook.html

## データ準備

In [44]:
# Load the space-separated Facebook edge list; the file is read without a header
# row, so the columns come back labelled 0 and 1.
network = pd.read_csv("data/Facebook/facebook_combined.txt", sep=" ", header=None)
network.head()

Unnamed: 0,0,1
0,0,1
1,0,2
2,0,3
3,0,4
4,0,5


In [45]:
# Name the two positional columns like the Epinions edge list (source, target).
network = network.rename(columns={0: "# FromNodeId", 1: "ToNodeId"})
network.head()

Unnamed: 0,# FromNodeId,ToNodeId
0,0,1
1,0,2
2,0,3
3,0,4
4,0,5


## WeightedCascade

### 各ノードの入次数を計算する

In [46]:
# Distinct edge targets, in order of first appearance.
pd.unique(network["ToNodeId"])

array([   1,    2,    3, ..., 4036, 4037, 4038])

In [47]:
# Keep the distinct target-node ids for later use.
node_id = pd.unique(network["ToNodeId"])

In [48]:
# Count how often each node appears in the ToNodeId column: one row per node.
# NOTE(review): the ego-Facebook graph is presumably undirected, in which case
# counting only the second column under-counts each node's degree -- confirm
# that treating the edge list as directed is intended here.
in_degree = (
    network.groupby("ToNodeId")
    .size()
    .reset_index(name="in_degree")
)
in_degree.head()

Unnamed: 0,ToNodeId,in_degree
0,1,1
1,2,1
2,3,1
3,4,1
4,5,1


### 枝確率の計算

枝$e$の終点を頂点$v$、  
頂点$v$の入次数を$d(v)$としたとき、枝確率$p(e)$を次のように定める。
$$ p(e)=\frac{1}{d(v)} $$

In [49]:
# Weighted-cascade probability: the reciprocal of each node's in-degree.
in_degree = in_degree.assign(WC=lambda counts: 1 / counts["in_degree"])
in_degree.head()

Unnamed: 0,ToNodeId,in_degree,WC
0,1,1,1.0
1,2,1,1.0
2,3,1,1.0
3,4,1,1.0
4,5,1,1.0


In [50]:
# Attach a WC probability to every edge by looking up its target node.
# Only the key and the WC column are merged, so there is nothing to drop afterwards.
network_p = network.merge(in_degree[["ToNodeId", "WC"]], on="ToNodeId", how="left")
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,WC
0,0,1,1.0
1,0,2,1.0
2,0,3,1.0
3,0,4,1.0
4,0,5,1.0


### データの出力 

In [52]:
# Export the WC-weighted edge list without the row index.
network_p.to_csv(path_or_buf="data/Facebook/WC.csv", index=False)

## TRIVALENCY

### {0.1,0.01,0.001}をランダムに割り当てる

In [53]:
# Trivalency model: each edge independently receives one of three probabilities,
# drawn uniformly at random.
P = [0.1, 0.01, 0.001]
# Fixed seed so the exported TR.csv is identical on every run; a local
# RandomState keeps NumPy's global random state untouched.
TR_SEED = 0
# Size the draw from network_p, the frame the column is attached to.
TR = np.random.RandomState(TR_SEED).choice(P, len(network_p))
network_p["TR"] = TR

# Keep only the edge endpoints and the trivalency probability.
network_p = network_p[["# FromNodeId", "ToNodeId", "TR"]]
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,TR
0,0,1,0.01
1,0,2,0.01
2,0,3,0.1
3,0,4,0.001
4,0,5,0.01


In [54]:
# Export the trivalency edge list without the row index.
network_p.to_csv(path_or_buf="data/Facebook/TR.csv", index=False)

## P=0.1

In [55]:
# Uniform model: every edge carries the same probability P.
P = 0.1
# Take the edge endpoints and attach the constant column in one step.
network_p = network_p[["# FromNodeId", "ToNodeId"]].assign(**{"P=0.1": P})
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,P=0.1
0,0,1,0.1
1,0,2,0.1
2,0,3,0.1
3,0,4,0.1
4,0,5,0.1


In [56]:
# Export the P=0.1 edge list without the row index.
network_p.to_csv(path_or_buf="data/Facebook/P=0_1.csv", index=False)

## P=0.01

In [57]:
# Uniform model: every edge carries the same probability P.
P = 0.01
# Take the edge endpoints and attach the constant column in one step.
network_p = network_p[["# FromNodeId", "ToNodeId"]].assign(**{"P=0.01": P})
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,P=0.01
0,0,1,0.01
1,0,2,0.01
2,0,3,0.01
3,0,4,0.01
4,0,5,0.01


In [58]:
# Export the P=0.01 edge list without the row index.
network_p.to_csv(path_or_buf="data/Facebook/P=0_01.csv", index=False)

## P=0.001

In [59]:
# Uniform model: every edge carries the same probability P.
P = 0.001
# Take the edge endpoints and attach the constant column in one step.
network_p = network_p[["# FromNodeId", "ToNodeId"]].assign(**{"P=0.001": P})
network_p.head()

Unnamed: 0,# FromNodeId,ToNodeId,P=0.001
0,0,1,0.001
1,0,2,0.001
2,0,3,0.001
3,0,4,0.001
4,0,5,0.001


In [60]:
# Export the P=0.001 edge list without the row index.
network_p.to_csv(path_or_buf="data/Facebook/P=0_001.csv", index=False)