In [1]:
import argparse

import pandas as pd
import numpy as np
import random
import networkx as nx
import bridgeness
from numpy.random import choice, uniform

from multiprocessing import Pool, TimeoutError
import time
import os

from joblib import Memory
from collections import defaultdict

In [2]:
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--num_steps", type=int, help="Number of steps", default=10000)
parser.add_argument("--check_every" , type=int, help="Check for convergence every N steps", default=100)
parser.add_argument("--phi" , type=float, help="Phi marameter", default=100)
parser.add_argument("--mu" , type=float, help="Mu parameter", default=0.2)

args = parser.parse_args([])
print(args)

Namespace(check_every=100, mu=0.2, num_steps=10000, phi=100)


In [3]:
FN = "graph2.txt.csv"
data = pd.read_csv(FN, delimiter = " ")

G = nx.Graph()

for i,s in data.iterrows():
    u = s[0]
    v = s[1]

    G.add_edge(u,v)

In [4]:
#G.nodes()

In [5]:
memory = Memory(cachedir='./cached_bri', verbose=0)

@memory.cache
def cached_bridgeness(G):
  return bridgeness.bridgeness_centrality(G)

bri = cached_bridgeness(G)
#bri = bridgeness.bridgeness_centrality(G)
#e_bri = bridgeness.edge_bridgeness_centrality(G)
#bet = bridgeness.betweenness_centrality(G)

In [6]:
phi = args.phi
mu = args.mu
#phi = 100
#mu = 0.2

#lab0 = 81  #82 in matlab
#lab1 = 190 #191 in matlab
labs = ((81, 0), (190, 1)) #seed nodes: community

adj = nx.to_numpy_matrix(G)
bri_vals = list(bri.values())
quality = (1 / np.exp( np.multiply( bri_vals, phi )))
diag = np.diagflat(quality) #k: v = node, bridgeness
_tmp = np.matmul(diag, adj)
_tmp_sum_on_rows = np.sum(_tmp, axis=0)
_tmp_sum_on_rows_recip = np.reciprocal(_tmp_sum_on_rows)
norm = np.diagflat( _tmp_sum_on_rows_recip )

T = np.matmul( _tmp, norm )


num_nodes = len(G.nodes())
alpha = 2.0/(2.0 + mu)

"""
#closed_form, unused######

prob_comm = T 

labels = np.zeros( (num_nodes, 2) )
labels[lab0,0] = 1
labels[lab1,1] = 1

op1 = (1-alpha) * np.eye(num_nodes) 
op2 = ( np.eye(num_nodes) - ( alpha * T))
op = np.dot(op1, op2.I)

probs = np.dot(op, labels)
"""

'\n#closed_form, unused######\n\nprob_comm = T \n\nlabels = np.zeros( (num_nodes, 2) )\nlabels[lab0,0] = 1\nlabels[lab1,1] = 1\n\nop1 = (1-alpha) * np.eye(num_nodes) \nop2 = ( np.eye(num_nodes) - ( alpha * T))\nop = np.dot(op1, op2.I)\n\nprobs = np.dot(op, labels)\n'

In [7]:
#2-comm RW

#start from lab0(s), lab1(s)

steps = args.num_steps
convcheckfreq = args.check_every
#steps = 10000000 #max steps
#convcheckfreq = 10000 #check if converged every N steps

conv_thr = 1e-06

#repeatz = 8
repeatz = 1 #every repetition converges to same distribution, apparently

#visit probability vec
glob_rw_visits = {} #k = community, v = probs
glob_steps = {}

print("Alpha = %.3f" %(alpha))

def rw(sn): #signed node
    n0 = sn
    print("RW from signed node %i started." % n0)
    rw_visits = np.zeros( (num_nodes, 1) )
    last_rw_visits = np.copy(rw_visits)

    nodes = range(num_nodes)

    for s0 in range(1,steps+1):

        #start from
        #go back to labeled node? prob = 1-alpha; prob to trans = alpha
        if (uniform() < alpha): #
            trans_probs = T[:,n0].view(np.ndarray).flatten() 
            trans_to = int( choice(nodes, 1, p = trans_probs) )
            n0 = trans_to
        else:
            n0 = sn

        rw_visits[n0] += 1

        if (s0 % convcheckfreq == 0):
            diff_rw_visits = ((rw_visits/s0 - last_rw_visits/s0)**2).sum()
            print("[sn %d] At step %d, diff %.10f" %(sn, s0, diff_rw_visits) )
            if ((diff_rw_visits) < conv_thr):# and np.all(rw_visits != last_rw_visits)):
                print("Converged at <%.10f" % conv_thr)
                break
            last_rw_visits = np.copy(rw_visits)

    return rw_visits/s0, s0

#output

seed_nodes_seq = []
communities_seq = []

for _sn, _comm in labs*repeatz:
    seed_nodes_seq.append(_sn)
    communities_seq.append(_comm)

if __name__ == '__main__':
    with Pool() as pool:
        res = pool.map(rw, seed_nodes_seq)

#consolidate results
all_prob_distr = defaultdict(list) #k = community : v = seednode, steps, probdistr

for r, (prob, steps) in enumerate(res):
    print(r, steps)
    #all_prob_distr.append((seed_nodes_seq[r], communities_seq[r], steps, prob))
    all_prob_distr[communities_seq[r]].append((seed_nodes_seq[r], steps, prob))
    

with open('probs.npy', 'wb') as f:
    np.save(f, all_prob_distr)


Alpha = 0.909
RW from signed node 81 started.
RW from signed node 190 started.
[sn 81] At step 100, diff 0.0508000000
[sn 190] At step 100, diff 0.0492000000
[sn 190] At step 200, diff 0.0107500000
[sn 81] At step 200, diff 0.0131500000
[sn 190] At step 300, diff 0.0044888889
[sn 81] At step 300, diff 0.0068222222
[sn 190] At step 400, diff 0.0016375000
[sn 81] At step 400, diff 0.0024500000
[sn 190] At step 500, diff 0.0018320000
[sn 190] At step 600, diff 0.0010444444
[sn 81] At step 500, diff 0.0020960000
[sn 190] At step 700, diff 0.0007877551
[sn 81] At step 600, diff 0.0009777778
[sn 190] At step 800, diff 0.0004031250
[sn 81] At step 700, diff 0.0008122449
[sn 81] At step 800, diff 0.0005375000
[sn 190] At step 900, diff 0.0005629630
[sn 190] At step 1000, diff 0.0003760000
[sn 81] At step 900, diff 0.0007012346
[sn 190] At step 1100, diff 0.0004066116
[sn 81] At step 1000, diff 0.0003660000
[sn 81] At step 1100, diff 0.0004231405
[sn 190] At step 1200, diff 0.0003055556
[sn 81]

In [8]:
for k,v in all_prob_distr.items():
    for iter in v:
        (sn, steps, probs) = iter
        print("Community %d, seed node %d, probabilities:\n %s" % (k, sn, probs[:]))

Community 0, seed node 81, probabilities:
 [[6.600e-03]
 [0.000e+00]
 [1.800e-02]
 [1.000e-04]
 [4.100e-03]
 [9.000e-04]
 [2.290e-02]
 [0.000e+00]
 [0.000e+00]
 [1.600e-03]
 [7.000e-04]
 [2.950e-02]
 [1.400e-03]
 [1.900e-02]
 [0.000e+00]
 [1.160e-02]
 [2.000e-04]
 [0.000e+00]
 [3.000e-03]
 [1.000e-04]
 [8.000e-04]
 [2.800e-03]
 [1.870e-02]
 [4.000e-03]
 [7.400e-03]
 [2.200e-03]
 [2.250e-02]
 [0.000e+00]
 [3.300e-03]
 [2.510e-02]
 [2.960e-02]
 [1.550e-02]
 [1.430e-02]
 [2.000e-04]
 [6.400e-03]
 [1.270e-02]
 [1.050e-02]
 [1.300e-03]
 [1.700e-03]
 [2.400e-03]
 [4.000e-04]
 [1.360e-02]
 [1.000e-03]
 [8.500e-03]
 [2.430e-02]
 [2.000e-04]
 [3.040e-02]
 [9.200e-03]
 [1.000e-03]
 [2.700e-03]
 [3.000e-03]
 [1.500e-03]
 [1.300e-03]
 [2.440e-02]
 [2.080e-02]
 [9.000e-04]
 [2.020e-02]
 [0.000e+00]
 [4.800e-03]
 [1.770e-02]
 [0.000e+00]
 [1.810e-02]
 [0.000e+00]
 [0.000e+00]
 [1.940e-02]
 [1.650e-02]
 [8.000e-04]
 [3.000e-04]
 [2.490e-02]
 [2.820e-02]
 [9.000e-04]
 [1.920e-02]
 [7.000e-04]
 [3.800e

In [9]:
final_probs = np.concatenate([all_prob_distr[0][0][2], all_prob_distr[1][0][2]], axis = 1)

In [10]:
pd.DataFrame.to_csv(pd.DataFrame(probs), 'RW_probs.csv')