In [1]:
import pandas as pd
import numpy as np
import random
import networkx as nx
import bridgeness
from numpy.random import choice, uniform

from multiprocessing import Pool, TimeoutError
import time
import os

from joblib import Memory
from collections import defaultdict

In [2]:
FN = "graph2.txt.csv"
data = pd.read_csv(FN, delimiter = " ")

G = nx.Graph()

for i,s in data.iterrows():
    u = s[0]
    v = s[1]

    G.add_edge(u,v)
    

In [3]:
#G.nodes()

In [4]:
memory = Memory(cachedir='./cached_bri', verbose=0)

@memory.cache
def cached_bridgeness(G):
  return bridgeness.bridgeness_centrality(G)

bri = cached_bridgeness(G)
#bri = bridgeness.bridgeness_centrality(G)
#e_bri = bridgeness.edge_bridgeness_centrality(G)
#bet = bridgeness.betweenness_centrality(G)

In [5]:
phi = 100
mu = 0.2

#lab0 = 81  #82 in matlab
#lab1 = 190 #191 in matlab
labs = ((81, 0), (190, 1)) #seed nodes: community

adj = nx.to_numpy_matrix(G)
bri_vals = list(bri.values())
quality = (1 / np.exp( np.multiply( bri_vals, phi )))
diag = np.diagflat(quality) #k: v = node, bridgeness
_tmp = np.matmul(diag, adj)
_tmp_sum_on_rows = np.sum(_tmp, axis=0)
_tmp_sum_on_rows_recip = np.reciprocal(_tmp_sum_on_rows)
norm = np.diagflat( _tmp_sum_on_rows_recip )

T = np.matmul( _tmp, norm )


num_nodes = len(G.nodes())
alpha = 2.0/(2.0 + mu)

"""
#closed_form######


prob_comm = T 

labels = np.zeros( (num_nodes, 2) )
labels[lab0,0] = 1
labels[lab1,1] = 1

op1 = (1-alpha) * np.eye(num_nodes) 
op2 = ( np.eye(num_nodes) - ( alpha * T))
op = np.dot(op1, op2.I)

probs = np.dot(op, labels)
"""

'\n#closed_form######\n\n\nprob_comm = T \n\nlabels = np.zeros( (num_nodes, 2) )\nlabels[lab0,0] = 1\nlabels[lab1,1] = 1\n\nop1 = (1-alpha) * np.eye(num_nodes) \nop2 = ( np.eye(num_nodes) - ( alpha * T))\nop = np.dot(op1, op2.I)\n\nprobs = np.dot(op, labels)\n'

In [6]:
#pd.DataFrame.to_csv(pd.DataFrame(probs), 'probs.csv')

In [48]:
#2-comm RW

#start from lab0(s), lab1(s)

steps = 10000 #max steps
convcheckfreq = 1000 #check if converged every N steps
conv_thr = 1e-06

repeatz = 8

#visit probability vec
glob_rw_visits = {} #k = community, v = probs
glob_steps = {}


print("Alpha = %.3f" %(alpha))


def rw(sn): #signed node
    n0 = sn
    print("RW from signed node %i started." % n0)
    rw_visits = np.zeros( (num_nodes, 1) )
    last_rw_visits = np.copy(rw_visits)

    nodes = range(num_nodes)

    #COMM0
    for s0 in range(1,steps+1):

        #start from
        #go back to labeled node? prob = 1-alpha; prob to trans = alpha
        if (uniform() < alpha): #
            trans_probs = T[:,n0].view(np.ndarray).flatten() 
            trans_to = int( choice(nodes, 1, p = trans_probs) )
            n0 = trans_to
        else:
            n0 = sn

        rw_visits[n0] += 1

        if (s0 % convcheckfreq == 0):
            diff_rw_visits = ((rw_visits/s0 - last_rw_visits/s0)**2).sum()
            print("[sn %d] At step %d, diff %.10f" %(sn, s0, diff_rw_visits) )
            if ((diff_rw_visits) < conv_thr):# and np.all(rw_visits != last_rw_visits)):
                print("Converged at <%.10f" % conv_thr)
                break
            last_rw_visits = np.copy(rw_visits)

    return rw_visits/s0, s0


seed_nodes_seq = []
communities_seq = []

for _sn, _comm in labs*repeatz:
    seed_nodes_seq.append(_sn)
    communities_seq.append(_comm)

if __name__ == '__main__':
    with Pool() as pool:
        res = pool.map(rw, seed_nodes_seq)

#consolidate results
all_prob_distr = defaultdict(list) #k = community : v = seednode, steps, probdistr

for r, (prob, steps) in enumerate(res):
    print(r, steps)
    #all_prob_distr.append((seed_nodes_seq[r], communities_seq[r], steps, prob))
    all_prob_distr[communities_seq[r]].append((seed_nodes_seq[r], steps, prob))
    


Alpha = 0.909
RW from signed node 190 started.
RW from signed node 190 started.
RW from signed node 81 started.
RW from signed node 81 started.
[sn 190] At step 1000, diff 0.0253300000
[sn 81] At step 1000, diff 0.0277800000
[sn 190] At step 1000, diff 0.0253300000
[sn 81] At step 1000, diff 0.0277800000
[sn 190] At step 2000, diff 0.0078335000
[sn 81] At step 2000, diff 0.0084570000
[sn 190] At step 2000, diff 0.0078335000
[sn 81] At step 3000, diff 0.0033444444
[sn 190] At step 3000, diff 0.0029142222
[sn 81] At step 2000, diff 0.0084570000
[sn 190] At step 3000, diff 0.0029142222
[sn 81] At step 4000, diff 0.0016162500
[sn 81] At step 3000, diff 0.0033444444
[sn 190] At step 4000, diff 0.0016342500
[sn 190] At step 4000, diff 0.0016342500
[sn 81] At step 5000, diff 0.0013212800
[sn 81] At step 4000, diff 0.0016162500
[sn 190] At step 5000, diff 0.0010636800
[sn 190] At step 5000, diff 0.0010636800
[sn 81] At step 6000, diff 0.0009362778
[sn 81] At step 5000, diff 0.0013212800
[sn 81

In [46]:
#pd.DataFrame.to_csv(pd.DataFrame(rw_probs), 'rw_probs.csv')
for k,v in all_prob_distr.items():
    for iter in v:
        (sn, steps, probs) = iter
        print("Community %d, seed node %d, probabilities:\n %s" % (k, sn, probs[:]))



In [51]:
#all_prob_distr[0]