In [44]:
from IPython.lib import backgroundjobs as bg
from tqdm import tqdm_notebook
from itertools import combinations
import pickle
import collections

## Local features: 

#### Method 1: Common Neighbors
$$ CN(x,y) = \sum\nolimits_{z \in \Gamma(x) \cap \Gamma(y)} \left( w(x,z) + w(y,z) \right)$$

#### Method 2: Jaccard Coefficients 
$$ JC(x,y) = \sum\nolimits_{z \in \Gamma(x) \cap \Gamma(y)} \frac{w(x,z) + w(y,z)}{{\sum\nolimits_{a \in \Gamma(x)}}w(a,x) + {\sum\nolimits_{b \in \Gamma(y)}}w(b,y)}$$

#### Method 3: Preferential Attachment 
The probability that a new edge has node x as an endpoint is proportional to the total weight of the edges already incident to x.
$$ PA(x,y) = \left( \sum\nolimits_{a \in \Gamma(x)} w(a,x) \right) \cdot \left( \sum\nolimits_{b \in \Gamma(y)} w(b,y) \right)$$

#### Method 4:  Adamic-Adar Coefficient 
Similar to Method 2, but it assigns higher importance to common neighbors that themselves have fewer (lighter) connections.
$$ AA(x,y) = \sum\nolimits_{z \in \Gamma(x) \cap \Gamma(y)} \frac{w(x,z) + w(y,z)}{\log \left( 1+ {\sum\nolimits_{c \in \Gamma(z)}}w(z,c) \right)}$$

In [39]:
def common_neighbor(neighbours, pairs, weights, cache):
    """Store the weighted common-neighbour score of each pair in ``cache``.

    The score of a pair (x, y) is the sum, over every node z adjacent to
    both, of ``weights[(x, z)] + weights[(y, z)]``.

    Parameters
    ----------
    neighbours : mapping of node -> set of adjacent nodes.
    pairs : iterable of ``(u, v)`` node pairs, in either order.
    weights : mapping of ``(node, neighbour)`` -> edge weight.  It is looked
        up as ``(x, z)`` and ``(y, z)``, so both orientations of an edge
        must be present.
    cache : dict updated in place, keyed by the pair ordered as
        ``(smaller, larger)``.  Pairs already present are left untouched and
        pairs without a common neighbour get no entry.

    Returns
    -------
    None
    """
    for first, second in tqdm_notebook(pairs, mininterval=1):
        # Canonical key: the smaller node always comes first.
        key = (second, first) if first > second else (first, second)
        if key in cache:
            continue
        low, high = key
        shared = neighbours[low].intersection(neighbours[high])
        if not shared:
            continue
        cache[key] = sum(weights[(low, z)] + weights[(high, z)] for z in shared)

In [52]:
def jaccard_coefficient(neighbours, pairs, weights, sum_weights, cache):
    """Store the weighted Jaccard coefficient of each pair in ``cache``.

    For a pair (x, y) the score is the sum, over every node z adjacent to
    both, of ``(weights[(x, z)] + weights[(y, z)])`` divided by
    ``sum_weights[x] + sum_weights[y]``.

    Parameters
    ----------
    neighbours : mapping of node -> set of adjacent nodes.
    pairs : iterable of ``(u, v)`` node pairs, in either order.
    weights : mapping of ``(node, neighbour)`` -> edge weight.  It is looked
        up as ``(x, z)`` and ``(y, z)``, so both orientations of an edge
        must be present.
    sum_weights : mapping of node -> total weight used in the denominator.
        It is only consulted for pairs that share at least one neighbour.
    cache : dict updated in place, keyed by the pair ordered as
        ``(smaller, larger)``.  Pairs already present are left untouched.
        Pairs without a common neighbour, or whose combined total weight is
        zero (score undefined), get no entry.

    Returns
    -------
    None
    """
    for u, v in tqdm_notebook(pairs, mininterval=1):
        # Canonical key: the smaller node always comes first.
        if u > v:
            u, v = v, u
        if (u, v) in cache:
            continue
        common = neighbours[u].intersection(neighbours[v])
        if not common:
            continue
        total = sum_weights[u] + sum_weights[v]
        if total == 0:
            # A zero denominator would raise ZeroDivisionError and abort the
            # whole run part-way through; treat the score as undefined and
            # skip this pair instead.
            continue
        # Divide term by term (not once at the end) so results stay
        # numerically identical to previously computed caches.
        cache[(u, v)] = sum((weights[(u, n)] + weights[(v, n)]) / total for n in common)

In [48]:
def prefer_attach(pairs, sum_weights, cache):
    """Store the preferential-attachment score of each pair in ``cache``.

    The score of a pair (x, y) is ``sum_weights[x] * sum_weights[y]``.

    Parameters
    ----------
    pairs : iterable of ``(u, v)`` node pairs, in either order.
    sum_weights : mapping of node -> total weight for that node.
    cache : dict updated in place, keyed by the pair ordered as
        ``(smaller, larger)``.  Pairs already present are left untouched.

    Returns
    -------
    None
    """
    for first, second in tqdm_notebook(pairs, mininterval=1):
        # Canonical key: the smaller node always comes first.
        key = (second, first) if first > second else (first, second)
        if key not in cache:
            low, high = key
            cache[key] = sum_weights[low] * sum_weights[high]

### Import weights and neighbours representing weighted undirected graph
Note: `weights` should be symmetric, i.e. contain both `(u, v)` and `(v, u)` for every edge, for ease of subsequent computation.

To convert a snap graph to the `neighbours` dict:
```
def get_neighbours(node):
    """Return the set of neighbour ids of ``node``.

    Collects ``node.GetNbrNId(i)`` for every ``i`` in ``range(node.GetDeg())``.
    """
    return set(node.GetNbrNId(i) for i in range(node.GetDeg()))
# Map every node id of graph G to the set of its neighbours' ids.
neighbours={node.GetId():get_neighbours(node) for node in G.Nodes()}
# NOTE(review): this writes weight3-neighbours.pickle, while the loading cell
# of this notebook reads weight2-neighbours.pickle - confirm the intended name.
with open('./data/weight3-neighbours.pickle', 'wb') as picklefile:
    pickle.dump(neighbours, picklefile)
```

In [None]:
# Load the weighted graph from disk.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
# encoding='latin1' is presumably needed because the pickles were written
# under Python 2 - TODO confirm.

# weights: looked up elsewhere in this notebook as weights[(node, neighbour)].
with open('./data/weight2-weights.pickle', 'rb') as picklefile:
    weights = pickle.load(picklefile, encoding='latin1')
# neighbours: looked up elsewhere as neighbours[node], yielding a set of nodes.
with open('./data/weight2-neighbours.pickle', 'rb') as picklefile:
    neighbours = pickle.load(picklefile, encoding='latin1')

In [14]:
# Load the collection of users whose pairwise features are computed below.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
with open('./data/q1_4000_users.pickle', 'rb') as picklefile:
    users = pickle.load(picklefile, encoding='latin1')

In [34]:
# Every unordered pair of users, materialised as a list so that the progress
# bars know the total number of pairs up front.
pairs = [*combinations(users, 2)]

In [45]:
# Manager through which the cells below start each feature computation as a
# background job (via jobs.new), so the notebook stays usable meanwhile.
jobs = bg.BackgroundJobManager()

In [47]:
# Common-neighbour scores are written into cn_cache by a background job;
# the cache is only complete once that job has finished.
cn_cache = collections.defaultdict(float)
jobs.new(common_neighbor, neighbours, pairs, weights, cn_cache)

HBox(children=(IntProgress(value=0, max=8542911), HTML(value='')))

<BackgroundJob #0: <function <lambda> at 0x1140c3d08>>

In [None]:
# Wait for every running background job before saving: pickling a dict that
# a job is still filling would store a partial result or fail with
# "dictionary changed size during iteration".  Jobs are threads, so join()
# blocks until each one is done.
for job in list(jobs.running):
    job.join()
with open('./data/weight2_cn.pickle', 'wb') as picklefile:
    pickle.dump(cn_cache, picklefile)

In [41]:
# Total weight of the edges incident to each user (its weighted degree).
sum_weights = dict(
    (user, sum(weights[(user, nbr)] for nbr in neighbours[user]))
    for user in users
)

In [49]:
# Preferential-attachment scores are written into pa_cache by a background
# job; the cache is only complete once that job has finished.
pa_cache = collections.defaultdict(float)
jobs.new(prefer_attach, pairs, sum_weights, pa_cache)

<BackgroundJob #2: <function <lambda> at 0x1051e7ae8>>

HBox(children=(IntProgress(value=0, max=8542911), HTML(value='')))

In [50]:
# Wait for every running background job before saving: pickling a dict that
# a job is still filling would store a partial result or fail with
# "dictionary changed size during iteration".  Jobs are threads, so join()
# blocks until each one is done.
for job in list(jobs.running):
    job.join()
with open('./data/weight2_pa.pickle', 'wb') as picklefile:
    pickle.dump(pa_cache, picklefile)

In [53]:
# Jaccard scores are written into jaccard_cache by a background job; the
# cache is only complete once that job has finished.
jaccard_cache = collections.defaultdict(float)
jobs.new(jaccard_coefficient, neighbours, pairs, weights, sum_weights, jaccard_cache)

<BackgroundJob #3: <function <lambda> at 0x1051e7f28>>

HBox(children=(IntProgress(value=0, max=8542911), HTML(value='')))

In [None]:
# Wait for every running background job before saving: pickling a dict that
# a job is still filling would store a partial result or fail with
# "dictionary changed size during iteration".  Jobs are threads, so join()
# blocks until each one is done.
for job in list(jobs.running):
    job.join()
with open('./data/weight2_jaccard.pickle', 'wb') as picklefile:
    pickle.dump(jaccard_cache, picklefile)