In [1]:
%matplotlib inline
import pymongo
import networkx as nx
import time
import itertools

from multiprocessing import Pool
from pymongo import MongoClient

from sweepy.get_config import get_config

# Load connection settings through the project's get_config() helper so that
# no credentials are hardcoded in the notebook.
config = get_config()

MONGO_URL = config.get('MONGO_URL')
MONGO_PORT = config.get('MONGO_PORT')
MONGO_USERNAME = config.get('MONGO_USERNAME')
MONGO_PASSWORD = config.get('MONGO_PASSWORD')

# The port is cast with int(); presumably the config stores it as a string.
# NOTE(review): int(None) raises TypeError if MONGO_PORT is missing — confirm
# that get_config() always provides it.
client = MongoClient(MONGO_URL, int(MONGO_PORT))

# Authenticate against the `tweets` database.
# NOTE(review): Database.authenticate() was deprecated in PyMongo 3.x and
# removed in PyMongo 4.0, where credentials are passed to MongoClient instead.
# Confirm the installed PyMongo version before upgrading.
db = client.tweets
db.authenticate(MONGO_USERNAME, MONGO_PASSWORD)

# Collection of user documents; later cells read `id_str` and `friends_ids`
# from each document.
users = db.users

In [2]:
# Directed graph: an edge u -> v is added below for every id v listed in
# user u's `friends_ids`.
graph = nx.DiGraph()

In [3]:
# Build the graph: one node per stored user and one directed edge from the
# user to every id in their `friends_ids` list.
for user in users.find():
    # Node keys are always strings; convert once per user rather than per edge.
    user_id = str(user['id_str'])
    # Add the user explicitly so that users with no friends still appear.
    graph.add_node(user_id)
    for friend_id in user['friends_ids']:
        # add_edge() creates any missing endpoint itself, so no membership
        # check is needed. The previous check tested the raw friend_id against
        # the string node keys, i.e. it looked up the wrong key.
        graph.add_edge(user_id, str(friend_id))

In [4]:
"""
Example of parallel implementation of betweenness centrality using the
multiprocessing module from Python Standard Library.

The function `betweenness_centrality_source` accepts a bunch of nodes and
computes the contribution of those nodes to the betweenness centrality of the
whole network. Here we divide the network into chunks of nodes and compute
each chunk's contribution using a pool of worker processes.
"""
def chunks(l, n):
    """Lazily split the iterable `l` into consecutive tuples of length `n`.

    `n` is the size of each chunk, not the number of chunks. The last tuple
    holds whatever items remain and may therefore be shorter. Nothing is
    yielded when `l` is empty or when `n` is 0.
    """
    remaining = iter(l)
    # iter(callable, sentinel) keeps calling the lambda until it returns the
    # empty tuple, which happens as soon as the underlying iterator runs dry.
    for piece in iter(lambda: tuple(itertools.islice(remaining, n)), ()):
        yield piece


def _betmap(G_normalized_weight_sources_tuple):
    """Single-argument adapter so the work can be dispatched with `Pool.map`.

    `Pool.map` passes exactly one argument to the mapped function, so the
    arguments for `nx.betweenness_centrality_source` arrive packed in one
    plain tuple and are unpacked positionally here. The parameter name records
    the order in which the caller packs them.
    """
    partial_scores = nx.betweenness_centrality_source(
        *G_normalized_weight_sources_tuple
    )
    return partial_scores


def betweenness_centrality_parallel(G, processes=None):
    """Compute betweenness centrality of `G` using a pool of worker processes.

    The nodes of `G` are split into chunks; each chunk is sent to `_betmap`
    as the set of source nodes, and the per-chunk result dictionaries are
    summed key by key.

    Parameters
    ----------
    G : graph
        Must provide `order()` and `nodes()`; it is passed through unchanged
        to `_betmap`.
    processes : int or None
        Forwarded to `multiprocessing.Pool`.

    Returns
    -------
    dict
        Node -> summed score. An empty dict is returned for an empty graph.
    """
    # Nothing to compute, and nothing worth spawning workers for.
    if G.order() == 0:
        return {}

    p = Pool(processes=processes)
    try:
        # Aim for roughly four chunks per worker.
        # NOTE(review): `_pool` is a private attribute of Pool; it is kept here
        # because it reflects the actual worker count when `processes` is None.
        node_divisor = len(p._pool) * 4
        # Never let the chunk size drop to 0: chunks() would then yield nothing
        # and the reduction below would fail on an empty result list.
        chunk_size = max(1, G.order() // node_divisor)
        node_chunks = list(chunks(G.nodes(), chunk_size))
        num_chunks = len(node_chunks)
        bt_sc = p.map(_betmap,
                      zip([G]*num_chunks,
                          [True]*num_chunks,
                          [None]*num_chunks,
                          node_chunks))
    finally:
        # Always release the worker processes, even when a task raises.
        p.terminate()
        p.join()

    # Reduce the partial solutions into the first dictionary.
    # Assumes every partial result holds the same node keys — TODO confirm.
    bt_c = bt_sc[0]
    for bt in bt_sc[1:]:
        for n in bt:
            bt_c[n] += bt[n]
    return bt_c

In [None]:
# Time an approximate (sampled) betweenness-centrality run on the full graph.
print("Computing betweenness centrality for:")
print(nx.info(graph))
start = time.time()
# networkx requires k <= number of nodes, so cap the sample size for small
# graphs. `or None` falls back to the exact computation on an empty graph.
bt = nx.betweenness_centrality(graph, k=min(1000, graph.order()) or None)
print("\t\tTime: %.4F" % (time.time()-start))
# Every node id in `graph` is a string, so the previous `bt[0]` lookup raised
# KeyError. Report the highest-scoring node instead.
if bt:
    top_node = max(bt, key=bt.get)
    print("\t\tBetweenness centrality for node %s: %.5f" % (top_node, bt[top_node]))

In [18]:
import operator
# Rank the (node, score) pairs from lowest to highest betweenness.
betweeness = sorted(bt.items(), key=lambda pair: pair[1])

In [None]:
# Show the (node, betweenness) pairs, ordered by ascending score.
betweeness

In [4]:
import operator
# Eigenvector centrality for every node of the graph.
eigen_values = nx.eigenvector_centrality(graph)
# Rank the (node, score) pairs from lowest to highest centrality.
sorted_eigen_values = sorted(eigen_values.items(), key=lambda pair: pair[1])

In [11]:
# Show the (node, eigenvector centrality) pairs built in the previous cell.
# NOTE(review): the saved output below is in descending score order, whereas
# sorted() without reverse=True yields ascending order. The output looks
# stale; re-run the notebook to confirm.
sorted_eigen_values

[('121817564', 0.09428345177961117),
 ('11107172', 0.07497270265786563),
 ('807095', 0.06812954689012012),
 ('216695058', 0.06667872003411948),
 ('20861943', 0.06605647185989534),
 ('58524428', 0.06489206418816294),
 ('16104258', 0.06282196653973698),
 ('18816166', 0.06144353212620821),
 ('436873679', 0.05926741271555719),
 ('14677919', 0.0592280032725243),
 ('110213431', 0.05877964626075489),
 ('16833750', 0.05782045678043389),
 ('16399949', 0.05725410667069389),
 ('30752163', 0.05710530533438369),
 ('51241574', 0.05651227714587067),
 ('11204932', 0.05560782730239151),
 ('25160944', 0.05522319472433831),
 ('35773039', 0.05460132048045243),
 ('19751038', 0.05455624180234738),
 ('243822289', 0.052810557279642574),
 ('9149492', 0.05271866372433641),
 ('813286', 0.05268471443612777),
 ('29006114', 0.05239316749545155),
 ('14357989', 0.05231817117919128),
 ('25074661', 0.052225119139207805),
 ('122188108', 0.051415120263709015),
 ('27000730', 0.05137570087169409),
 ('27810354', 0.050730155