# GRAPH FEATURES

Here we compute some features for each of the three graphs (full graph, test graph, train graph):

- **Betweenness centrality** of all the nodes

In [2]:
import os
import pickle
import itertools
import networkx as nx
from multiprocessing import Pool

import paths

## 1. Load the graphs

In [5]:
# Read the three edge lists (comma-separated, integer node ids) in the order
# full -> train -> test and bind each resulting graph to its own name.
full_graph, train_graph, test_graph = (
    nx.read_edgelist(edgelist_path, delimiter=',', nodetype=int)
    for edgelist_path in (
        paths.FULL_GRAPH_EDGELIST_PATH,  # complete graph
        paths.TRAIN_EDGELIST_PATH,       # train graph
        paths.TEST_EDGELIST_PATH,        # test graph
    )
)

In [10]:
def chunks(l, n):
    """Lazily yield consecutive tuples of at most `n` items taken from `l`.

    `n` is the size of each chunk (not the number of chunks). Every tuple
    holds `n` items except possibly the last one, which holds the remainder.
    Nothing is yielded when `l` is empty or when `n` is 0.
    """
    source = iter(l)

    def next_batch():
        # Pull up to `n` further items from the shared iterator.
        return tuple(itertools.islice(source, n))

    # iter(callable, sentinel) keeps calling `next_batch` until it returns
    # the empty tuple, i.e. until the source is exhausted.
    for batch in iter(next_batch, ()):
        yield batch


def betweenness_centrality_parallel(G, processes=None):
    """Compute betweenness centrality of `G` with a pool of worker processes.

    The nodes of `G` are split into chunks. Each task calls
    ``nx.betweenness_centrality_subset`` with one chunk as sources, every
    node of `G` as targets, ``normalized=True`` and ``weight=None``. The
    partial per-node scores are then summed.

    Parameters
    ----------
    G : graph
        Graph whose nodes are scored.
    processes : int or None
        Forwarded to ``multiprocessing.Pool``.

    Returns
    -------
    dict
        Mapping node -> summed score; an empty dict when `G` has no nodes.
    """
    if G.order() == 0:
        # No nodes means no chunks and therefore no partial result to reduce.
        return {}

    # The context manager shuts the workers down even when a task raises;
    # previously the pool was never closed and its processes leaked.
    with Pool(processes=processes) as p:
        # `_pool` is a private Pool attribute; its length is used as the
        # number of workers so that roughly four chunks are made per worker.
        node_divisor = len(p._pool) * 4
        # Clamp to 1: with fewer nodes than `node_divisor` the size would be
        # 0, `chunks` would yield nothing and the reduction below would fail.
        chunk_size = max(1, G.order() // node_divisor)
        node_chunks = list(chunks(G.nodes(), chunk_size))
        num_chunks = len(node_chunks)
        bt_sc = p.starmap(
            nx.betweenness_centrality_subset,
            zip(
                [G] * num_chunks,
                node_chunks,
                [list(G)] * num_chunks,
                [True] * num_chunks,
                [None] * num_chunks,
            ),
        )

    # Reduce the partial solutions into the first one.
    bt_c = bt_sc[0]
    for bt in bt_sc[1:]:
        for n in bt:
            bt_c[n] += bt[n]
    return bt_c


In [3]:
# Compute the full-graph scores and pickle them, unless the pickle file
# already exists (in which case nothing is computed or written).
# NOTE(review): 1000 worker processes is well above typical core counts;
# confirm this value is intended.
if not os.path.isfile(paths.FULL_GRAPH_BET_CENT_PATH):
    full_graph_betcen = betweenness_centrality_parallel(
        full_graph, processes=1000
    )
    with open(paths.FULL_GRAPH_BET_CENT_PATH, mode="wb") as out_file:
        pickle.dump(full_graph_betcen, out_file)

In [None]:
# Compute the train-graph scores and pickle them, unless the pickle file
# already exists (in which case nothing is computed or written).
# NOTE(review): 1000 worker processes is well above typical core counts;
# confirm this value is intended.
if not os.path.isfile(paths.TRAIN_GRAPH_BET_CENT_PATH):
    train_graph_betcen = betweenness_centrality_parallel(
        train_graph, processes=1000
    )
    with open(paths.TRAIN_GRAPH_BET_CENT_PATH, mode="wb") as out_file:
        pickle.dump(train_graph_betcen, out_file)

In [None]:
# Compute the test-graph scores and pickle them, unless the pickle file
# already exists (in which case nothing is computed or written).
# NOTE(review): 1000 worker processes is well above typical core counts;
# confirm this value is intended.
if not os.path.isfile(paths.TEST_GRAPH_BET_CENT_PATH):
    test_graph_betcen = betweenness_centrality_parallel(
        test_graph, processes=1000
    )
    with open(paths.TEST_GRAPH_BET_CENT_PATH, mode="wb") as out_file:
        pickle.dump(test_graph_betcen, out_file)