# GRAPH FEATURES

Here we will compute for the three graphs (full graph, test graph, train graph), some features:

- **Betweeness centrality** of all the nodes

In [12]:
import os
import pickle
import itertools
import networkx as nx
from multiprocessing import Pool

## 1. Load the graphs

In [6]:
# Paths where we will save the data
# ---------------------------------
data_folder = "../data"
  
datasets_folder = os.path.join(data_folder, "datasets")
models_folder = os.path.join(data_folder, "models")
graph_feat_folder = os.path.join(data_folder, "graph_features")
initial_data_folder = os.path.join(data_folder, "initial_data")

# Create the folder if it does not exists yet
os.makedirs(graph_feat_folder, exist_ok=True)

# Edgelist path
full_edges_path = os.path.join(initial_data_folder, 'edgelist.txt')
train_edges_path = os.path.join(datasets_folder, 'train_graph_edgelist.txt')
test_edges_path = os.path.join(datasets_folder, 'test_graph_edgelist.txt')
# Betweeness centrality path
betcen_full_graph_path = os.path.join(graph_feat_folder, "full_graph.bet_centrality")
betcen_train_graph_path = os.path.join(graph_feat_folder, "train_graph.bet_centrality")
betcen_test_graph_path = os.path.join(graph_feat_folder, "test_graph.bet_centrality")


In [5]:
# Complete graph
full_graph = nx.read_edgelist(full_edges_path, delimiter=',', nodetype=int)
# Train graph
train_graph = nx.read_edgelist(train_edges_path, delimiter=',', nodetype=int)
# Test graph
test_graph = nx.read_edgelist(test_edges_path, delimiter=',', nodetype=int)

In [56]:
print('-'*50)

--------------------------------------------------


In [10]:
def chunks(l, n):
    """Divide a list of nodes `l` in `n` chunks"""
    l_c = iter(l)
    while 1:
        x = tuple(itertools.islice(l_c, n))
        if not x:
            return
        yield x


def betweenness_centrality_parallel(G, processes=None):
    """Parallel betweenness centrality  function"""
    p = Pool(processes=processes)
    node_divisor = len(p._pool) * 4
    node_chunks = list(chunks(G.nodes(), int(G.order() / node_divisor)))
    num_chunks = len(node_chunks)
    bt_sc = p.starmap(
        nx.betweenness_centrality_subset,
        zip(
            [G] * num_chunks,
            node_chunks,
            [list(G)] * num_chunks,
            [True] * num_chunks,
            [None] * num_chunks,
        ),
    )

    # Reduce the partial solutions
    bt_c = bt_sc[0]
    for bt in bt_sc[1:]:
        for n in bt:
            bt_c[n] += bt[n]
    return bt_c


In [13]:
nx.eccentricity(full_graph,1)

NetworkXError: Found infinite path length because the graph is not connected