# Wikispeedia

In [25]:
import pandas as pd

import networkx as nx
import nx_parallel as nxp
from joblib import parallel_config, Parallel, delayed

import json

# enabling networkx's config for nx-parallel
nx.config.backends.parallel.active = True

# Setting global configs
nxp_config = nx.config.backends.parallel
nxp_config.n_jobs = -1
nxp_config.verbose = 50

Read data

In [26]:
links = pd.read_csv('links.tsv', delimiter='\t', names=['u', 'v'], header=None)
paths_finished_data = pd.read_csv('paths_finished.tsv', delimiter='\t', names=['a', 'b', 'c', 'path', 'd'], header=None).loc[:, 'path'].values.tolist()
paths_finished = []
for s in paths_finished_data:
    l = s.split(';')
    paths_finished.append(l)
paths_finished

[['14th_century',
  '15th_century',
  '16th_century',
  'Pacific_Ocean',
  'Atlantic_Ocean',
  'Accra',
  'Africa',
  'Atlantic_slave_trade',
  'African_slave_trade'],
 ['14th_century',
  'Europe',
  'Africa',
  'Atlantic_slave_trade',
  'African_slave_trade'],
 ['14th_century',
  'Niger',
  'Nigeria',
  'British_Empire',
  'Slavery',
  'Africa',
  'Atlantic_slave_trade',
  'African_slave_trade'],
 ['14th_century', 'Renaissance', 'Ancient_Greece', 'Greece'],
 ['14th_century',
  'Italy',
  'Roman_Catholic_Church',
  'HIV',
  'Ronald_Reagan',
  'President_of_the_United_States',
  'John_F._Kennedy'],
 ['14th_century',
  'Europe',
  'North_America',
  'United_States',
  'President_of_the_United_States',
  'John_F._Kennedy'],
 ['14th_century', 'China', 'Gunpowder', 'Fire'],
 ['14th_century', 'Time', 'Isaac_Newton', 'Light', 'Color', 'Rainbow'],
 ['14th_century', 'Time', 'Light', 'Rainbow'],
 ['14th_century',
  '15th_century',
  'Plato',
  'Nature',
  'Ultraviolet',
  'Color',
  'Rainbow'],


Generate the graph from the edge list and plot it

*Nota: il grafo generato ha 4592 nodi mentre la lista degli articoli contiene 4605 articoli: ci sono 4605-4592 articoli senza link che nel seguito non ho preso in considerazione*

In [27]:
G = nx.from_pandas_edgelist(links, source='u', target='v')
H = nxp.ParallelGraph(G)

In [28]:
nx.draw_networkx(G, arrows=False, with_labels=False, node_size=1, width=0.1)

KeyboardInterrupt: 

Compute all shortest paths between all nodes

In [None]:
def save_paths(node):
    single_source_paths_generator = nx.single_source_all_shortest_paths(G, node)
    single_source_paths = dict(single_source_paths_generator)
    with open('offline_' + node + '.txt', 'w') as out_file:
        out_file.write(json.dumps(single_source_paths))

nodes = list(G.nodes)
Parallel(n_jobs=-1)(delayed(save_paths)(node) for node in nodes)

Compute the shortest-path betweenness centrality for nodes (not approximated)

*Nota: risultato salvato in nodes_bc.txt*

In [None]:
nodes_bc = nxp.betweenness_centrality(H);

In [38]:
def compare_paths(path):
    first_node = path[0]
    last_node = path[-1]
    try:
        with open('paths/' + first_node + '.txt') as in_file:
            all_paths = json.loads(in_file.read())
        all_paths_ending_in_last_node = all_paths[last_node]
        # operazione da fare sul percorso
    except:
        print('Error')
        
Parallel(n_jobs=-1)(delayed(compare_paths)(path) for path in paths_finished)

African_slave_trade
Rainbow
Rainbow
Rainbow
Elizabeth_I_of_England
Atom
James_II_of_England
Burundi
Vampire_bat
Minnesota
Potassium_iodide
Mozilla_Firefox
Surtsey
Winston_Churchill
Gold
Great_white_shark
Bongo_%28antelope%29
Ivory
Ivory
Ocean
Ocean
Ocean
Ocean
Ocean
Ocean
England
England
England
England
Temperate
Sauropodomorpha
Differential_equation
Differential_equation
Sleep
Sleep
Joseph_Stalin
Francis_Bacon
Mao_Zedong
Mao_Zedong
Famine
Barley
Tropical_Storm_Delta_%282005%29
Africa
Jesus
Jesus
Adolf_Hitler
Adolf_Hitler
Paprika
Paprika
Western_Roman_Empire
Western_Roman_Empire
Google
Google
Google
Google
Google
Google
Yugoslavia
Yugoslavia
Prague
Argon
Carrot
Women%27s_rights
Parrot
Parrot
Parrot
Parrot
Ku_Klux_Klan
Ku_Klux_Klan
Kuwait_City
Kuwait_City
Volga_River
Macau
London_Bridge
Nitrification
Dog
Apostolic_Succession
Shelduck
Integrated_circuit
Ireland
Ireland
Hurricane_Katrina
Hurricane_Katrina
Avalanche
Avalanche
Jersey
Hip_hop_music
Hip_hop_music
Hip_hop_music
Chester_A._Arth

KeyboardInterrupt: 