In [26]:
import networkx as nx

def to_number(x):
    try:
        return int(x)
    except ValueError:
        return float(x)

def read_pajek(name, path = "."):
    names = dict()
    G = nx.MultiDiGraph()
    with open(path + "/" + name + ".net", 'r', encoding="utf-8") as file:
        file.readline()

        for line in file:
            if line.startswith("*"):
                break
            else:
                node = line.split("\"")
                G.add_node(int(node[0]) - 1, label = node[1])
                names[int(node[0]) - 1] = node[1]

        for line in file:
            i, j, w = map(to_number, line.split())
            i -= 1
            j -= 1
            G.add_edge(i, j, weight=float(w))
      
    return G, names

graphs = dict()
name_mapping = dict()
modes = ["no_weights", "time_diff", "normalized_time_diff", "scaled_time_diff", "points"]
for mode in modes:
    graphs[mode], name_mapping[mode] = read_pajek(f"TDF_{mode}", "output_graphs")
    print(f"Graph Info: {mode}")
    print(graphs[mode].number_of_nodes())
    print(graphs[mode].number_of_edges())

Graph Info: no_weights
5258
15048726
Graph Info: time_diff
5258
15048726
Graph Info: normalized_time_diff
5258
15048726
Graph Info: scaled_time_diff
5258
15048726
Graph Info: points
5258
15048726


In [None]:
results = dict()
for mode in modes:
    result = nx.pagerank(graphs[mode],max_iter=10000)
    results[mode] = {name: score for name,score in zip(name_mapping[mode].values(),result.values())}
    results[mode] = {k:v for k, v in sorted(results[mode].items(), key=lambda x: x[1], reverse=True)}


{'FRANTZ Nicolas': 0.0028651432282388273, 'GARRIGOU Gustave': 0.0024283673922730945, 'ALAVOINE Jean': 0.002337042997072302, 'FABER François': 0.002256459828484363, 'CHRISTOPHE Eugène': 0.002196030120048611, 'THYS Philippe': 0.0018933493554172769, 'GEORGET Émile': 0.0016685010954520724, 'PETIT-BRETON Lucien': 0.001605586895888186, 'BUYSSE Lucien': 0.0015316598527573835, 'LEDUCQ André': 0.001521761324815709, 'BELLENGER Romain': 0.0015116042198828757, 'TROUSSELIER Louis': 0.0014755695008016444, 'TIBERGHIEN Hector': 0.0014725851256154343, 'SELLIER Félix': 0.0014596796630424837, 'DARRIGADE André ': 0.0014305039348193275, 'ZOETEMELK Joop ': 0.0014168534094635616, 'DEWAELE Maurice': 0.0013869970127007927, 'ROSSIUS Jean': 0.0013798432218064284, 'POULIDOR Raymond ': 0.0013664344509285405, 'MAGNE Antonin': 0.0013468338114441833, 'LAMBOT Firmin': 0.0013345414362127127, 'DELANNOY Louis': 0.001299752279567922, 'BIDOT Marcel': 0.001297795014541455, 'KELLY Sean ': 0.0012940954900852982, 'VERVAECKE Ju

In [38]:
def top10(data):
    data = iter(data)
    for i in range(10):
        row = next(data)
        print(f"{i+1} - {row}")
top10(results["scaled_time_diff"])

1 - FRANTZ Nicolas
2 - CHRISTOPHE Eugène
3 - GARRIGOU Gustave
4 - ALAVOINE Jean
5 - FABER François
6 - DEWAELE Maurice
7 - THYS Philippe
8 - LEDUCQ André
9 - VERVAECKE Julien
10 - TIBERGHIEN Hector


In [None]:
graphs["no_weights"].nodes(data=True)


NodeDataView({0: {'label': 'GARIN Maurice '}, 1: {'label': 'PAGIE Émile'}, 2: {'label': 'GEORGET Léon'}, 3: {'label': 'AUGEREAU Fernand '}, 4: {'label': 'FISCHER Jean '}, 5: {'label': 'KERFF Marcel'}, 6: {'label': 'CATTEAU Aloïs '}, 7: {'label': 'PIVIN Ernest'}, 8: {'label': 'HABETS Léon'}, 9: {'label': 'BEAUGENDRE François'}, 10: {'label': 'MULLER Rodolfo '}, 11: {'label': 'GAUBAN Jean '}, 12: {'label': 'JAECK Anton '}, 13: {'label': 'POTHIER Lucien '}, 14: {'label': 'LEQUATRE Marcel'}, 15: {'label': 'PASQUIER Gustave'}, 16: {'label': 'LAESER Charles'}, 17: {'label': 'LOOTENS Julien'}, 18: {'label': 'FISCHER Josef'}, 19: {'label': 'SALAIS René'}, 20: {'label': 'BOROT Georges'}, 21: {'label': 'LECHARTIER Isidore'}, 22: {'label': 'DARGASSIES Jean'}, 23: {'label': 'GIRBE Julien'}, 24: {'label': 'FOUREAUX Alexandre'}, 25: {'label': 'MONACHON Ulysse'}, 26: {'label': 'DE BALADE Philippe'}, 27: {'label': 'ELLINAMOUR Henri'}, 28: {'label': 'PAYAN Ferdinand'}, 29: {'label': 'GUILLARME Gustave'