In [1]:
from functools import partial
from multiprocessing import Pool
import pandas as pd
from infomap import Infomap
from infomap_worker import infomap_worker

In [2]:
# Keyword options shared by the single-process Infomap(...) call and by
# every infomap_worker task, so both approaches run with the same settings.
args = dict(silent=True, directed=True)

# Number of trials: passed as num_trials to the single Infomap instance and
# used as the number of seeds (1..num_trials) for the worker pool.
num_trials = 100

In [3]:
%%time

im = Infomap(num_trials=num_trials, **args)
im.read_file("science1997_2y.net")
im.run()
im.codelength, im.num_top_modules

CPU times: user 1min 46s, sys: 47.1 s, total: 2min 33s
Wall time: 39.9 s


(7.293725695201871, 69)

In [4]:
%%time

worker = partial(infomap_worker, **args)

with Pool() as p:
    worker_args = (("science1997_2y.net", f"output/science1997_{seed}.tree", seed) for seed in range(1, num_trials+1))
    output = p.starmap(worker, worker_args)

runs = pd.DataFrame(output).sort_values("codelength").reset_index(drop=True)
runs.head()

CPU times: user 68.1 ms, sys: 109 ms, total: 177 ms
Wall time: 13.1 s


Unnamed: 0,codelength,num_levels,num_modules,outname
0,7.295476,4,67,output/science1997_52.tree
1,7.296154,4,69,output/science1997_82.tree
2,7.296576,4,68,output/science1997_83.tree
3,7.29661,4,69,output/science1997_40.tree
4,7.296678,4,69,output/science1997_44.tree


In [5]:
# `runs` is sorted by ascending codelength, so row 0 is the best run;
# its `outname` is the path of the tree file written for that run.
best = runs.iloc[0].outname

# Space-separated file; anything after "#" on a line is treated as a comment.
tree = pd.read_csv(best, sep=" ", comment="#", names=["path", "flow", "name", "node_id"])

# Module = everything before the last ":" of the path (e.g. "1:1:3" -> "1:1").
# rpartition yields "" for a path without ":", exactly like the split/join it
# replaces, but is vectorized and passes missing values through instead of
# raising on them.
tree["module"] = tree["path"].str.rpartition(":")[0]
tree

Unnamed: 0,path,flow,name,node_id,module
0,1:1:1,0.032975,J BIOL CHEM,2557,1:1
1,1:1:2,0.030853,NATURE,3683,1:1
2,1:1:3,0.029864,SCIENCE,4460,1:1
3,1:1:4,0.024917,P NATL ACAD SCI USA,3934,1:1
4,1:1:5,0.022752,CELL,965,1:1
...,...,...,...,...,...
6434,63:1,0.000000,SOC DYNAMICS,6276,63
6435,64:1,0.000000,WILSON LIBR BULL,6405,64
6436,65:1,0.000000,WORKFORCE,6415,65
6437,66:1,0.000000,ACTA BIOL CRACOV BOT,33,66
