# Utils

In [1]:
import networkit as nk

In [2]:
from networkit.community import CoverF1Similarity, OverlappingNMIDistance


def calc_f1(
    graph: nk.Graph,
    ground_truth: nk.Cover,
    lazy_fox_result: nk.Cover,
) -> float:
    """Score a cover against the ground truth with networkit's CoverF1Similarity.

    Args:
        graph: Graph both covers refer to.
        ground_truth: Reference cover.
        lazy_fox_result: Cover to be evaluated.

    Returns:
        The weighted average reported by the similarity measure after running it.
    """
    similarity = CoverF1Similarity(graph, ground_truth, lazy_fox_result)
    similarity.run()
    weighted_average = similarity.getWeightedAverage()
    return weighted_average

def calc_nmi(
    graph: nk.Graph,
    ground_truth: nk.Cover,
    lazy_fox_result: nk.Cover,
) -> float:
    """Return the OverlappingNMIDistance dissimilarity between two covers.

    Args:
        graph: Graph both covers refer to.
        ground_truth: Reference cover.
        lazy_fox_result: Cover to be evaluated.

    Returns:
        The value of ``getDissimilarity`` for the two covers.
    """
    return OverlappingNMIDistance().getDissimilarity(
        graph, ground_truth, lazy_fox_result
    )

In [3]:
from networkit.graphio import CoverReader
from pathlib import Path
from os import listdir


# Shared CoverReader instance; the loader functions in this notebook call its read() method.
cover_reader = CoverReader()

def load_lazyfox_as_cover(outpath: Path, graph: nk.Graph) -> nk.Cover:
    """Read the last iteration of a lazyFOX output directory as a networkit Cover.

    The directory ``outpath / "iterations"`` is scanned for files named
    ``<iteration>clusters.txt``; the one with the highest iteration number is read.

    Args:
        outpath: lazyFOX output directory containing an ``iterations`` sub-directory.
        graph: Graph the cover refers to.

    Returns:
        The cover read from the highest-numbered cluster file.

    Raises:
        FileNotFoundError: If no ``<iteration>clusters.txt`` file is present.
    """
    suffix = "clusters.txt"
    iterations_dir = outpath / "iterations"

    # Collect (iteration, file name) pairs; names that merely contain "clusters"
    # (e.g. rewritten copies) or have a non-numeric prefix are ignored instead of
    # crashing the scan.
    candidates = []
    for name in listdir(iterations_dir):
        if not name.endswith(suffix):
            continue
        prefix = name[: -len(suffix)]
        if prefix.isdigit():
            candidates.append((int(prefix), name))

    if not candidates:
        # Previously this silently fell back to a non-existent "0clusters.txt".
        raise FileNotFoundError(
            f"No '<iteration>{suffix}' file found in {iterations_dir}"
        )

    # Read the file that was actually found so zero-padded names keep working.
    _, latest_name = max(candidates)
    return cover_reader.read(str(iterations_dir / latest_name), graph)

In [4]:
import networkx as nx
from networkit.nxadapter import nx2nk


def load_edgelist(path: Path) -> nk.Graph:
    """Read an edge list with networkx and convert the result with ``nx2nk``."""
    nx_graph = nx.read_edgelist(path)
    return nx2nk(nx_graph)

# Alternative Algos

In [5]:
# Root directory of the stored algorithm results ("other_algo_results", "lazyfox_results").
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
ALTERNATIVE_ALGOS_ROOT = "/home/timgarrels/Projects/LazyFox/alternative_algos/data"
# Directory holding the rewritten graphs, ground truths and node-mapping JSON files.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
DATASET_DIRECTORY = "/home/timgarrels/Projects/LazyFox/notebooks/datasets/"
# Dataset keys; used to build file names and as keys of the result dictionaries.
DATASETS = ["eu", "dblp", "lj"]

In [6]:
from os.path import join


def get_graph(dataset):
    """Load ``rewritten_<dataset>_graph.txt`` from DATASET_DIRECTORY as a SNAP-format graph."""
    graph_path = join(DATASET_DIRECTORY, f"rewritten_{dataset}_graph.txt")
    return nk.readGraph(graph_path, nk.Format.SNAP)

def get_ground_truth(dataset):
    """Read ``rewritten_<dataset>_gt.txt`` from DATASET_DIRECTORY as a cover.

    The dataset's graph is loaded via ``get_graph`` and handed to the cover reader.
    """
    dataset_graph = get_graph(dataset)
    gt_path = join(DATASET_DIRECTORY, f"rewritten_{dataset}_gt.txt")
    return cover_reader.read(gt_path, dataset_graph)


In [7]:
from pathlib import Path
import json
from tqdm import tqdm


# Rebinds `cover_reader` to a fresh CoverReader; the same name is already bound in an earlier cell.
cover_reader = CoverReader()

def rewrite_cover(src: Path, dest: Path, dataset: str, sep: str=" "):
    """Translate the node labels of a cover file with the dataset's node mapping.

    For algorithms that ran on unedited graphs, we need to apply
    the node mapping produced by the Rewriter.py.

    Lines starting with "#" and blank lines are copied unchanged. Every other
    line is split on ``sep``, each label is replaced by ``node_mapping[label]``
    and the labels are re-joined with ``sep``. Nothing is done when ``dest``
    already exists.

    Args:
        src: Cover file with the original node labels.
        dest: Output file for the translated cover.
        dataset: Dataset key; selects ``node_mapping_<dataset>.json`` in
            DATASET_DIRECTORY.
        sep: Separator between node labels on a line.

    Raises:
        KeyError: If a label is missing from the node mapping.
    """

    if dest.exists():
        return

    with open(Path(DATASET_DIRECTORY) / f"node_mapping_{dataset}.json", "r") as f:
        node_mapping = json.load(f)

    # Write to a temporary sibling first: a failure or interrupt must not leave a
    # truncated `dest` behind, because the existence check above would then treat
    # it as a finished result on every later call.
    partial = dest.with_name(dest.name + ".partial")
    try:
        with open(src, "r") as source, open(partial, "w") as target:
            # readlines() is kept so tqdm knows the total and can show progress.
            for line in tqdm(source.readlines()):
                # Comments and blank lines carry no labels to translate.
                if line.startswith("#") or not line.strip():
                    target.write(line)
                    continue
                labels = line.strip().split(sep)
                target.write(sep.join(str(node_mapping[label]) for label in labels))
                target.write("\n")
        partial.replace(dest)
    except BaseException:
        # Also covers KeyboardInterrupt from an interrupted notebook cell.
        if partial.exists():
            partial.unlink()
        raise

In [8]:
def oslom_reader(output_dir: Path, graph) -> nk.Cover:
    """Read the ``rewritten_tp`` file of an OSLOM output directory as a cover.

    NOTE(review): per the original note this file holds the communities of the
    first hierarchy -- confirm against the OSLOM output layout.
    """
    rewritten_cover_path = output_dir / "rewritten_tp"
    return cover_reader.read(str(rewritten_cover_path), graph)

In [9]:
import pandas as pd
from pandas import DataFrame

# Collect one score table per dataset for the alternative algorithms.
# BigClam and CoreExp metrics are read from stored ".gt_metrics" files; the OSLOM
# scores are computed here from its (rewritten) cover.
other_algo_results = Path(ALTERNATIVE_ALGOS_ROOT) / "other_algo_results"

dataframes = {}
for dataset in DATASETS:
    print(dataset)

    print("BigClam")
    big_clam = pd.read_csv(
        other_algo_results / "big_clam_results" / f"big_clam_{dataset}.gt_metrics",
        index_col=0,
    )
    big_clam["Algorithm"] = "big_clam"

    print("CoreExp")
    if dataset == "lj":
        # No CoreExp metrics file is read for lj; insert a placeholder row.
        # Real NaN floats (not the string "NaN") keep the metric columns numeric
        # after the concat below.
        core_exp = DataFrame([{
            "Dataset": "lj",
            "Runtime": float("nan"),
            "F1": float("nan"),
            "NMI Distance": float("nan"),
        }])
    else:
        core_exp = pd.read_csv(
            other_algo_results / "core_exp_results" / f"core_expansion_{dataset}.gt_metrics",
            index_col=0,
        )
    core_exp["Algorithm"] = "core_exp"

    print("OSLOM")
    print("\tGraph")
    graph = get_graph(dataset)
    gt = get_ground_truth(dataset)
    oslom_dir = other_algo_results / f"oslom_{dataset}"
    # OSLOM ran on the unedited graph, so its "tp" file is translated first
    # (skipped by rewrite_cover when "rewritten_tp" already exists).
    rewrite_cover(
        oslom_dir / "tp",
        oslom_dir / "rewritten_tp",
        dataset,
    )
    print("\tCover")
    oslom_cover = oslom_reader(oslom_dir, graph)
    oslom = DataFrame([{
        "Dataset": dataset,
        "Runtime": -1,  # placeholder: no runtime is recorded here
        "F1": calc_f1(graph, gt, oslom_cover),
        "NMI Distance": calc_nmi(graph, gt, oslom_cover),
        "Algorithm": "oslom",
    }])

    dataframes[dataset] = pd.concat([big_clam, core_exp, oslom])

eu
BigClam
CoreExp
OSLOM
	Graph
	Cover
dblp
BigClam
CoreExp
OSLOM
	Graph
	Cover
lj
BigClam
CoreExp
OSLOM
	Graph
	Cover


In [10]:
dataframes["eu"]

Unnamed: 0,Dataset,Runtime,F1,NMI Distance,Algorithm
0,eu,2.773455,0.097267,0.996758,big_clam
0,eu,3.948965,0.123576,1.0,core_exp
0,eu,-1.0,0.648454,0.553498,oslom


In [11]:
dataframes["dblp"]

Unnamed: 0,Dataset,Runtime,F1,NMI Distance,Algorithm
0,dblp,811.162802,0.008093,1.0,big_clam
0,dblp,115997.683404,0.031101,1.0,core_exp
0,dblp,-1.0,0.121664,0.963287,oslom


In [12]:
dataframes["lj"]

Unnamed: 0,Dataset,Runtime,F1,NMI Distance,Algorithm
0,lj,12733.231291,0.002693,1.0,big_clam
0,lj,,,,core_exp
0,lj,-1.0,0.112168,0.988821,oslom


In [13]:
import os

# Directory below ALTERNATIVE_ALGOS_ROOT that holds the per-dataset lazyFOX output folders.
LAZYFOX_RESULTS = Path(ALTERNATIVE_ALGOS_ROOT) / "lazyfox_results"


def get_lazyfox_cover(dataset: str, queue_size: int) -> nk.Cover:
    """Load the lazyFOX result for one dataset and queue size as a cover.

    The result is looked up in
    ``LAZYFOX_RESULTS/<dataset>_output/run_<dataset>_with_<queue_size>/<entry>/iterations``.
    The ``.txt`` file found there is translated with ``rewrite_cover`` (tab
    separated) into ``<file>_rewritten`` and that file is read.

    Args:
        dataset: Dataset key.
        queue_size: Queue size of the lazyFOX run to load.

    Returns:
        The cover read from the rewritten result file.

    Raises:
        KeyError: If no run directory exists for ``queue_size``.
        IndexError: If the run directory is empty or holds no ``.txt`` result.
    """
    graph = get_graph(dataset)

    dataset_results = LAZYFOX_RESULTS / f"{dataset}_output"
    run = f"run_{dataset}_with_{queue_size}"
    run_dir = dataset_results / run

    if not os.path.isdir(run_dir):
        raise KeyError(f"No run for queue_size {queue_size}")

    # os.listdir order is arbitrary; sort so the same entry is picked every time.
    run_entries = sorted(os.listdir(run_dir))
    if not run_entries:
        raise IndexError(f"Run directory {run_dir} is empty")
    lazyfox_output = run_dir / run_entries[0] / "iterations"

    # endswith() also copes with names that contain no dot at all, which made the
    # previous index(".")-based check raise ValueError.
    # NOTE(review): if several iteration files are present the first in sorted
    # order is used -- confirm whether the last iteration is wanted instead.
    txt_files = sorted(f for f in os.listdir(lazyfox_output) if f.endswith(".txt"))
    if not txt_files:
        raise IndexError(f"No .txt result file in {lazyfox_output}")
    filename = txt_files[0]

    rewritten_file = lazyfox_output / f"{filename}_rewritten"
    rewrite_cover(
        lazyfox_output / filename,
        rewritten_file,
        dataset,
        sep="\t",
    )

    return cover_reader.read(
        str(rewritten_file),
        graph,
    )

In [14]:
# lazyFOX queue sizes for which a stored run is evaluated.
queue_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256]

In [15]:
from pandas import DataFrame


# Queue sizes evaluated below (same values as in the previous cell).
queue_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256]

# Maps each dataset key to a DataFrame with one row of scores per queue size.
lazyfox_datasets = {}

for dataset in tqdm(DATASETS):
    # Graph and ground truth are loaded once per dataset and reused for every queue size.
    graph = get_graph(dataset)
    gt = get_ground_truth(dataset)
    
    raw_data = []
    for qsize in tqdm(queue_sizes, leave=False):
        # Raises KeyError if no run directory exists for this queue size.
        lazyfox_cover = get_lazyfox_cover(dataset, qsize)
        f1 = calc_f1(graph, gt, lazyfox_cover)
        onmi = calc_nmi(graph, gt, lazyfox_cover)
        
        raw_data.append({
            "Dataset": dataset,
            # Placeholder value: no runtime is measured in this loop.
            "Runtime": -1,
            "F1": f1,
            "NMI Distance": onmi,
            "queue_size": qsize,
            "Algorithm": "lazyfox",      
        })
    
    # Build the frame once from the collected records.
    lazyfox_datasets[dataset] = DataFrame(raw_data)
    

  0%|                                                                               | 0/3 [00:00<?, ?it/s]
  0%|                                                                               | 0/9 [00:00<?, ?it/s][A
 22%|███████████████▊                                                       | 2/9 [00:00<00:00, 18.84it/s][A
 44%|███████████████████████████████▌                                       | 4/9 [00:00<00:00, 17.59it/s][A
 33%|███████████████████████▋                                               | 1/3 [00:03<00:07,  3.73s/it][A
  0%|                                                                               | 0/9 [00:00<?, ?it/s][A
 11%|███████▉                                                               | 1/9 [00:03<00:27,  3.47s/it][A
 22%|███████████████▊                                                       | 2/9 [00:07<00:24,  3.52s/it][A
 33%|███████████████████████▋                                               | 3/9 [00:10<00:21,  3.55s/it][A
 44%|████████

In [16]:
lazyfox_datasets["eu"]

Unnamed: 0,Dataset,Runtime,F1,NMI Distance,queue_size,Algorithm
0,eu,-1,0.554974,0.827041,1,lazyfox
1,eu,-1,0.558769,0.824905,2,lazyfox
2,eu,-1,0.563424,0.827895,4,lazyfox
3,eu,-1,0.548343,0.831131,8,lazyfox
4,eu,-1,0.550633,0.830473,16,lazyfox
5,eu,-1,0.55259,0.840361,32,lazyfox
6,eu,-1,0.55481,0.836442,64,lazyfox
7,eu,-1,0.557954,0.843419,128,lazyfox
8,eu,-1,0.546587,0.823678,256,lazyfox


In [17]:
lazyfox_datasets["dblp"]

Unnamed: 0,Dataset,Runtime,F1,NMI Distance,queue_size,Algorithm
0,dblp,-1,0.135638,0.911933,1,lazyfox
1,dblp,-1,0.13564,0.911933,2,lazyfox
2,dblp,-1,0.135641,0.911925,4,lazyfox
3,dblp,-1,0.135641,0.911921,8,lazyfox
4,dblp,-1,0.135648,0.911922,16,lazyfox
5,dblp,-1,0.135647,0.911929,32,lazyfox
6,dblp,-1,0.135644,0.911928,64,lazyfox
7,dblp,-1,0.135656,0.911907,128,lazyfox
8,dblp,-1,0.135655,0.911919,256,lazyfox


In [18]:
lazyfox_datasets["lj"]

Unnamed: 0,Dataset,Runtime,F1,NMI Distance,queue_size,Algorithm
0,lj,-1,0.186697,0.963144,1,lazyfox
1,lj,-1,0.186759,0.963132,2,lazyfox
2,lj,-1,0.186729,0.963136,4,lazyfox
3,lj,-1,0.18673,0.963168,8,lazyfox
4,lj,-1,0.186655,0.963167,16,lazyfox
5,lj,-1,0.186601,0.963184,32,lazyfox
6,lj,-1,0.18673,0.963199,64,lazyfox
7,lj,-1,0.18677,0.963198,128,lazyfox
8,lj,-1,0.186875,0.963202,256,lazyfox


In [23]:
import pickle

# Persist both score dictionaries as pickle files in the current working directory.
for pickle_name, scores in (
    ("alternative_scores.pickle", dataframes),
    ("lazyFOX_scores.pickle", lazyfox_datasets),
):
    with open(pickle_name, "wb") as f:
        pickle.dump(scores, f)