# Utils

In [1]:
import networkit as nk

In [2]:
from networkit.community import CoverF1Similarity, OverlappingNMIDistance


def calc_f1(
    graph: nk.Graph,
    ground_truth: nk.Cover,
    lazy_fox_result: nk.Cover,
) -> float:
    # F1
    f1 = CoverF1Similarity(graph, ground_truth, lazy_fox_result)
    f1.run()
    return f1.getWeightedAverage()

def calc_nmi(
    graph: nk.Graph,
    ground_truth: nk.Cover,
    lazy_fox_result: nk.Cover,
) -> float:
    nmi = OverlappingNMIDistance()
    distance = nmi.getDissimilarity(graph, ground_truth, lazy_fox_result)
    return distance

In [3]:
from networkit.graphio import CoverReader
from pathlib import Path
from os import listdir


cover_reader = CoverReader()

def load_lazyfox_as_cover(outpath: Path, graph: nk.Graph) -> nk.Cover:
    """When pointed at a lazyFOX output directory,
    reads in the last iteration and returns it as networkx.Cover object"""
    files = listdir(outpath / "iterations")
    highest_iteration = 0
    for f in files:
        if "clusters" not in f:
            continue
        iteration = int(f[:f.index("clusters.txt")])
        highest_iteration = max(highest_iteration, iteration)
        
    return cover_reader.read(
        str(outpath / "iterations" / f"{highest_iteration}clusters.txt"),
        graph,
    )

In [4]:
import networkx as nx
from networkit.nxadapter import nx2nk


def load_edgelist(path: Path) -> nk.Graph:
    return nx2nk(nx.read_edgelist(path))

# Alternative Algos

In [5]:
ALTERNATIVE_ALGOS_ROOT = "/home/timgarrels/Projects/LazyFox/alternative_algos/data"
DATASET_DIRECTORY = "/home/timgarrels/Projects/LazyFox/notebooks/datasets/"
DATASETS = ["eu", "dblp", "lj"]

In [6]:
from os.path import join


def get_graph(dataset):
    return nk.readGraph(
        join(DATASET_DIRECTORY, f"rewritten_{dataset}_graph.txt"),
        nk.Format.SNAP,
    )

def get_ground_truth(dataset):
    graph = get_graph(dataset)
    return cover_reader.read(
        join(DATASET_DIRECTORY, f"rewritten_{dataset}_gt.txt"),
        graph,
    )


In [7]:
from pathlib import Path
import json
from tqdm import tqdm


cover_reader = CoverReader()

def rewrite_cover(src: Path, dest: Path, dataset: str, sep: str=" "):
    """For algorithms that ran on unedited graphs, we need to apply
    the node mapping produced by the Rewriter.py"""

    if dest.exists():
        return

    with open(Path(DATASET_DIRECTORY) / f"node_mapping_{dataset}.json", "r") as f:
        node_mapping = json.load(f)
        
    with open(
        src,
        "r",
    ) as source:
        with open(
            dest,
            "w",
        ) as target:
            for l in tqdm(source.readlines()):
                if l[0] == "#":
                    target.write(l)
                    continue
                nodes = map(
                    lambda label: str(node_mapping[label]),
                    l.strip().split(sep)
                )
                target.write(sep.join(nodes))
                target.write("\n")   

In [8]:
def oslom_reader(output_dir: Path, graph, level=0) -> nk.Cover:
    """Point me at a OSLOM output directory and I'll return the communities
    of the first hierarchy"""
    postfix = "" if level == 0 else str(level)
    rewritten_cover_name = f"rewritten_tp{postfix}"
    
    return cover_reader.read(
        str(output_dir / rewritten_cover_name),
        graph,
    )

In [9]:
def get_oslom_result(dataset, graph=None, gt=None, level=0):
    print("OSLOM")
    if graph is None:
        print("\tGraph")
        graph = get_graph(dataset)
    if gt is None:
        print("\tGT")
        gt = get_ground_truth(dataset)
        
    postfix = "" if level == 0 else str(level)
    
    cover_path = Path(ALTERNATIVE_ALGOS_ROOT) / "other_algo_results" / f"oslom_{dataset}" / f"tp{postfix}"
    rewrite_path = Path(ALTERNATIVE_ALGOS_ROOT) / "other_algo_results" / f"oslom_{dataset}" / f"rewritten_tp{postfix}"
    
    print("\tRewrite")
    rewrite_cover(
        cover_path,
        rewrite_path,
        dataset,
    )
    print("\tCover")
    oslom_cover = oslom_reader(
        Path(ALTERNATIVE_ALGOS_ROOT) / "other_algo_results" / f"oslom_{dataset}",
        graph,
        level=level
    )
    return {
        "Dataset": dataset,
        "Runtime": -1,
        "F1": calc_f1(graph, gt, oslom_cover),
        "NMI Distance": calc_nmi(graph, gt, oslom_cover),
        "Algorithm": "oslom",
    }    

In [10]:
import pandas as pd
from pandas import DataFrame

dataframes = {}
for dataset in DATASETS:
    print(dataset)
    
    print("BigClam")
    big_clam = pd.read_csv(
        Path(ALTERNATIVE_ALGOS_ROOT) / "other_algo_results" / "big_clam_results" / f"big_clam_{dataset}.gt_metrics",
        index_col=0,
    )
    big_clam["Algorithm"] = "big_clam"
    
    print("CoreExp")
    if dataset == "lj":
        core_exp = DataFrame([{"Dataset": "lj", "Runtime": "NaN", "F1": "NaN", "NMI Distance": "NaN"}])
    else:
        core_exp = pd.read_csv(
            Path(ALTERNATIVE_ALGOS_ROOT) / "other_algo_results" / "core_exp_results" / f"core_expansion_{dataset}.gt_metrics",
            index_col=0,
        )
    core_exp["Algorithm"] = "core_exp"

    oslom = DataFrame([
        get_oslom_result(dataset)
    ])
    
    dataframes[dataset] = pd.concat([big_clam, core_exp, oslom])

eu
BigClam
CoreExp
OSLOM
	Graph
	GT
	Rewrite
	Cover
dblp
BigClam
CoreExp
OSLOM
	Graph
	GT
	Rewrite
	Cover
lj
BigClam
CoreExp
OSLOM
	Graph
	GT
	Rewrite
	Cover


In [11]:
dataframes["eu"]

Unnamed: 0,Dataset,Runtime,F1,NMI Distance,Algorithm
0,eu,2.773455,0.097267,0.996758,big_clam
0,eu,3.948965,0.123576,1.0,core_exp
0,eu,-1.0,0.648454,0.553498,oslom


In [12]:
dataframes["dblp"]

Unnamed: 0,Dataset,Runtime,F1,NMI Distance,Algorithm
0,dblp,811.162802,0.008093,1.0,big_clam
0,dblp,115997.683404,0.031101,1.0,core_exp
0,dblp,-1.0,0.121664,0.963287,oslom


In [13]:
dataframes["lj"]

Unnamed: 0,Dataset,Runtime,F1,NMI Distance,Algorithm
0,lj,12733.231291,0.002693,1.0,big_clam
0,lj,,,,core_exp
0,lj,-1.0,0.112168,0.988821,oslom


In [14]:
import os

LAZYFOX_RESULTS = Path(ALTERNATIVE_ALGOS_ROOT) / "lazyfox_results"


def get_lazyfox_cover(dataset: str, queue_size: int) -> nk.Cover:
    graph = get_graph(dataset)

    dataset_results = LAZYFOX_RESULTS / f"{dataset}_output"
    run = f"run_{dataset}_with_{queue_size}"
    
    if not os.path.isdir(dataset_results / run):
        raise KeyError(f"No run for queue_size {queue_size}")
    
    lazyfox_output = dataset_results / run / os.listdir(dataset_results / run)[0] / "iterations"
    filename = [f for f in os.listdir(lazyfox_output) if f[f.index("."):] == ".txt"][0]

    result_file = lazyfox_output / filename
    
    rewrite_cover(
        result_file,
        lazyfox_output / f"{filename}_rewritten",
        dataset,
        sep="\t",
    )

    return cover_reader.read(
        str(lazyfox_output / f"{filename}_rewritten"),
        graph,
    )

In [15]:
queue_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256]

In [16]:
from pandas import DataFrame


queue_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256]

lazyfox_datasets = {}

for dataset in tqdm(DATASETS):
    graph = get_graph(dataset)
    gt = get_ground_truth(dataset)
    
    raw_data = []
    for qsize in tqdm(queue_sizes, leave=False):
        lazyfox_cover = get_lazyfox_cover(dataset, qsize)
        f1 = calc_f1(graph, gt, lazyfox_cover)
        onmi = calc_nmi(graph, gt, lazyfox_cover)
        
        raw_data.append({
            "Dataset": dataset,
            "Runtime": -1,
            "F1": f1,
            "NMI Distance": onmi,
            "queue_size": qsize,
            "Algorithm": "lazyfox",      
        })
    
    lazyfox_datasets[dataset] = DataFrame(raw_data)
    

  0%|                                                                               | 0/3 [00:00<?, ?it/s]
  0%|                                                                               | 0/9 [00:00<?, ?it/s][A
                                                                                                          [A
  0%|                                                                               | 0/9 [00:00<?, ?it/s][A
 11%|███████▉                                                               | 1/9 [00:02<00:22,  2.78s/it][A
 22%|███████████████▊                                                       | 2/9 [00:05<00:20,  2.88s/it][A
 33%|███████████████████████▋                                               | 3/9 [00:08<00:17,  2.89s/it][A
 44%|███████████████████████████████▌                                       | 4/9 [00:11<00:14,  2.86s/it][A
 56%|███████████████████████████████████████▍                               | 5/9 [00:14<00:11,  2.85s/it][A
 67%|████████

KeyboardInterrupt: 

In [None]:
lazyfox_datasets["eu"]

In [None]:
lazyfox_datasets["dblp"]

In [None]:
lazyfox_datasets["lj"]

In [None]:
import pickle

with open("alternative_scores.pickle", "wb") as f:
    pickle.dump(dataframes, f)
    
with open("lazyFOX_scores.pickle", "wb") as f:
    pickle.dump(lazyfox_datasets, f)

In [28]:

tp_pattern = re.compile("tp\d*")
oslom_out = Path(ALTERNATIVE_ALGOS_ROOT) / "other_algo_results" / f"oslom_eu"

tp_files = [f for f in listdir(oslom_out) if tp_pattern.match(f)]
tp_files

['tp1', 'tp2', 'tp']

In [None]:
# Check OSLOM higher hierarchy resultsimport re

tp_pattern = re.compile("tp\d*")
def find_osolom_hierarchies(dataset):
    oslom_out = Path(ALTERNATIVE_ALGOS_ROOT) / "other_algo_results" / f"oslom_{dataset}"
    
    tp_files = [f for f in listdir(oslom_out) if tp_pattern.match(f)]
    return range(len(tp_files))

oslom_dataframes = {}
pbar = tqdm(DATASETS, desc="Datasets")
for dataset in pbar:
    pbar.set_description(f"Datasets: {dataset}")
    levels = find_osolom_hierarchies(dataset)
    raw_data = []
    for l in tqdm(levels, desc="level", leave=False):
        r = get_oslom_result(dataset, level=l)
        r["level"] = l
        raw_data.append(r)
    oslom_dataframes[dataset] = DataFrame(raw_data)

Datasets: eu:   0%|                                                                 | 0/3 [00:00<?, ?it/s]
level: 100%|████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 51.85it/s][A
Datasets: dblp:   0%|                                                               | 0/3 [00:00<?, ?it/s]

OSLOM
	Graph
	GT
	Rewrite
	Cover
OSLOM
	Graph
	GT
	Rewrite
	Cover
OSLOM
	Graph
	GT
	Rewrite
	Cover



level:   0%|                                                                        | 0/9 [00:00<?, ?it/s][A

OSLOM
	Graph
	GT
	Rewrite
	Cover



level:  11%|███████                                                         | 1/9 [00:04<00:33,  4.14s/it][A

OSLOM
	Graph
	GT
	Rewrite
	Cover



level:  22%|██████████████▏                                                 | 2/9 [00:08<00:28,  4.14s/it][A

OSLOM
	Graph
	GT
	Rewrite
	Cover



level:  33%|█████████████████████▎                                          | 3/9 [00:12<00:24,  4.11s/it][A

OSLOM
	Graph
	GT
	Rewrite
	Cover



level:  44%|████████████████████████████▍                                   | 4/9 [00:16<00:20,  4.09s/it][A

OSLOM
	Graph
	GT
	Rewrite




  0%|                                                                            | 0/2394 [00:00<?, ?it/s][A[A

  4%|██▊                                                             | 106/2394 [00:00<00:02, 1054.49it/s][A[A

100%|███████████████████████████████████████████████████████████████| 2394/2394 [00:00<00:00, 9178.11it/s][A[A


	Cover



level:  56%|███████████████████████████████████▌                            | 5/9 [00:20<00:16,  4.20s/it][A

OSLOM
	Graph
	GT
	Rewrite




  0%|                                                                            | 0/2022 [00:00<?, ?it/s][A[A

  1%|▍                                                                 | 12/2022 [00:00<00:17, 115.10it/s][A[A

100%|███████████████████████████████████████████████████████████████| 2022/2022 [00:00<00:00, 6735.97it/s][A[A


	Cover



level:  67%|██████████████████████████████████████████▋                     | 6/9 [00:25<00:12,  4.25s/it][A

OSLOM
	Graph
	GT
	Rewrite




  0%|                                                                             | 0/840 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████████████████| 840/840 [00:00<00:00, 3746.85it/s][A[A


	Cover



level:  78%|█████████████████████████████████████████████████▊              | 7/9 [00:29<00:08,  4.20s/it][A

OSLOM
	Graph
	GT
	Rewrite




  0%|                                                                             | 0/576 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████████████████| 576/576 [00:00<00:00, 2677.27it/s][A[A


	Cover



level:  89%|████████████████████████████████████████████████████████▉       | 8/9 [00:33<00:04,  4.17s/it][A

OSLOM
	Graph
	GT
	Rewrite




  0%|                                                                             | 0/574 [00:00<?, ?it/s][A[A

100%|█████████████████████████████████████████████████████████████████| 574/574 [00:00<00:00, 2761.45it/s][A[A


	Cover



level: 100%|████████████████████████████████████████████████████████████████| 9/9 [00:37<00:00,  4.16s/it][A
Datasets: lj:  67%|██████████████████████████████████████                   | 2/3 [00:37<00:18, 18.75s/it]
level:   0%|                                                                        | 0/1 [00:00<?, ?it/s][A

OSLOM
	Graph


In [None]:
oslom_dataframes["eu"]