In [1]:
# %%
# Set the working directory to the parent of the directory the kernel started in
# (presumably the project root, where `config` and the data files live — confirm).
import json
import os

# Remember the start directory the first time this cell runs. A bare
# os.chdir('..') is relative to the *current* directory, so re-running the cell
# would climb one more level each time; anchoring to the start directory makes
# the cell idempotent.
if '_NOTEBOOK_START_DIR' not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))

# Imported only after the chdir above, as in the original cell order.
from config import graph_info_json_path, graph_benchmark_method_to_color

# Parse the JSON file at graph_info_json_path; later cells index it by dataset name.
gi = json.loads(graph_info_json_path.read_text())
# get pwd (shown as the cell output)
os.getcwd()

In [2]:
# Load the raw profiling results, keep a single benchmark run, prettify the
# labels and export the filtered table.
import pandas as pd
from utils_other import get_pretty_graph_name, get_pretty_tool_name

# Substring of the raw 'tool' value that identifies the run to keep; it also
# names the exported CSV, so the filter and the file name cannot drift apart.
RUN_TAG = '20230403-lastfm'

df = pd.read_csv('profile_results.csv')
# only keep rows whose 'tool' contains RUN_TAG
#  - regex=False: match the tag literally instead of as a regular expression
#  - na=False: rows with a missing 'tool' are dropped instead of raising on a NaN mask
#  - .copy(): work on an independent frame so the column assignments below
#    do not trigger SettingWithCopyWarning
df = df[df['tool'].str.contains(RUN_TAG, regex=False, na=False)].copy()
# replace dataset (graph) and tool cols with pretty cols
df['dataset'] = df['dataset'].apply(get_pretty_graph_name)
df['tool'] = df['tool'].apply(get_pretty_tool_name)
# drop cols with only NaNs
df = df.dropna(axis=1, how='all')

# save as csv
df.to_csv(f'{RUN_TAG}.csv', index=False)
df

Unnamed: 0,dataset,method,tool,average_time,iteration_count
722,lastfm,loading,igraph,0.016798,2023-04-03 04:28:45.966466
723,lastfm,shortest path,igraph,0.003677,2023-04-03 04:28:45.966466
724,lastfm,betweenness centrality,igraph,6.376532,2023-04-03 04:28:45.966466
725,lastfm,closeness centrality,igraph,3.043893,2023-04-03 04:28:45.966466
726,lastfm,k-core,igraph,0.000798,2023-04-03 04:28:45.966466
727,lastfm,loading,easygraph,0.150499,2023-04-03 04:29:30.153074
728,lastfm,shortest path,easygraph,0.001859,2023-04-03 04:29:30.153074
729,lastfm,betweenness centrality,easygraph,15.868699,2023-04-03 04:29:30.153074
730,lastfm,closeness centrality,easygraph,12.148326,2023-04-03 04:29:30.153074
731,lastfm,k-core,easygraph,0.020747,2023-04-03 04:29:30.153074


In [None]:
from utils_other import style_text
def _format_speedup_line(faster_label: str, slower_name: str, faster_time: float, slower_time: float,
                         dataset: str, method: str, style: str, is_directed: bool) -> str:
    """Render one '<faster> is X% faster than <slower> on <dataset> (...)' line."""
    # Relative speed-up: (slower_time - faster_time) / slower_time, i.e.
    # 1 - faster_time / slower_time, expressed as a percentage.
    speedup_pct = 100 * (1 - faster_time / slower_time)
    directed_suffix = ' (Directed)' if is_directed else ''
    # ':.2f' always prints two decimals (e.g. '88.60'), whereas round(x, 2)
    # would drop trailing zeros ('88.6').
    return (
        f"{faster_label} is {speedup_pct:.2f}% faster than {slower_name} on "
        f"{style_text(dataset, 'bold underline')} "
        f"({style_text(method, style)}, {faster_time:.2e} vs {slower_time:.2e})"
        f"{directed_suffix}"
    )


def compare_igraph_and_easygraph(df: pd.DataFrame, method_to_style_map: dict[str, str] = graph_benchmark_method_to_color) -> dict[str, list[str]]:
    """Compare easygraph and igraph mean run times per (dataset, method).

    For every dataset/method pair in ``df`` the mean ``average_time`` of the rows
    whose ``tool`` is ``'easygraph'`` is compared with that of the rows whose
    ``tool`` is ``'igraph'``, and one human-readable line is produced stating
    which tool is faster and by what percentage, computed as
    ``(slower_time - faster_time) / slower_time``.

    Relies on the module-level ``gi`` mapping (``gi[dataset]['is_directed']``)
    and on ``style_text`` for text styling.

    Args:
        df: Frame with ``dataset``, ``method``, ``tool`` and ``average_time`` columns.
        method_to_style_map: Maps each method name to the style passed to
            ``style_text`` when rendering that method; a method missing from the
            map raises ``KeyError``.

    Returns:
        A dict with three lists of lines:
        ``'easygraph'`` (pairs where easygraph is strictly faster),
        ``'igraph'`` (all other comparable pairs, including ties) and
        ``'skipped'`` (pairs where either tool has no usable timing).
    """
    lines: dict[str, list[str]] = {'easygraph': [], 'igraph': [], 'skipped': []}
    for dataset in df['dataset'].unique():
        is_directed = gi[dataset]['is_directed']
        df_dataset = df[df['dataset'] == dataset]
        for method in df_dataset['method'].unique():
            style = method_to_style_map[method]
            df_method = df_dataset[df_dataset['method'] == method]
            # Mean over all rows of a tool; an empty selection yields NaN.
            easygraph_avg_time = df_method.loc[df_method['tool'] == 'easygraph', 'average_time'].mean()
            igraph_avg_time = df_method.loc[df_method['tool'] == 'igraph', 'average_time'].mean()
            if pd.isna(easygraph_avg_time) or pd.isna(igraph_avg_time):
                # One of the tools has no timing for this pair: nothing to compare.
                lines['skipped'].append(style_text(f"skipping {dataset} on {method}", "strike"))
                continue
            if easygraph_avg_time < igraph_avg_time:
                # Only the easygraph wins get the tool name highlighted in green.
                lines['easygraph'].append(_format_speedup_line(
                    style_text('easygraph', 'green'), 'igraph',
                    easygraph_avg_time, igraph_avg_time,
                    dataset, method, style, is_directed,
                ))
            else:
                lines['igraph'].append(_format_speedup_line(
                    'igraph', 'easygraph',
                    igraph_avg_time, easygraph_avg_time,
                    dataset, method, style, is_directed,
                ))
    return lines

# Build the comparison lines for the filtered benchmark frame; the result is a
# dict with the keys 'easygraph', 'igraph' and 'skipped'.
lines = compare_igraph_and_easygraph(df)

In [4]:
# Report, per winning tool, how many comparisons it won followed by the
# individual lines; the 'skipped' bucket is deliberately not printed.
for winner, messages in lines.items():
    if winner != 'skipped':
        print(f"{winner} faster: {len(messages)} instances")
        for message in messages:
            print(message)

easygraph faster: 2 instances
[32measygraph[0m is 49.46% faster than igraph on [1;4mlastfm[0m ([32mshortest path[0m, 1.86e-03 vs 3.68e-03)
[32measygraph[0m is 49.41% faster than igraph on [1;4mlastfm_lcc[0m ([32mshortest path[0m, 1.86e-03 vs 3.68e-03)
igraph faster: 8 instances
igraph is 88.84% faster than easygraph on [1;4mlastfm[0m ([33mloading[0m, 1.68e-02 vs 1.50e-01)
igraph is 59.82% faster than easygraph on [1;4mlastfm[0m ([31mbetweenness centrality[0m, 6.38e+00 vs 1.59e+01)
igraph is 74.94% faster than easygraph on [1;4mlastfm[0m ([35mcloseness centrality[0m, 3.04e+00 vs 1.21e+01)
igraph is 96.15% faster than easygraph on [1;4mlastfm[0m ([38;5;129mk-core[0m, 7.98e-04 vs 2.07e-02)
igraph is 88.6% faster than easygraph on [1;4mlastfm_lcc[0m ([33mloading[0m, 1.69e-02 vs 1.48e-01)
igraph is 58.82% faster than easygraph on [1;4mlastfm_lcc[0m ([31mbetweenness centrality[0m, 6.38e+00 vs 1.55e+01)
igraph is 74.77% faster than easygraph on [1;4mlastfm