In [1]:
# %%
# Move the working directory one level up before importing project modules.
# NOTE(review): presumably this is what makes `config` importable and the
# relative file paths in later cells resolve — confirm against the repo layout.
# NOTE(review): '..' is relative to the *current* directory, so re-running this
# cell without restarting the kernel moves up one more level each time.
import os

os.chdir('..')
from config import graph_info_json_path, graph_benchmark_method_to_color
import json
# Parse the graph-info JSON file. `gi` is a module-level name; the comparison
# function further down indexes it as gi[dataset]['is_directed'].
gi = json.loads(graph_info_json_path.read_text())
# Last expression of the cell: display the working directory now in effect.
os.getcwd()

In [2]:
# Load the raw benchmark results, keep the rows of one benchmark run, replace
# the dataset/tool identifiers with their pretty names, and export the result.
import pandas as pd
from utils_other import get_pretty_graph_name, get_pretty_tool_name

# Tag identifying the benchmark run to keep; also used as the stem of the
# exported CSV file so the filter and the file name cannot drift apart.
RUN_TAG = '20230331-all-without-google-pokec'

df = pd.read_csv('profile_results.csv')
# Keep only rows whose raw 'tool' value contains RUN_TAG.
#  - regex=False: the tag is matched as a literal substring, not as a pattern.
#  - na=False: rows with a missing 'tool' value count as non-matching instead
#    of putting NaN into the boolean mask.
#  - .copy(): make the subset an independent frame so the column assignments
#    below do not trigger SettingWithCopyWarning.
df = df[df['tool'].str.contains(RUN_TAG, regex=False, na=False)].copy()
# Replace the dataset (graph) and tool columns with their pretty names.
df['dataset'] = df['dataset'].apply(get_pretty_graph_name)
df['tool'] = df['tool'].apply(get_pretty_tool_name)
# Drop columns that contain only NaNs.
df = df.dropna(axis=1, how='all')

# Save the filtered table as CSV (without the index), then display it.
df.to_csv(f'{RUN_TAG}.csv', index=False)
df

Unnamed: 0,dataset,method,tool,average_time,iteration_count
363,cheminformatics,loading,igraph,0.000109,2023-03-30 23:40:17.419550
364,cheminformatics,shortest path,igraph,0.000008,2023-03-30 23:40:17.419550
365,cheminformatics,page rank,igraph,0.000010,2023-03-30 23:40:17.419550
366,cheminformatics,betweenness centrality,igraph,0.000069,2023-03-30 23:40:17.419550
367,cheminformatics,closeness centrality,igraph,0.000040,2023-03-30 23:40:17.419550
...,...,...,...,...,...
665,amazon_lcc,loading,igraph,0.705498,2023-04-02 03:26:47.259389
666,amazon_lcc,shortest path,igraph,0.321057,2023-04-02 03:26:47.259389
667,amazon_lcc,betweenness centrality,igraph,36621.778889,2023-04-02 03:26:47.259389
668,amazon_lcc,closeness centrality,igraph,8051.112730,2023-04-02 03:26:47.259389


In [None]:
from utils_other import style_text
def compare_igraph_and_easygraph(df: pd.DataFrame, method_to_style_map: dict[str, str] = graph_benchmark_method_to_color) -> dict[str, list[str]]:
    """Compare easygraph and igraph mean timings per (dataset, method) pair.

    For every dataset in ``df['dataset']`` and every method seen for that
    dataset, the mean of ``average_time`` is taken separately over the rows
    whose ``tool`` is ``'easygraph'`` and ``'igraph'``. The relative speed-up
    of the faster tool is ``1 - faster_time / slower_time``, expressed as a
    percentage rounded to two decimals.

    Args:
        df: Frame with ``dataset``, ``method``, ``tool`` and ``average_time``
            columns.
        method_to_style_map: Maps a method name to the style handed to
            ``style_text`` when the method name is rendered.

    Returns:
        A dict with three lists of message strings:
        ``'easygraph'`` (easygraph strictly faster), ``'igraph'`` (every other
        comparable pair, ties included) and ``'skipped'`` (pairs where either
        tool has no mean time).

    Notes:
        Reads the module-level ``gi`` mapping for each dataset's
        ``'is_directed'`` entry. The lookups into ``gi`` and
        ``method_to_style_map`` are unguarded, so a missing entry propagates
        as an exception.
    """
    report: dict[str, list[str]] = {'easygraph': [], 'igraph': [], 'skipped': []}

    def mean_time(rows: pd.DataFrame, tool: str):
        # Mean average_time over the rows of one tool; NaN when there are none.
        return rows.loc[rows['tool'] == tool, 'average_time'].mean()

    for dataset in df['dataset'].unique():
        directed_note = ' (Directed)' if gi[dataset]['is_directed'] else ''
        dataset_rows = df[df['dataset'] == dataset]

        for method in dataset_rows['method'].unique():
            # Looked up before the skip check, exactly like the mean times.
            style = method_to_style_map[method]
            method_rows = dataset_rows[dataset_rows['method'] == method]
            eg_time = mean_time(method_rows, 'easygraph')
            ig_time = mean_time(method_rows, 'igraph')

            # Without a timing for both tools there is nothing to compare.
            if pd.isna(eg_time) or pd.isna(ig_time):
                report['skipped'].append(style_text(f"skipping {dataset} on {method}", "strike"))
                continue

            # Shared middle part of both messages: "<dataset> (<method>".
            where = f"{style_text(dataset, 'bold underline')} ({style_text(method, style)}"

            if eg_time < ig_time:
                gain = round(100 * (1 - eg_time / ig_time), 2)
                report['easygraph'].append(
                    f"{style_text('easygraph', 'green')} is {gain}% faster than igraph on "
                    f"{where}, {eg_time:.2e} vs {ig_time:.2e}){directed_note}"
                )
            else:
                gain = round(100 * (1 - ig_time / eg_time), 2)
                report['igraph'].append(
                    f"igraph is {gain}% faster than easygraph on "
                    f"{where}, {ig_time:.2e} vs {eg_time:.2e}){directed_note}"
                )

    return report

# Build the per-tool comparison messages from `df`, using the default
# method-to-style map; `lines` is a dict keyed by 'easygraph', 'igraph' and 'skipped'.
lines = compare_igraph_and_easygraph(df)

In [4]:
# Print one section per tool: a count header followed by each message.
# The 'skipped' bucket is left out of the printed report.
for winner, winner_lines in lines.items():
    if k_is_reported := (winner != 'skipped'):
        print(f"{winner} faster: {len(winner_lines)} instances")
        for entry in winner_lines:
            print(entry)

easygraph faster: 15 instances
[32measygraph[0m is 11.24% faster than igraph on [1;4mcheminformatics[0m ([32mshortest path[0m, 7.01e-06 vs 7.89e-06) (Directed)
[32measygraph[0m is 9.7% faster than igraph on [1;4mcheminformatics[0m ([34mpage rank[0m, 9.38e-06 vs 1.04e-05) (Directed)
[32measygraph[0m is 59.5% faster than igraph on [1;4meco[0m ([32mshortest path[0m, 2.65e-04 vs 6.55e-04)
[32measygraph[0m is 63.42% faster than igraph on [1;4mpgp[0m ([32mshortest path[0m, 9.66e-03 vs 2.64e-02) (Directed)
[32measygraph[0m is 83.06% faster than igraph on [1;4mpgp[0m ([34mpage rank[0m, 9.47e-03 vs 5.59e-02) (Directed)
[32measygraph[0m is 38.65% faster than igraph on [1;4mroad[0m ([32mshortest path[0m, 4.03e-03 vs 6.56e-03)
[32measygraph[0m is 95.55% faster than igraph on [1;4mamazon[0m ([32mshortest path[0m, 1.05e-02 vs 2.35e-01) (Directed)
[32measygraph[0m is 13.7% faster than igraph on [1;4mamazon[0m ([31mbetweenness centrality[0m, 2.02e+04 vs 