In [None]:
import os
import json
from pathlib import Path

# Criterion output directory, resolved relative to the notebook's working directory.
benchmark_path = Path("../target/criterion")

# One sub-directory per benchmarked map. "report" is skipped (presumably
# Criterion's aggregated report rather than a map - confirm), and so is any
# stray non-directory entry, which would otherwise break the per-method
# lookups further down. Sorting makes the load order independent of the
# filesystem's listing order.
maps_paths = sorted(
    entry for entry in benchmark_path.iterdir() if entry.is_dir() and entry.name != "report"
)

# Benchmark names whose estimates are loaded for every map.
methods = ["fast_sssp_sequential", "dijkstra", "dijkstra_fibonacci"]


# Shape of the `data` dict built further down: {map_name: {method_name: {metric_name: value}}}
def load_json(filename: Path) -> dict:
    """Read the file at ``filename`` and return its parsed JSON content."""
    with open(filename) as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


# Gather every map's estimates, one method at a time, into a nested dict
# keyed first by the map directory's name and then by method name.
data = {}
for map_dir in maps_paths:
    estimates_by_method = {}
    for method_name in methods:
        estimates_by_method[method_name] = load_json(map_dir / method_name / "new/estimates.json")
    data[map_dir.name] = estimates_by_method
data


In [None]:
from typing import Any

# Areas grouped by their pair count, i.e. the number each area's benchmark
# figures are divided by when they are normalised.
_areas_by_pair_count = {
    50: ("jan_mayen", "gibraltar", "monaco"),
    25: ("san_marino", "andorra", "gotland", "malta", "reykjavik", "budapest", "luxembourg"),
    10: ("haiti", "iceland", "stockholm", "missisippi"),
    5: ("peru", "sweden"),
}

# Flattened into (area, pair_count) tuples, keeping the order written above.
pairs = [(area, count) for count, areas in _areas_by_pair_count.items() for area in areas]


def normalize_recursive(item: Any, pair_count: int) -> Any:
    """Return a copy of ``item`` with every number divided by ``pair_count``.

    Dicts and lists are traversed recursively and rebuilt; any other value
    that is not a number is returned as is.

    Two kinds of value are deliberately left unscaled:

    * whatever is stored under a ``"confidence_level"`` key (the value is
      passed through untouched, without recursing into it);
    * booleans - ``bool`` is a subclass of ``int``, so without an explicit
      check ``True`` would silently become ``1 / pair_count``.

    Raises:
        ZeroDivisionError: if ``pair_count`` is 0 and a number is reached.
    """
    if isinstance(item, dict):
        return {
            key: value if key == "confidence_level" else normalize_recursive(value, pair_count)
            for key, value in item.items()
        }
    if isinstance(item, list):
        return [normalize_recursive(element, pair_count) for element in item]
    if isinstance(item, (float, int)) and not isinstance(item, bool):
        return item / pair_count
    return item


# Scale each area's estimates by its pair count. The benchmark results are
# keyed by the area name with an ".osm" suffix.
normalized_data = {}
for area, pair_count in pairs:
    normalized_data[area] = normalize_recursive(data[area + ".osm"], pair_count)
normalized_data


In [None]:
# (area, edge count, node count) for each map.
_graph_sizes = [
    ("jan_mayen", 29786, 13230),
    ("gibraltar", 100284, 44639),
    ("monaco", 72318, 32492),
    ("san_marino", 341976, 154249),
    ("andorra", 1043844, 449273),
    ("gotland", 1634818, 725852),
    ("malta", 1818642, 734962),
    ("reykjavik", 2241396, 1051160),
    ("budapest", 5187404, 2443154),
    ("luxembourg", 10664130, 3916210),
    ("haiti", 17186498, 8497106),
    ("iceland", 21055604, 10350896),
    ("stockholm", 16459816, 7307104),
    ("missisippi", 21372374, 10464418),
    ("peru", 76084784, 37194932),
    ("sweden", 220005662, 98808100),
]

# Expanded into {area: {"edges": ..., "nodes": ...}}, in the order listed above.
graph_data = {area: {"edges": edges, "nodes": nodes} for area, edges, nodes in _graph_sizes}
# Pair each area's normalised benchmark results with its graph size.
normalized_data_with_graph = {}
for area, method_results in normalized_data.items():
    normalized_data_with_graph[area] = {"methods": method_results, "graph": graph_data[area]}
normalized_data_with_graph["jan_mayen"]["graph"]


In [None]:
# Optional: restrict the amount of data
#
# Both knobs are explicit so that a plain re-run is predictable:
#   * MAX_NODES - areas with more nodes than this are dropped; None keeps
#     every area.
#   * EXCLUDED_METHODS - method names to leave out of the plots, e.g.
#     {"dijkstra_fibonacci"}; an empty set keeps every method.
MAX_NODES = 1_000_000
EXCLUDED_METHODS = set()

# NOTE: this rebinds normalized_data_with_graph, so after raising MAX_NODES
# the cell that builds it has to be re-run first to get dropped areas back.
normalized_data_with_graph = {
    area: entry
    for area, entry in normalized_data_with_graph.items()
    if MAX_NODES is None or entry["graph"]["nodes"] <= MAX_NODES
}
filtered_methods = [method for method in methods if method not in EXCLUDED_METHODS]

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Criterion's default wall-clock measurement records estimates in nanoseconds,
# while the y-axis below is labelled in seconds, so values are converted
# before plotting.
NANOSECONDS_PER_SECOND = 1e9

plt.figure(figsize=(9, 6))
colors = {"dijkstra": "C0", "dijkstra_fibonacci": "C1", "fast_sssp_sequential": "C2"}

# plt.plot joins points in the order given, so the areas are ordered by node
# count; otherwise the trendline doubles back wherever a larger map is listed
# before a smaller one.
areas_by_size = sorted(
    normalized_data_with_graph,
    key=lambda area: normalized_data_with_graph[area]["graph"]["nodes"],
)

# The x values are the same for every method, so they are computed once.
x = np.array([normalized_data_with_graph[a]["graph"]["nodes"] for a in areas_by_size], dtype=float)
# Regressor of the fit: mean time is modelled as a * n*log(n) + b.
nlogn = x * np.log(x)

for method in filtered_methods:
    y = (
        np.array(
            [normalized_data_with_graph[a]["methods"][method]["mean"]["point_estimate"] for a in areas_by_size],
            dtype=float,
        )
        / NANOSECONDS_PER_SECOND
    )
    # A straight-line fit needs at least two points; with fewer, only the
    # scatter is drawn.
    if len(x) >= 2:
        trendline = np.polyfit(nlogn, y, 1)
        p = np.poly1d(trendline)
        plt.plot(x, p(nlogn), label=f"{method} trendline", c=colors.get(method, None))
    plt.scatter(x, y, label=method, s=30, alpha=0.8, c=colors.get(method, None))

plt.xlabel("Number of nodes")
plt.ylabel("Mean time (s)")
plt.title("Mean point_estimate vs Number of nodes by method")
plt.legend(title="Method")
# plt.xscale("log")
plt.tight_layout()
plt.show()

In [None]:
# Criterion's default wall-clock measurement records estimates in nanoseconds,
# while the y-axis below is labelled in seconds, so values are converted
# before plotting.
NANOSECONDS_PER_SECOND = 1e9

plt.figure(figsize=(15, 10))

# Sort the data by graph size (nodes)
sorted_areas = sorted(normalized_data_with_graph.keys(),
                      key=lambda x: normalized_data_with_graph[x]['graph']['nodes'])

# Set up the positions for the bars
x = np.arange(len(sorted_areas))
width = 0.25  # Width of each bar

# Only the methods kept by the earlier filter get a bar, in their original
# order. Indexing the bars by position in this list (rather than in `methods`)
# avoids an empty slot where a filtered-out method would have been.
plotted_methods = [method for method in methods if method in filtered_methods]

# Plot bars for each method. Offsets are measured from the middle of the
# group, so the group stays centred on its tick however many methods are shown.
for i, method in enumerate(plotted_methods):
    means = [normalized_data_with_graph[area]['methods'][method]['mean']['point_estimate']
             / NANOSECONDS_PER_SECOND
             for area in sorted_areas]
    offset = (i - (len(plotted_methods) - 1) / 2) * width
    plt.bar(x + offset, means, width, label=method, color=colors.get(method))

# Customize the plot
plt.xlabel('Map', fontsize=12)
plt.ylabel('Mean Runtime (s)', fontsize=12)
plt.title('Mean Runtime Comparison Across Different Maps', fontsize=14)
plt.xticks(x, sorted_areas, rotation=45, ha='right')
plt.legend(title="Method")
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Add a logarithmic scale for better visibility of smaller values
# plt.yscale('log')

# Format node counts to a human readable format
def format_node_count(count):
    """Abbreviate a node count for display, e.g. ``13230`` -> ``"13.2K"``.

    Counts from one thousand up are shown in thousands ("K") and counts from
    one million up in millions ("M"), each with one decimal place; smaller
    counts are returned through ``str`` unchanged.

    A count just below one million whose thousands value rounds up to 1000.0
    (such as 999_999) is promoted to "1.0M" instead of being rendered as
    "1000.0K".
    """
    if count >= 1_000_000 or round(count / 1_000, 1) >= 1_000:
        return f"{count / 1_000_000:.1f}M"
    if count >= 1_000:
        return f"{count / 1_000:.1f}K"
    return str(count)

# Two-line tick labels: the area name on top, its abbreviated node count below
node_counts = []
tick_labels = []
for area in sorted_areas:
    pretty_count = format_node_count(normalized_data_with_graph[area]['graph']['nodes'])
    node_counts.append(pretty_count)
    tick_labels.append(f"{area}\n({pretty_count} nodes)")
plt.gca().set_xticklabels(tick_labels)

plt.tight_layout()
plt.show()