In [None]:
import os
import json
from pathlib import Path
# Every entry of the criterion output directory except the "report" entry is
# treated as one benchmarked map.
benchmark_path = Path("../target/criterion")
maps_paths = [benchmark_path / name for name in os.listdir(benchmark_path) if name != "report"]

# Benchmarked algorithms; each name is looked up as a sub-directory of a map directory.
methods = ["fast_sssp_sequential", "dijkstra", "dijkstra_fibonacci"]


# Layout of the `data` dict built below: {map_name: {method_name: {metric_name: value}}}
def load_json(filename: Path) -> dict:
    """Read the file at ``filename`` and return its parsed JSON content."""
    with open(filename) as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


# Per-map, per-method estimates:
# {map directory name: {method name: parsed content of <map>/<method>/new/estimates.json}}
data = {
    map_dir.name: {
        method_name: load_json(map_dir / method_name / "new" / "estimates.json")
        for method_name in methods
    }
    for map_dir in maps_paths
}
data


In [None]:
from typing import Any

# (area name, pair count) for every benchmarked map, grouped by pair count.
# The area names double as lookup keys elsewhere in the notebook, so their
# spelling (e.g. "missisippi") must stay exactly as it is.
# NOTE(review): the count is presumably the number of source/target pairs run per
# benchmark iteration — confirm against the benchmark harness.
pairs = [
    (area_name, count)
    for count, area_names in (
        (500, ("jan_mayen", "gibraltar", "monaco")),
        (200, ("san_marino",)),
        (150, ("andorra", "gotland", "malta", "reykjavik", "budapest")),
        (100, ("luxembourg", "haiti", "iceland", "stockholm", "missisippi")),
    )
    for area_name in area_names
]


def normalize_recursive(item: Any, pair_count: int) -> Any:
    """Divide every number inside a nested dict/list structure by ``pair_count``.

    Dicts and lists are rebuilt recursively. The value stored under a
    ``"confidence_level"`` key is copied through untouched, and anything that
    is neither a dict, a list nor a number is returned as is.
    """
    if isinstance(item, dict):
        scaled = {}
        for key, value in item.items():
            if key == "confidence_level":
                # Copied verbatim: this entry is excluded from the scaling.
                scaled[key] = value
            else:
                scaled[key] = normalize_recursive(value, pair_count)
        return scaled
    if isinstance(item, list):
        return [normalize_recursive(element, pair_count) for element in item]
    if isinstance(item, (int, float)):
        return item / pair_count
    return item


# Scale each area's estimates by its pair count. The keys of `data` carry an
# ".osm" suffix that the short area names in `pairs` lack.
normalized_data = {
    area_name: normalize_recursive(data[f"{area_name}.osm"], count)
    for area_name, count in pairs
}
normalized_data


In [None]:
# Print the normalized mean point estimate of the Fibonacci-heap variant per area.
print("Fibonacci run times")
for area in normalized_data:
    method_data = normalized_data[area]["dijkstra_fibonacci"]
    print(f"{area} \t  \t {method_data['mean']['point_estimate']}")


In [None]:
# Edge and node counts per area. Digit separators are placed every three digits
# so the magnitudes can be read at a glance; the values themselves are unchanged.
graph_data = {
    "jan_mayen": {"edges": 29_786, "nodes": 13_230},
    "monaco": {"edges": 72_318, "nodes": 32_492},
    "gibraltar": {"edges": 100_284, "nodes": 44_639},
    "san_marino": {"edges": 341_976, "nodes": 154_249},
    "andorra": {"edges": 1_043_844, "nodes": 449_273},
    "gotland": {"edges": 1_634_818, "nodes": 725_852},
    "malta": {"edges": 1_818_642, "nodes": 734_962},

    "reykjavik": {"edges": 2_241_396, "nodes": 1_051_160},
    "budapest": {"edges": 5_187_404, "nodes": 2_443_154},
    "luxembourg": {"edges": 10_664_130, "nodes": 3_916_210},
    "haiti": {"edges": 17_186_498, "nodes": 8_497_106},

    "iceland": {"edges": 21_055_604, "nodes": 10_350_896},
    "missisippi": {"edges": 21_372_374, "nodes": 10_464_418},
    # Listed last although its node count is below haiti's.
    "stockholm": {"edges": 16_459_816, "nodes": 7_307_104},
}
# Attach the graph size to every area's normalized estimates:
# {area: {"methods": {method: estimates}, "graph": {"edges": ..., "nodes": ...}}}
normalized_data_with_graph = {
    area: {"methods": method_estimates, "graph": graph_data[area]}
    for area, method_estimates in normalized_data.items()
}
normalized_data_with_graph["jan_mayen"]["graph"]


In [None]:
# Optional: restrict the amount of data

# Split the areas by graph size (node count):
#   small:  nodes <= small_max
#   medium: small_max < nodes <= medium_max
#   large:  nodes > medium_max
# NOTE(review): an earlier comment gave the medium bound as 10_000_000, but the
# code uses 11_000_000 — confirm which bound is intended.
small_max = 1_000_000
medium_max = 11_000_000


def _size_class(node_count):
    """Return 'small', 'medium' or 'large' for a node count, using the thresholds above."""
    if node_count <= small_max:
        return 'small'
    if node_count <= medium_max:
        return 'medium'
    return 'large'


# Single pass: every area lands in exactly one bucket, in its original order.
split_graph_data = {'small': {}, 'medium': {}, 'large': {}}
for _area, _entry in normalized_data_with_graph.items():
    split_graph_data[_size_class(_entry["graph"]["nodes"])][_area] = _entry

# Methods shown in the two-algorithm comparison (Fibonacci-heap variant left out).
filtered_methods = [m for m in methods if m != "dijkstra_fibonacci"]


In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Matplotlib colour-cycle entry used for each method in the plots.
colors = dict(dijkstra="C0", dijkstra_fibonacci="C1", fast_sssp_sequential="C2")

small_data = split_graph_data['small']
# Small and medium areas combined (small first; a medium entry would win on a duplicate key).
sm_data = dict(small_data)
sm_data.update(split_graph_data['medium'])

# Human-readable legend / table label per method.
algo_labels = dict(
    dijkstra="Dijkstra (Binary Heap)",
    dijkstra_fibonacci="Dijkstra (Fibonacci Heap)",
    fast_sssp_sequential="Duan et al.",
)

In [None]:
# Create LaTeX table from normalized_data_with_graph: one row per area and, for
# every method, the mean point estimate and its standard error.
# The loop variable is deliberately not called `data`, so this cell does not
# overwrite the benchmark dict of that name.
header_lines = [
    "\\begin{tabularx}{\\textwidth}{|l|" + "cc|" * len(methods) + "}",
    "\\hline",
]

# First header row: one two-column group per method.
first_row = "Area"
for method in methods:
    method_label = algo_labels.get(method, method)
    first_row += f" & \\multicolumn{{2}}{{c|}}{{{method_label}}}"
first_row += " \\\\"
header_lines.append(first_row)

# Second header row: the same two sub-columns under every method.
second_row = " & Mean & Standard Error" * len(methods) + " \\\\"

header_lines.append("\\hline")
header_lines.append(second_row)
header_lines.append("\\hline")

# Data rows
for area, area_entry in normalized_data_with_graph.items():
    data_row = area.replace('_', ' ').title()
    for method in methods:
        mean_estimate = area_entry['methods'][method]['mean']
        data_row += f" & {mean_estimate['point_estimate']:.2e} & {mean_estimate['standard_error']:.2e}"
    data_row += " \\\\"
    header_lines.append(data_row)

header_lines.append("\\hline")
header_lines.append("\\end{tabularx}")

# Print the LaTeX table
latex_output = "\n".join(header_lines)
print(latex_output)


In [None]:
from matplotlib import ticker
from scipy.optimize import curve_fit

In [None]:
# Areas whose measurements are fitted and plotted in this cell (the combined
# small + medium selection).
this_data = sm_data
# Helper functions, fit models and formula formatters used by the plots below
def calculate_r_squared(y_actual, y_predicted):
    """Return the coefficient of determination, 1 - SS_res / SS_tot.

    SS_res is the sum of squared differences between ``y_actual`` and
    ``y_predicted``; SS_tot is the sum of squared deviations of ``y_actual``
    from its own mean.
    """
    residuals = y_actual - y_predicted
    deviations = y_actual - np.mean(y_actual)
    return 1 - (np.sum(residuals ** 2) / np.sum(deviations ** 2))

# Average degree (edges / nodes) over the areas in `sm_data`. It is used as a
# fixed constant so the edge count can be written as avg_degree * v in the models.
avg_degree = np.mean([entry["graph"]["edges"] / entry["graph"]["nodes"] for entry in sm_data.values()])

# Curve fitting models: v is the node count, a the fitted scale, b the fitted offset.
mp = {
    # (E + V) * log(V) with E = avg_degree * V
    'dijkstra': lambda v, a, b: a * (avg_degree * v + v) * np.log(v) + b,
    # E + V * log(V)
    'dijkstra_fibonacci': lambda v, a, b: a * ((avg_degree * v) + v * np.log(v)) + b,
    # E * log(V)^(2/3). The exponent needs its own parentheses: `** 2 / 3` would
    # square the logarithm and then divide by three, because ** binds tighter than /.
    'fast_sssp_sequential': lambda v, a, b: a * ((avg_degree * v) * np.log(v) ** (2 / 3)) + b,
}

def sign_of(s: float) -> str:
    """Return "+" when ``s >= 0`` holds and "-" otherwise."""
    if s >= 0:
        return "+"
    return "-"

# Define formula formatting functions
# Each entry turns the fitted parameters (a, b) into a readable formula string for
# the model stored under the same key in `mp`. The offset b is printed as a
# separate sign and magnitude, so a negative offset reads "- 1.00" rather than "+ -1.00".
formulas = {
    'dijkstra': lambda a, b: f'{a:.2f} * ({avg_degree:.2f} * v + v) * log(v) {sign_of(b)} {np.abs(b):,.2f}',
    'dijkstra_fibonacci': lambda a, b: f'{a:.2f} * ({avg_degree:.2f} * v + v * log(v)) {sign_of(b)} {np.abs(b):,.2f}',
    'fast_sssp_sequential': lambda a, b: f'{a:.2f} * ({avg_degree:.2f} * v * log(v)^(2/3)) {sign_of(b)} {np.abs(b):,.2f}',
}


def _format_node_tick(value, _pos):
    """Format an x-axis tick (node count) with an M / k suffix and no decimals."""
    if value >= 1e6:
        return f'{value / 1e6:.0f}M'
    if value >= 1e3:
        return f'{value / 1e3:.0f}k'
    return f'{value:.0f}'


def _format_time_tick(value, _pos):
    """Format a y-axis tick with the same thresholds as the x axis but without a suffix.

    NOTE(review): ticks >= 1e6 are divided by 1e6 and ticks >= 1e3 by 1e3, so ticks
    on one axis can be scaled differently with nothing marking the unit — confirm
    the intended unit and that the "Mean time (s)" axis label matches it.
    """
    if value >= 1e6:
        return f'{value / 1e6:.0f}'
    if value >= 1e3:
        return f'{value / 1e3:.0f}'
    return f'{value:.0f}'


def _plot_trendlines(plot_methods, output_file):
    """Fit each method's model to the measured mean times, plot both, and save the figure.

    plot_methods: method names to include; each must be a key of `mp`, `formulas`
        and `algo_labels`.
    output_file: path the figure is written to (its directory is created if missing).
    """
    fig, ax = plt.subplots(figsize=(12, 8))

    # Node counts are the same for every method, so build them once.
    node_counts = np.array([normalized_data_with_graph[a]["graph"]["nodes"] for a in this_data])
    nodes_sorted = np.sort(node_counts)

    for method in plot_methods:
        mean_times = np.array(
            [normalized_data_with_graph[a]["methods"][method]["mean"]["point_estimate"] for a in this_data])
        model = mp[method]

        fitted_params, _pcov = curve_fit(model, node_counts, mean_times)

        # R-squared is computed on the measured points, the trendline on sorted x.
        r_squared = calculate_r_squared(mean_times, model(node_counts, *fitted_params))
        formula = formulas[method](fitted_params[0], fitted_params[1])

        ax.plot(nodes_sorted, model(nodes_sorted, *fitted_params),
                label=f"{algo_labels[method]} Trendline: {formula}, R² = {r_squared:.3f}",
                c=colors.get(method, None), linestyle='-')
        ax.scatter(node_counts, mean_times, label=algo_labels[method], s=30, alpha=0.8,
                   c=colors.get(method, None))

    ax.set_xlabel("Number of nodes")
    ax.set_ylabel("Mean time (s)")
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(_format_node_tick))
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(_format_time_tick))

    ax.legend()
    fig.tight_layout()
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(output_file)
    plt.show()


# Combined plot with dijkstra and fast_sssp_sequential only
_plot_trendlines(filtered_methods, "graphs/combined_dijkstra_fast_sssp.png")

# Same plot with all three algorithms, including the Fibonacci-heap variant
_plot_trendlines(methods, "graphs/mean_times_trendlines.png")


In [None]:
# New version of the above with enlarged labels.
# This cell only re-plots: `this_data`, `avg_degree`, `calculate_r_squared`,
# `colors` and `algo_labels` come from the earlier cells and are not redefined here.

# Curve fitting models: v is the node count, a the fitted scale, b the fitted offset.
mp = {
    # (E + V) * log(V) with E = avg_degree * V
    'dijkstra': lambda v, a, b: a * (avg_degree * v + v) * np.log(v) + b,
    # E + V * log(V)
    'dijkstra_fibonacci': lambda v, a, b: a * ((avg_degree * v) + v * np.log(v)) + b,
    # E * log(V)^(2/3). The exponent needs its own parentheses: `** 2 / 3` would
    # square the logarithm and then divide by three, because ** binds tighter than /.
    'fast_sssp_sequential': lambda v, a, b: a * ((avg_degree * v) * np.log(v) ** (2 / 3)) + b,
}

# Create graph with all three algorithms
plt.figure(figsize=(12, 8))

# Node counts are the same for every method, so build them once.
x = np.array([normalized_data_with_graph[a]["graph"]["nodes"] for a in this_data])
x_sorted = np.sort(x)

for method in methods:  # Use all methods including fibonacci
    y = np.array([normalized_data_with_graph[a]["methods"][method]["mean"]["point_estimate"] for a in
                  this_data])
    z = mp[method]

    fitted_params, pcov = curve_fit(z, x, y)

    # Trendline on sorted x; R-squared on the measured points.
    y_fitted = z(x_sorted, *fitted_params)
    r_squared = calculate_r_squared(y, z(x, *fitted_params))

    # Plot trendline and scatter for each method (legend shows R² only, no formula)
    plt.plot(x_sorted, y_fitted,
             label=f"{algo_labels[method]} Trendline (R² = {r_squared:.3f})",
             c=colors.get(method, None), linestyle='-')
    plt.scatter(x, y, label=algo_labels[method], s=30, alpha=0.8, c=colors.get(method, None))

plt.xlabel("Number of nodes")
plt.ylabel("Mean time (s)")

plt.gca().xaxis.set_major_formatter(
    ticker.FuncFormatter(
        lambda x, p: f'{x / 1e6:.0f}M' if x >= 1e6 else f'{x / 1e3:.0f}k' if x >= 1e3 else f'{x:.0f}'))
# NOTE(review): this formatter scales ticks >= 1e6 and ticks >= 1e3 differently
# and prints no unit suffix — confirm the intended unit of the y axis.
plt.gca().yaxis.set_major_formatter(
    ticker.FuncFormatter(
        lambda x, p: f'{x / 1e6:.0f}' if x >= 1e6 else f'{x / 1e3:.0f}' if x >= 1e3 else f'{x:.0f}'))

plt.legend(fontsize=12)
plt.tight_layout()
Path("graphs").mkdir(exist_ok=True)
# Writes to the same file name as the three-algorithm plot of the previous cell.
plt.savefig("graphs/mean_times_trendlines.png")
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Speedup of fast_sssp_sequential over dijkstra (ratio of mean point estimates),
# one bar per area.

# Areas with more nodes than this are included in the second average below.
large_area_min_nodes = 50_000


def _human_node_count(nodes):
    """Format a node count in human readable form, e.g. 1.1M, 154k, 999."""
    if nodes >= 1_000_000:
        return f"{nodes / 1_000_000:.1f}M"
    if nodes >= 1_000:
        return f"{nodes / 1_000:.0f}k"
    return str(nodes)


# One record per area. The loop variable is deliberately not called `data`, so
# the benchmark dict of that name is not overwritten.
speedup_rows = []
for area_name, area_entry in normalized_data_with_graph.items():
    dijkstra_time = area_entry["methods"]["dijkstra"]["mean"]["point_estimate"]
    fast_sssp_time = area_entry["methods"]["fast_sssp_sequential"]["mean"]["point_estimate"]
    nodes = area_entry["graph"]["nodes"]
    speedup_rows.append({
        "area": area_name,
        "nodes": nodes,
        "speedup": dijkstra_time / fast_sssp_time,
        "label": f"{area_name.replace('_', ' ').title()} ({_human_node_count(nodes)} nodes)",
    })

# Sort by number of nodes for better visualization
speedup_rows.sort(key=lambda row: row["nodes"])
sorted_areas = [row["area"] for row in speedup_rows]
sorted_speedups = [row["speedup"] for row in speedup_rows]
sorted_labels = [row["label"] for row in speedup_rows]

# Create the column graph
plt.figure(figsize=(14, 8))
bars = plt.bar(range(len(sorted_areas)), sorted_speedups, color=['green' if s > 1 else 'red' for s in sorted_speedups])

# Add speedup numbers above the bars
for bar, speedup in zip(bars, sorted_speedups):
    plt.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.1,
             f'{speedup:.2f}x', ha='center', va='bottom', fontsize=10)

# Add a horizontal line at y=1 to show no speedup/slowdown
plt.axhline(y=1, color='black', linestyle='--', alpha=0.7, label='No speedup/slowdown')

plt.xlabel('Area')
plt.ylabel('Speedup (dijkstra / fast_sssp_sequential)')
plt.xticks(range(len(sorted_areas)), sorted_labels, rotation=30, ha='right')
plt.grid(True, alpha=0.3)
plt.legend()
plt.tight_layout()

# Save the speedup graph BEFORE showing it: once plt.show() has displayed the
# figure, a later plt.savefig() no longer has this figure to write.
Path("graphs").mkdir(exist_ok=True)
plt.savefig("graphs/speedup_comparison.png", dpi=300, bbox_inches='tight')
plt.show()

average_speedup = sum(sorted_speedups) / len(sorted_speedups)
print(f"\nAverage speedup across all areas: {average_speedup:.2f}x")

# Second average: select by node count instead of dropping a fixed number of
# entries, so it stays correct when areas are added or removed.
large_area_speedups = [row["speedup"] for row in speedup_rows if row["nodes"] > large_area_min_nodes]
if large_area_speedups:
    average_speedup_large_areas = sum(large_area_speedups) / len(large_area_speedups)
    print(f"Average speedup for areas larger than {large_area_min_nodes // 1000}k nodes: "
          f"{average_speedup_large_areas:.2f}x")

In [None]:
# Copy of graph above with larger figure size and fonts

# Areas with more nodes than this are included in the second average below.
large_area_min_nodes = 50_000


def _human_node_count(nodes):
    """Format a node count in human readable form, e.g. 1.1M, 154k, 999."""
    if nodes >= 1_000_000:
        return f"{nodes / 1_000_000:.1f}M"
    if nodes >= 1_000:
        return f"{nodes / 1_000:.0f}k"
    return str(nodes)


# One record per area. The loop variable is deliberately not called `data`, so
# the benchmark dict of that name is not overwritten.
speedup_rows = []
for area_name, area_entry in normalized_data_with_graph.items():
    dijkstra_time = area_entry["methods"]["dijkstra"]["mean"]["point_estimate"]
    fast_sssp_time = area_entry["methods"]["fast_sssp_sequential"]["mean"]["point_estimate"]
    nodes = area_entry["graph"]["nodes"]
    speedup_rows.append({
        "area": area_name,
        "nodes": nodes,
        "speedup": dijkstra_time / fast_sssp_time,
        "label": f"{area_name.replace('_', ' ').title()} ({_human_node_count(nodes)} nodes)",
    })

# Sort by number of nodes for better visualization
speedup_rows.sort(key=lambda row: row["nodes"])
sorted_areas = [row["area"] for row in speedup_rows]
sorted_speedups = [row["speedup"] for row in speedup_rows]
sorted_labels = [row["label"] for row in speedup_rows]

# Create the column graph with larger figure size and fonts
plt.figure(figsize=(16, 10))
bars = plt.bar(range(len(sorted_areas)), sorted_speedups, color=['green' if s > 1 else 'red' for s in sorted_speedups])

# Add speedup numbers above the bars with larger font
for bar, speedup in zip(bars, sorted_speedups):
    plt.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.1,
             f'{speedup:.2f}x', ha='center', va='bottom', fontsize=14, fontweight='bold')

# Add a horizontal line at y=1 to show no speedup/slowdown
plt.axhline(y=1, color='black', linestyle='--', alpha=0.7, label='No speedup/slowdown')

plt.xlabel('Area', fontsize=16, fontweight='bold')
plt.ylabel('Speedup (dijkstra / fast_sssp_sequential)', fontsize=16, fontweight='bold')
plt.xticks(range(len(sorted_areas)), sorted_labels, rotation=30, ha='right', fontsize=12)
plt.yticks(fontsize=12)
plt.grid(True, alpha=0.3)
plt.legend(fontsize=14)

# Apply the layout first so the saved file and the displayed figure match,
# then save, then show.
plt.tight_layout()
Path("graphs").mkdir(exist_ok=True)
plt.savefig("graphs/speedup_comparison_large.png", dpi=300, bbox_inches='tight')
plt.show()

average_speedup = sum(sorted_speedups) / len(sorted_speedups)
print(f"\nAverage speedup across all areas: {average_speedup:.2f}x")

# Second average: select by node count instead of dropping a fixed number of
# entries, so it stays correct when areas are added or removed.
large_area_speedups = [row["speedup"] for row in speedup_rows if row["nodes"] > large_area_min_nodes]
if large_area_speedups:
    average_speedup_large_areas = sum(large_area_speedups) / len(large_area_speedups)
    print(f"Average speedup for areas larger than {large_area_min_nodes // 1000}k nodes: "
          f"{average_speedup_large_areas:.2f}x")

In [None]:
# Load graph metrics: every entry of the graph_metrics directory is read as JSON.
# Sorting makes the load order (and so the dict order) reproducible.
metric_files = sorted(Path("graph_metrics").iterdir())

# {map name: parsed metrics}; the map name is the file stem without "_metrics".
graph_metrics = {}
for file_path in metric_files:
    map_name = file_path.stem.replace('_metrics', '')

    try:
        with open(file_path, 'r') as f:
            graph_metrics[map_name] = json.load(f)
    except (OSError, ValueError) as e:
        # Best effort: an unreadable entry (OSError) or invalid JSON / undecodable
        # text (both ValueError subclasses) is reported and skipped. Any other
        # exception is a programming error and is allowed to propagate.
        print(f"Error loading {file_path}: {e}")

# Create a function to process distribution data
def fix_distribution_data(metrics_data):
    """Convert degree-distribution keys to ints and sort them, for every map.

    For each map that has ``map_data['degrees']['distribution']`` as a dict, the
    keys are converted with ``int()`` where possible and the dict is replaced by
    one ordered by key. Keys that cannot be converted are kept unchanged and
    placed after all integer keys, so a mix of both no longer fails when sorting.

    The input is modified in place and also returned. Calling the function
    again on its own output leaves it unchanged.
    """
    for map_data in metrics_data.values():
        if 'degrees' not in map_data or 'distribution' not in map_data['degrees']:
            continue
        distribution = map_data['degrees']['distribution']
        if not isinstance(distribution, dict):
            continue

        converted = {}
        for key, value in distribution.items():
            try:
                converted[int(key)] = value
            except ValueError:
                # Keep as is if not convertible to int
                converted[key] = value

        # Integer keys first (numerically), then the remaining keys; comparing an
        # int with a str directly would raise TypeError.
        ordered_keys = sorted(converted, key=lambda k: (not isinstance(k, int), k))
        map_data['degrees']['distribution'] = {k: converted[k] for k in ordered_keys}

    return metrics_data

# Apply the fix to the loaded graph metrics. The function rewrites the
# distributions inside the dict it is given and returns that same dict.
graph_metrics = fix_distribution_data(graph_metrics)

# Last expression of the cell: displays the processed metrics.
graph_metrics

In [None]:
# Create a LaTeX table with graph metrics information

# (dotted key path into a map's metrics dict, column label under which it is stored)
metrics_to_extract = [
    ('graph_info.vertices', 'Vertices'),
    ('graph_info.edges', 'Edges'),
    ('degrees.average', 'Avg Degree'),
    ('degrees.max', 'Max Degree'),
    ('diameter.value', 'Diameter'),
    ('clustering.average', 'Clustering Coef')
]


def _format_metric(value):
    """Format one metric: ints with thousands separators, floats with 4 decimals, else str()."""
    if isinstance(value, int):
        return f"{value:,}"
    if isinstance(value, float):
        return f"{value:.4f}"
    return str(value)


def _extract_metric(metrics, key_path):
    """Follow a dotted key path through nested dicts; return "N/A" if it cannot be followed."""
    value = metrics
    try:
        for part in key_path.split('.'):
            value = value[part]
    except (KeyError, TypeError):
        return "N/A"
    return _format_metric(value)


def _vertex_sort_key(row):
    """Sort key by vertex count; rows whose count is not a plain integer (e.g. "N/A") go last."""
    digits = row['Vertices'].replace(',', '')
    return (0, int(digits)) if digits.isdigit() else (1, 0)


# One row per map: display name plus every formatted metric.
table_data = []
for map_name, map_metrics in graph_metrics.items():
    row_data = {'Map': map_name.replace('_', ' ').title()}
    for key_path, label in metrics_to_extract:
        row_data[label] = _extract_metric(map_metrics, key_path)
    table_data.append(row_data)

# Sort the table data by number of vertices (ascending)
table_data.sort(key=_vertex_sort_key)

# Generate LaTeX table
latex_lines = [
    "\\begin{table}[htbp]",
    "\\centering",
    "\\caption{Graph Metrics Summary}",
    # r for right-alignment of numeric columns. The last column is r as well:
    # a bare `p` is not a valid column spec because p requires a {width} argument.
    "\\begin{tabular}{|l|r|r|r|r|r|r|}",
    "\\hline",
    "\\textbf{Map} & \\textbf{Vertices} & \\textbf{Edges} & \\textbf{Avg Degree} & \\textbf{Max Degree} & \\textbf{Diameter} & \\textbf{Network Clustering Coef} \\\\",
    "\\hline"
]

# Add data rows: map name followed by the metrics in `metrics_to_extract` order
column_labels = [label for _, label in metrics_to_extract]
for row in table_data:
    cells = [row['Map']] + [row[label] for label in column_labels]
    latex_lines.append(" & ".join(cells) + " \\\\")

# Close the table
latex_lines.extend([
    "\\hline",
    "\\end{tabular}",
    "\\label{tab:graph_metrics}",
    "\\end{table}"
])

# Join all lines and print the LaTeX table
latex_table = "\n".join(latex_lines)
print(latex_table)

In [None]:
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

# Sort maps by node count
sorted_maps = sorted(graph_metrics.keys(),
                    key=lambda m: graph_metrics[m]['graph_info']['vertices'])

# Degrees below this value get their own category; everything at or above it is
# pooled into one category. Both names are defined up front so the code after the
# loop works even when no map provides a distribution.
pooled_degree_min = 7
# NOTE(review): the pooled category has no upper bound in the code; the label
# claims 20 as the maximum degree — confirm against the metrics files.
degree_7_plus_idx = "7 to 20"

# Collect all degree distributions: {degree category: [percentage of nodes, one per map]}
degree_stats = {}

for map_name in sorted_maps:
    map_metrics = graph_metrics[map_name]
    if 'degrees' not in map_metrics or 'distribution' not in map_metrics['degrees']:
        continue

    distribution = map_metrics['degrees']['distribution']
    total_nodes = map_metrics['graph_info']['vertices']

    pooled_count = 0
    for degree_key, frequency_value in distribution.items():
        degree = int(degree_key)
        frequency = int(frequency_value)

        if degree >= pooled_degree_min:
            pooled_count += frequency
        else:
            degree_stats.setdefault(degree, []).append((frequency / total_nodes) * 100)

    # Add the combined 7+ category (only for maps that have such nodes)
    if pooled_count > 0:
        degree_stats.setdefault(degree_7_plus_idx, []).append((pooled_count / total_nodes) * 100)

# Calculate statistics for each degree; categories without samples get 0
degrees = [0, 1, 2, 3, 4, 5, 6, degree_7_plus_idx]  # Fixed order
means = [np.mean(degree_stats[d]) if d in degree_stats else 0 for d in degrees]
stds = [np.std(degree_stats[d]) if d in degree_stats else 0 for d in degrees]

# Create the box plot to show distribution, whiskers, and outliers
plt.figure(figsize=(12, 8))

box_data = [degree_stats.get(d, []) for d in degrees]
positions = list(range(len(degrees)))
# Only categories with at least one sample are drawn, at their fixed position.
populated = [(pos, values) for pos, values in enumerate(box_data) if len(values) > 0]

bp = plt.boxplot([values for _, values in populated],
                positions=[pos for pos, _ in populated],
                widths=0.6, patch_artist=True,
                showfliers=True, flierprops={'marker': 'o', 'markersize': 3, 'alpha': 0.6})

# Customize box plot appearance
for patch in bp['boxes']:
    patch.set_facecolor('lightblue')
    patch.set_alpha(0.7)

plt.xlabel('Degree (number of connections)')
plt.ylabel('Percentage of nodes (%)')
plt.grid(True, alpha=0.3, axis='y')

# Set x-axis labels
plt.xticks(positions, degrees)
plt.xlim(-0.5, len(degrees) - 0.5)

plt.tight_layout()
# Make sure the output directory exists even when this cell is run on its own.
Path("graphs").mkdir(exist_ok=True)
plt.savefig("graphs/degree_distribution.png", dpi=300, bbox_inches='tight')
plt.show()

# Print statistics
print("Degree Distribution Statistics Summary:")
print("-" * 70)
print(f"{'Degree':<8} {'Mean %':<10} {'Std %':<10} {'Maps':<8} {'Min %':<8} {'Max %':<8}")
print("-" * 70)

for i, degree in enumerate(degrees):
    percentages = degree_stats.get(degree, [])
    if len(percentages) > 0:
        print(f"{str(degree):<8} {means[i]:<10.2f} {stds[i]:<10.2f} {len(percentages):<8d} "
              f"{min(percentages):<8.2f} {max(percentages):<8.2f}")
    else:
        print(f"{str(degree):<8} {'N/A':<10} {'N/A':<10} {'0':<8} {'N/A':<8} {'N/A':<8}")
