In [7]:
import pandas as pd
import plotly.express as px
import os

In [8]:
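# Optional: copy the benchmark result JSON files from the remote host into the
# working directory (uncomment the line below to run it).
#!scp bullx:/scratch/sebastian.horl/srr/output/*.json .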

In [9]:
# Collect every benchmark result file found in the working directory into one
# long-format frame. A file is identified by scenario, calculation mode and
# distance; combinations without a file are skipped on purpose so that a
# partial set of results can still be plotted.
partial_frames = []

for scenario in ["berlin", "idf"]:
    for calculation in ["Initial", "Cached"]:
        for distance in [100, 200, 400, 800]:
            path = "benchmark_{}_{}_{}.json".format(scenario, calculation, distance)

            if not os.path.exists(path):
                continue

            with open(path) as f:
                df_partial = pd.read_json(f)

            # Tag the samples with the run they come from
            df_partial["distance_m"] = distance
            df_partial["calculation"] = calculation
            df_partial["scenario"] = scenario
            partial_frames.append(df_partial)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when nothing was found.
if not partial_frames:
    raise FileNotFoundError(
        "No benchmark_<scenario>_<calculation>_<distance>.json files found in {}".format(os.getcwd())
    )

# ignore_index gives every row a unique label; without it the labels of each
# file would repeat in the combined frame.
df = pd.concat(partial_frames, ignore_index = True)

# Derive a readable phase label from the `routing` flag
# (assumes `routing` is a boolean column in the result files - TODO confirm).
df["phase"] = "loading"
df.loc[df["routing"], "phase"] = "routing"

In [10]:
# Memory over runtime: one panel per (scenario, distance_m), one color per
# calculation mode, and the line dash distinguishes loading from routing.
px.line(
    df,
    x="runtime_s",
    y="memory_mb",
    color="calculation",
    line_dash="phase",
    facet_row="scenario",
    facet_col="distance_m",
    title="Memory by runtime and loading/routing phase",
)

In [11]:
# Peak memory of each run: the largest memory_mb sample per
# (scenario, calculation, distance_m). Picking the last sample of a run would
# only yield the peak if memory never went down during the run.
df_max = df.groupby(
    ["scenario", "calculation", "distance_m"], as_index = False
)["memory_mb"].max()

px.line(df_max, x = "distance_m", y = "memory_mb", color = "calculation", 
    facet_col = "scenario", markers = True,
    title = "Maximum memory by transfer distance")

In [12]:
# Routing time of each run: span between the first and the last runtime_s
# sample that is flagged as routing.
routing_runtime = df[df["routing"]].groupby(
    ["scenario", "calculation", "distance_m"]
)["runtime_s"]

# Working on the grouped series keeps the column labels flat (no two-level
# column index), so the result is addressable by plain column names.
df_routing = (
    routing_runtime.max() - routing_runtime.min()
).rename("routing_time_s").reset_index()

px.line(df_routing, x = "distance_m", y = "routing_time_s", color = "calculation",
    facet_col = "scenario", markers = True,
    title = "Routing time by transfer distance")