In [45]:
import pandas as pd
import altair as alt
from humanfriendly import parse_size

In [46]:
def convert_to_timedelta(duration_str, unit="m"):
    """Convert a duration string such as ``"1h 30m 5.2s"`` into a number.

    The string is split on single spaces and every token is expected to be a
    number followed by one of the suffixes ``d``, ``h``, ``m``, ``s`` or
    ``ms``. Tokens that carry none of these suffixes (including empty tokens
    produced by repeated spaces) contribute nothing, so a string without any
    recognised token yields ``0.0``.

    Parameters
    ----------
    duration_str : str
        Space-separated duration tokens, e.g. ``"2d 3h"``, ``"45s"``,
        ``"350ms"``.
    unit : {"d", "h", "m", "s"}, default "m"
        Unit in which the total duration is expressed in the result.

    Returns
    -------
    float
        Total duration expressed in ``unit``.

    Raises
    ------
    ValueError
        If the part of a token in front of a recognised suffix is not a
        number.
    KeyError
        If ``unit`` is not one of the supported units.
    """
    conv = {"d": 86400, "h": 3600, "m": 60, "s": 1}
    # Seconds per suffix. "ms" must be tried first: a millisecond token also
    # ends with "s", and treating it as seconds would leave a dangling "m" in
    # the numeric part and make float() raise.
    suffixes = (
        ("ms", 0.001),
        ("d", conv["d"]),
        ("h", conv["h"]),
        ("m", conv["m"]),
        ("s", conv["s"]),
    )
    total = 0.0
    for val in duration_str.split(" "):
        for suffix, seconds in suffixes:
            if val.endswith(suffix):
                # float() rather than int() so fractional values ("1.5h") work.
                total += float(val[: -len(suffix)]) * seconds
                break
    return pd.Timedelta(total, unit="s").total_seconds() / conv[unit]

In [47]:
# Load the tab-separated execution trace into a DataFrame.
trace_file = "data/execution_trace_2024-09-19_01-28-53.txt"
df = pd.read_csv(trace_file, sep="\t")

In [48]:
# Tool name = second ":"-separated field of the task name, truncated at the
# first space.
second_field = df["name"].str.split(":", expand=True)[1]
df["tool"] = second_field.str.split(" ", expand=True)[0]

In [49]:
# Keep only the rows whose task name matches one of the two benchmarked steps.
# The explicit .copy() makes the result an independent frame, so the column
# assignments below (and in later cells) cannot trigger SettingWithCopyWarning.
is_benchmark_task = (
    df["name"].str.contains("RUN:MESS")
    | df["name"].str.contains("RUN:CAMISIM:SIMULATE")
)
df = df[is_benchmark_task].copy()

# The second space-separated token of the task name is presumably the genome
# count wrapped in parentheses, e.g. "(10)" -- TODO confirm against the trace.
# regex=False: "(" and ")" must be removed literally, not parsed as a regex.
df["genomes"] = (
    df["name"]
    .str.split(" ", expand=True)[1]
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
    .astype(int)
)

In [51]:
# Express both timing columns in minutes (the default unit of
# convert_to_timedelta).
minute_columns = {"duration_min": "duration", "realtime_min": "realtime"}
for minutes_col, raw_col in minute_columns.items():
    df[minutes_col] = df[raw_col].apply(convert_to_timedelta)

In [52]:
# Swap the raw tool identifiers for versioned display labels used in the plots.
tool_labels = {"MESS": "MeSS v0.9.0", "CAMISIM": "CAMISIM v1.3.0"}
df["tool"] = df["tool"].replace(tool_labels)

In [53]:
# Peak memory: parse the human-readable size string and divide by 10**9.
df["RAM"] = [parse_size(size) / 10**9 for size in df["peak_rss"]]

# CPU usage as a float percentage. Going through str first keeps this cell
# re-runnable: on a second execution the column already holds floats, which
# have no "%" to strip and would otherwise break a plain string replace.
df["%cpu"] = (
    df["%cpu"].astype(str).str.replace("%", "", regex=False).astype(float)
)

# CPU usage as a fraction (100 % -> 1.0).
df["cpu"] = df["%cpu"] / 100

# CPU time in minutes: real time in minutes scaled by the CPU fraction.
df["calc_time"] = df["realtime_min"] * df["cpu"]

In [54]:
# Box plot of the RAM column (GB), one box per tool; the colour legend is
# hidden because the x axis already names the tools.
ram_data = df[["tool", "RAM"]]
bp = (
    alt.Chart(ram_data)
    .mark_boxplot(size=90)
    .encode(
        x=alt.X("tool:N", axis=alt.Axis(labelAngle=0)),
        y=alt.Y("RAM:Q", title="RAM (GB)"),
        color=alt.Color("tool:N", legend=None),
    )
    .properties(width=600, height=300)
    .configure_axis(labelFontSize=12, titleFontSize=12)
)
bp

In [56]:
# Export the RAM box plot as a PNG at 400 ppi.
bp.save("figures/ram-usage.png", ppi=400)

In [57]:
# Box plot of the CPU usage fraction, one box per tool; the colour legend is
# hidden because the x axis already names the tools.
cpu_data = df[["tool", "cpu"]]
bp = (
    alt.Chart(cpu_data)
    .mark_boxplot(size=90)
    .encode(
        x=alt.X("tool:N", axis=alt.Axis(labelAngle=0)),
        y=alt.Y("cpu:Q", title="CPU usage"),
        color=alt.Color("tool:N", legend=None),
    )
    .properties(width=600, height=300)
    .configure_axis(labelFontSize=12, titleFontSize=12)
)
bp

In [58]:
# Export the CPU usage box plot as a PNG at 400 ppi.
bp.save("figures/cpu-usage.png", ppi=400)

In [59]:
# CPU time against the genomes column, drawn as one line (with point markers)
# per tool; hovering a point shows its CPU time and genome value.
scaling_data = df[["genomes", "calc_time", "tool"]]
line = (
    alt.Chart(scaling_data)
    .mark_line(point=True)
    .encode(
        alt.X("genomes"),
        alt.Y("calc_time", title="CPU time (min)"),
        alt.Color("tool"),
        tooltip=["calc_time", "genomes"],
    )
    .properties(width=600, height=300)
    .configure_axis(labelFontSize=12, titleFontSize=12)
)
line

In [60]:
# Export the CPU time line chart as a PNG at 400 ppi.
line.save("figures/cpu-time.png", ppi=400)