In [2]:
import pandas as pd
import json
from datetime import datetime
from pathlib import Path

# Name of the run directory under ../results to analyse. Leave as None to pick
# the directory that sorts last by name (with timestamp-named runs this is
# presumably the most recent one).
FOLDER = None
if FOLDER is not None:
    FOLDER = "../results/" + FOLDER
else:
    # Only directories are candidates, so a stray file in ../results can never
    # be mistaken for a run folder.
    run_dirs = sorted(p for p in Path("../results").iterdir() if p.is_dir())
    if not run_dirs:
        raise FileNotFoundError("No run directories found in ../results")
    FOLDER = str(run_dirs[-1])

# Analysis artefacts go to a sub-folder of the run directory.
OUT = FOLDER + "/analysis"
Path(OUT).mkdir(parents=True, exist_ok=True)
print(FOLDER)

# Logs are read from the top level of FOLDER only, so count them there
# (non-recursively). log_0.jsonl is the master's log and is excluded by name
# rather than by subtracting one from the total.
n_workers = sum(
    1 for p in Path(FOLDER).glob("log_*.jsonl") if p.name != "log_0.jsonl"
)
print(f"Number of workers: {n_workers}")

../results/2025-04-03_16:42:10
Number of workers: 10


In [3]:
# Parsed JSON-lines logs, one inner list per worker: worker_logs[k] holds the
# records of log_{k+1}.jsonl, in file order.
worker_logs: list[list[dict]] = []
for worker_no in range(1, n_workers + 1):
    with open(FOLDER + f"/log_{worker_no}.jsonl") as worker_file:
        worker_logs.append(list(map(json.loads, worker_file)))

# log_0.jsonl is kept separately as the master's log.
with open(FOLDER + "/log_0.jsonl") as master_file:
    master_log = list(map(json.loads, master_file))

In [4]:
# Column-oriented accumulator: one list per output column, filled record by
# record and turned into a DataFrame afterwards.
data = {column: [] for column in ("worker_id", "duration (ms)", "payload_size")}

# Read cursor per worker: position of the next unread record in that
# worker's log.
worker_idxs = [0 for _ in range(n_workers)]

def get_next_log(worker_idx, event):
    """Return the next unread record of a worker's log whose event matches.

    Scanning starts at the worker's cursor in the module-level ``worker_idxs``
    and advances it past every record inspected, including the one returned,
    so each worker record is consumed at most once.

    Args:
        worker_idx: 1-based worker id; worker ``k`` is stored at
            ``worker_logs[k - 1]``.
        event: value the record's ``"event"`` field must equal.

    Returns:
        The matching record (a dict), or ``None`` when the worker's log is
        exhausted without a match.

    Raises:
        IndexError: if ``worker_idx`` is outside ``1..len(worker_logs)``.
            Without this check an id of 0 (or below) would wrap around to a
            negative list index and silently consume another worker's log.
    """
    if not 1 <= worker_idx <= len(worker_logs):
        raise IndexError(
            f"worker id {worker_idx} is outside 1..{len(worker_logs)}"
        )
    slot = worker_idx - 1
    records = worker_logs[slot]
    while worker_idxs[slot] < len(records):
        log = records[worker_idxs[slot]]
        # Advance before testing so a returned record is never handed out twice.
        worker_idxs[slot] += 1
        if log["event"] == event:
            return log
    return None

# Pair every master-side "send"/"recv" record with the next matching record in
# the peer worker's log and record the elapsed time between the two.
for log in master_log:
    if log["event"] == "send":
        # Master sent: the worker's "recv" marks the end of the transfer.
        worker_id = log["receiver"]
        worker_log = get_next_log(worker_id, "recv")
    elif log["event"] == "recv":
        # Master received: the worker's "send" marks the start of the transfer.
        worker_id = log["sender"]
        worker_log = get_next_log(worker_id, "send")
    else:
        continue

    if worker_log is None:
        # The worker's log ran out before a counterpart was found (e.g. a
        # truncated log). Report and skip instead of failing on None["timestamp"].
        print(f"No matching worker log for master {log['event']} (worker {worker_id})")
        continue

    master_time = datetime.fromtimestamp(log["timestamp"])
    worker_time = datetime.fromtimestamp(worker_log["timestamp"])
    if log["event"] == "send":
        start, end = master_time, worker_time
    else:
        start, end = worker_time, master_time

    if end < start:
        # Only reported: the record is still kept, with a negative duration.
        print("Invalid log")
    data["worker_id"].append(worker_id)
    data["duration (ms)"].append((end - start).total_seconds() * 1000)
    data["payload_size"].append(log["payload_size"])

df = pd.DataFrame(data)
# Store worker ids as strings so they are treated as labels, not numbers.
df["worker_id"] = df["worker_id"].astype(str)
df.head()

Unnamed: 0,worker_id,duration (ms),payload_size
0,1,0.061,64
1,2,0.077,64
2,3,0.084,64
3,4,0.073,64
4,5,0.08,64


In [5]:
# Span of the whole run, taken from the first "start" and the first "end"
# record in the master log.
start_events = [entry for entry in master_log if entry["event"] == "start"]
run_start = datetime.fromtimestamp(start_events[0]["timestamp"])

end_events = [entry for entry in master_log if entry["event"] == "end"]
run_end = datetime.fromtimestamp(end_events[0]["timestamp"])

run_duration = run_end - run_start
print(f"Run duration: {run_duration.total_seconds():.3f} s")

Run duration: 21.488 s


In [6]:
# Totals over every paired message: durations are summed in milliseconds and
# reported in seconds; payload sizes are summed and scaled down by 1024 * 1024
# (presumably bytes in the logs; confirm against the logger).
duration_ms_sum = df["duration (ms)"].sum()
payload_sum = df["payload_size"].sum()

total_duration = duration_ms_sum / 1000
total_payload = payload_sum / 1024 ** 2

print(f"Total duration: {total_duration:.3f} s")
print(f"Total payload: {total_payload:.3f} mb")

Total duration: 0.019 s
Total payload: 14.581 mb


In [7]:
# Per-worker totals: summed duration (ms) and summed payload, the latter
# divided by 1024. worker_id holds strings, so groups sort lexicographically
# ("1", "10", "2", ...).
new_df = (
    df.groupby("worker_id")
    .sum()
    .reset_index()
    .assign(payload_size=lambda totals: totals["payload_size"] / 1024)
)
new_df.head(10)

Unnamed: 0,worker_id,duration (ms),payload_size
0,1,1.65,1493.052734
1,10,1.823,1493.052734
2,2,1.893,1493.052734
3,3,2.024,1493.052734
4,4,2.652,1493.052734
5,5,1.741,1493.052734
6,6,1.568,1493.052734
7,7,1.627,1493.052734
8,8,1.935,1493.052734
9,9,2.057,1493.052734
