In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import os

# Notebook-wide plotting defaults: 7x5 figures with grid lines enabled.
plt.rcParams.update({
    "figure.figsize": (7, 5),
    "axes.grid": True,
})

In [None]:
# Automatically select the most recent strong- and weak-scaling runs found
# under "scaling_results".
#
# "Most recent" means last in lexicographic order of the directory names.
# This assumes run directories are named "<kind>_<YYYYMMDD>_<HHMMSS>" (as in
# "strong_20260116_142012"), so that name order equals time order -- TODO
# confirm this holds for every run that is stored there.
#
# To analyze a specific run instead, override the result after this cell, e.g.
# strong_run_dir = Path("scaling_results/strong_20260116_142012")
results_root = Path("scaling_results")

# Only directories can be runs; stray files whose names happen to contain
# "strong" or "weak" must not be picked up as the latest run.
run_dirs = sorted(entry.name for entry in results_root.iterdir() if entry.is_dir())

strong_run_dirs = [d for d in run_dirs if "strong" in d]
if not strong_run_dirs:
    # Fail with an actionable message instead of a bare IndexError on [-1].
    raise FileNotFoundError(
        f"No run directory containing 'strong' found under {results_root}/"
    )
strong_run_dir = results_root / strong_run_dirs[-1]

weak_run_dirs = [d for d in run_dirs if "weak" in d]
if not weak_run_dirs:
    raise FileNotFoundError(
        f"No run directory containing 'weak' found under {results_root}/"
    )
weak_run_dir = results_root / weak_run_dirs[-1]

In [None]:
# Load the per-run summary of the selected strong-scaling run, ordered by rank count.
csv_path = strong_run_dir / "summary.csv"
df = pd.read_csv(csv_path).sort_values("ranks").reset_index(drop=True)

# Collapse repeated runs at the same rank count into one row per rank count:
# mean and std of the timing metrics, plus the problem-size columns (taken from
# the first run of each group, since they should not vary within a group).
# Named aggregation yields the flat column names directly.
df_agg = (
    df.groupby("ranks")
    .agg(
        wall_seconds_mean=("wall_seconds", "mean"),
        wall_seconds_std=("wall_seconds", "std"),
        particle_steps_per_sec_mean=("particle_steps_per_sec", "mean"),
        particle_steps_per_sec_std=("particle_steps_per_sec", "std"),
        npt_per_rank=("npt_per_rank", "first"),
        total_particles=("total_particles", "first"),
    )
    .reset_index()
    .sort_values("ranks")
    .reset_index(drop=True)
)

display(df_agg.head())

In [None]:
# Strong scaling: mean wall time per rank count on log-log axes,
# with the run-to-run standard deviation as error bars.
fig, ax = plt.subplots()
ax.errorbar(
    df_agg["ranks"],
    df_agg["wall_seconds_mean"],
    yerr=df_agg["wall_seconds_std"],
    marker="o",
    capsize=5,
    capthick=2,
)
ax.set_xscale("log", base=2)
ax.set_yscale("log")

ax.set_xlabel("MPI ranks")
ax.set_ylabel("Wall time [s]")
ax.set_title("Wall Time vs MPI Ranks (mean ± std)")
fig.savefig(strong_run_dir / "wall_time_vs_ranks.png", dpi=300)

In [None]:
# Strong scaling: mean throughput (particle-steps per second) per rank count
# on log-log axes, with the run-to-run standard deviation as error bars.
fig, ax = plt.subplots()
errorbar_style = dict(marker="o", capsize=5, capthick=2)
ax.errorbar(
    df_agg["ranks"],
    df_agg["particle_steps_per_sec_mean"],
    yerr=df_agg["particle_steps_per_sec_std"],
    **errorbar_style,
)
ax.set_xscale("log", base=2)
ax.set_yscale("log")

ax.set(
    xlabel="MPI ranks",
    ylabel="Particle-steps / second",
    title="Throughput Scaling (mean ± std)",
)
fig.savefig(strong_run_dir / "throughput_scaling.png", dpi=300)

In [None]:
# Strong-scaling speedup and parallel efficiency, both measured relative to
# the first row of df_agg (the smallest rank count, since df_agg is sorted by
# "ranks").
ref_mean = df_agg.iloc[0]["wall_seconds_mean"]
ref_ranks = df_agg.iloc[0]["ranks"]

# Speedup S = T_ref / T. Its uncertainty is a first-order propagation of the
# wall-time std only: |dS/dT| * sigma_T = T_ref * sigma_T / T**2. The reference
# time itself is treated as exact (its own std is not propagated).
df_agg["speedup_mean"] = ref_mean / df_agg["wall_seconds_mean"]
df_agg["speedup_std"] = ref_mean * df_agg["wall_seconds_std"] / (df_agg["wall_seconds_mean"] ** 2)

# Efficiency = speedup divided by the factor by which the rank count grew.
df_agg["efficiency_mean"] = df_agg["speedup_mean"] / (df_agg["ranks"] / ref_ranks)
df_agg["efficiency_std"] = df_agg["speedup_std"] / (df_agg["ranks"] / ref_ranks)

fig, ax1 = plt.subplots()

# Colors are set explicitly: every Axes (including a twin) starts its own
# color cycle, so without this both curves would share the same default color.
speedup_color = "C0"
efficiency_color = "C1"

ax1.errorbar(df_agg["ranks"], df_agg["speedup_mean"],
             yerr=df_agg["speedup_std"], marker="o", capsize=5, capthick=2,
             color=speedup_color, label="Speedup")
ax1.set_xscale("log", base=2)
ax1.set_xlabel("MPI ranks")
ax1.set_ylabel("Speedup", color=speedup_color)

ax2 = ax1.twinx()
ax2.errorbar(df_agg["ranks"], df_agg["efficiency_mean"],
             yerr=df_agg["efficiency_std"], marker="s", linestyle="--",
             capsize=5, capthick=2, color=efficiency_color, label="Efficiency")
ax2.set_ylabel("Parallel Efficiency", color=efficiency_color)
# The notebook enables "axes.grid" globally; a second grid on the twin axis
# would not line up with the first one and only clutters the plot.
ax2.grid(False)

# A figure-level legend collects the labelled artists of both axes.
fig.legend(loc="upper right")
ax1.set_title("Strong Scaling: Speedup & Efficiency (mean ± std)")
fig.savefig(strong_run_dir / "speedup_efficiency.png", dpi=300)

In [None]:
# Final summary table: problem size, timing statistics and one efficiency
# column. "efficiency_mean" is used when it is present in df_agg; otherwise
# the weak-scaling column name is used.
efficiency_col = (
    "efficiency_mean" if "efficiency_mean" in df_agg.columns else "weak_efficiency_mean"
)
summary_cols = [
    "ranks",
    "npt_per_rank",
    "total_particles",
    "wall_seconds_mean",
    "wall_seconds_std",
    "particle_steps_per_sec_mean",
    "particle_steps_per_sec_std",
    efficiency_col,
]
display(df_agg[summary_cols])

In [None]:
# Load the per-run summary of the selected weak-scaling run.
# NOTE: this rebinds csv_path, df and df_agg, replacing the strong-scaling data.
csv_path = weak_run_dir / "summary.csv"
df = pd.read_csv(csv_path)
df = df.sort_values("ranks").reset_index(drop=True)

# Output column -> (input column, aggregation). One row per rank count:
# mean/std of the timing metrics across repeated runs, and the problem-size
# columns taken from the first run of each group (they should not vary
# within a group).
aggregations = {
    "wall_seconds_mean": ("wall_seconds", "mean"),
    "wall_seconds_std": ("wall_seconds", "std"),
    "particle_steps_per_sec_mean": ("particle_steps_per_sec", "mean"),
    "particle_steps_per_sec_std": ("particle_steps_per_sec", "std"),
    "npt_per_rank": ("npt_per_rank", "first"),
    "total_particles": ("total_particles", "first"),
}
df_agg = df.groupby("ranks").agg(**aggregations).reset_index()

# Keep rows ordered by rank count with a clean 0..n-1 index.
df_agg = df_agg.sort_values("ranks").reset_index(drop=True)

display(df_agg.head())

In [None]:
# Weak scaling: mean wall time per rank count on log-log axes,
# with the run-to-run standard deviation as error bars.
fig, ax = plt.subplots()
ranks = df_agg["ranks"]
ax.errorbar(ranks, df_agg["wall_seconds_mean"], yerr=df_agg["wall_seconds_std"],
            marker="o", capsize=5, capthick=2)
ax.set_xscale("log", base=2)
ax.set_yscale("log")

ax.set(
    xlabel="MPI ranks",
    ylabel="Wall time [s]",
    title="Wall Time vs MPI Ranks (mean ± std)",
)
fig.savefig(weak_run_dir / "wall_time_vs_ranks.png", dpi=300)

In [None]:
# Weak scaling: mean throughput (particle-steps per second) per rank count
# on log-log axes, with the run-to-run standard deviation as error bars.
fig, ax = plt.subplots()
ax.errorbar(
    df_agg["ranks"],
    df_agg["particle_steps_per_sec_mean"],
    yerr=df_agg["particle_steps_per_sec_std"],
    marker="o",
    capsize=5,
    capthick=2,
)
ax.set_xscale("log", base=2)
ax.set_yscale("log")

ax.set_xlabel("MPI ranks")
ax.set_ylabel("Particle-steps / second")
ax.set_title("Throughput Scaling (mean ± std)")
fig.savefig(weak_run_dir / "throughput_scaling.png", dpi=300)

In [None]:
# Weak-scaling efficiency: wall time of the first row of df_agg (smallest rank
# count) divided by the wall time at each rank count.
wall_mean = df_agg["wall_seconds_mean"]
wall_std = df_agg["wall_seconds_std"]
t_ref_mean = wall_mean.iloc[0]

df_agg["weak_efficiency_mean"] = t_ref_mean / wall_mean
# Error bar: wall-time std scaled by t_ref / T**2; the reference time itself
# is not given an uncertainty here.
df_agg["weak_efficiency_std"] = t_ref_mean * wall_std / (wall_mean ** 2)

fig, ax = plt.subplots()
ax.errorbar(
    df_agg["ranks"],
    df_agg["weak_efficiency_mean"],
    yerr=df_agg["weak_efficiency_std"],
    marker="o",
    capsize=5,
    capthick=2,
)
ax.set_xscale("log", base=2)

ax.set_xlabel("MPI ranks")
ax.set_ylabel("Weak Scaling Efficiency")
ax.set_title("Weak Scaling Efficiency (Ideal = 1) (mean ± std)")
ax.set_ylim(0, 1.1)
fig.savefig(weak_run_dir / "weak_scaling_efficiency.png", dpi=300)

In [None]:
# Final summary table: problem size, timing statistics and one efficiency
# column, chosen by which efficiency column df_agg currently holds.
if "efficiency_mean" in df_agg.columns:
    efficiency_col = "efficiency_mean"
else:
    efficiency_col = "weak_efficiency_mean"

display(df_agg.loc[:, [
    "ranks",
    "npt_per_rank",
    "total_particles",
    "wall_seconds_mean",
    "wall_seconds_std",
    "particle_steps_per_sec_mean",
    "particle_steps_per_sec_std",
    efficiency_col,
]])