In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Load results CSV
CSV_FILE = "results_1.3.csv"
res_df = pd.read_csv(CSV_FILE)

# Ensure plots directory exists
os.makedirs('plots', exist_ok=True)


def _numeric_column(frame, name):
    """Return ``frame[name]`` coerced to numeric, with unparseable/missing values set to 0.

    When the column does not exist at all, an all-zero Series on the frame's
    index is returned instead, so callers can assign the result unconditionally.
    """
    if name not in frame.columns:
        return pd.Series(0.0, index=frame.index)
    return pd.to_numeric(frame[name], errors='coerce').fillna(0)


# Normalize dataframe (work on a copy so res_df keeps the raw CSV contents)
df = res_df.copy()
# Rows with an empty user cell (or a CSV without a 'user' column) are grouped
# under 'unknown'. Leaving NaN in place would make the sorted() call below fail
# when it compares NaN (a float) with user-name strings.
if 'user' in df.columns:
    df['user'] = df['user'].fillna('unknown')
else:
    df['user'] = 'unknown'
df['version'] = df['version'].astype(str).str.strip().str.lower()
df['size'] = _numeric_column(df, 'size').astype(int)

# NOTE: unparseable or missing timings become 0 here and therefore take part
# in any statistics computed from these columns.
time_cols = ["time_init", "time_compute", "time_thread_create", "time_thread_join", "time_cleanup", "time_total"]
for col in time_cols:
    df[col] = _numeric_column(df, col)

unique_users = sorted(df["user"].unique())
unique_sizes = sorted(df["size"].unique())
# Plot order and colour of each implementation variant
versions = ["sequential", "parallel_unpadded", "parallel_padded", "parallel_local_accum"]
colors = {
    "sequential": "#7f7f7f",
    "parallel_unpadded": "#ff7f0e",
    "parallel_padded": "#2ca02c",
    "parallel_local_accum": "#d62728"
}

# Track saved files to avoid double-saving within one run
_saved_plot_paths = set()

def size_label(n):
    """Return a compact axis label for an element count.

    Values of at least one million are expressed in "M", values of at least
    one thousand in "K", anything smaller is returned via ``str(n)``.
    The scaled value is printed exactly (up to six decimals, trailing zeros
    removed) rather than floor-divided, so distinct sizes such as 1000 and
    1500 no longer collapse onto the same label ("1K" vs "1.5K").
    """
    for factor, suffix in ((1_000_000, "M"), (1000, "K")):
        if n >= factor:
            # Six decimals represent any integer count exactly for both
            # factors; strip the padding so exact multiples stay "2M", "10K".
            scaled = f"{n / factor:.6f}".rstrip("0").rstrip(".")
            return f"{scaled}{suffix}"
    return str(n)

# --- Function to overlay speedups with error propagation ---
def plot_speedup_overlay(seq_means, par_means_dict, sizes, title, outpath, seq_stds=None, par_stds_dict=None):
    """Plot speedup (sequential time / parallel time) per size for several versions and save it.

    Args:
        seq_means: Array-like of sequential mean times, one entry per element of ``sizes``.
        par_means_dict: Mapping of version name -> array-like of mean times aligned with ``sizes``.
        sizes: Sizes used for the x tick labels (formatted with ``size_label``).
        title: Figure title.
        outpath: Destination image path. A path already recorded in
            ``_saved_plot_paths`` is not rendered or written again.
        seq_stds: Optional array-like of sequential standard deviations.
        par_stds_dict: Optional mapping of version name -> array-like of standard deviations.

    Speedup is NaN wherever the parallel time is not > 0. When both std inputs
    are given for a version, an error band and error bars are drawn using
    first-order error propagation for a ratio. Versions with no finite speedup
    value are skipped so they do not add an empty entry to the legend.
    """
    # Decide up front: a duplicate path would only be rendered and thrown away.
    if outpath in _saved_plot_paths:
        print(f"Skipped duplicate save: {outpath}")
        return

    # Loop-invariant conversions
    seq = np.asarray(seq_means, dtype=float)
    seq_s = None if seq_stds is None else np.asarray(seq_stds, dtype=float)
    x = np.arange(len(sizes))

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        fig.suptitle(title, fontsize=14, fontweight="bold")

        for par_version, par_means in par_means_dict.items():
            par = np.asarray(par_means, dtype=float)
            color = colors.get(par_version, '#444')

            with np.errstate(divide='ignore', invalid='ignore'):
                speedup = np.where(par > 0, seq / par, np.nan)

            if not np.any(np.isfinite(speedup)):
                # Nothing to draw for this version (e.g. it was never measured).
                continue

            # Error propagation for S = A / B: var(S) ≈ S^2 * ( (σ_A/μ_A)^2 + (σ_B/μ_B)^2 )
            speedup_err = np.full_like(speedup, np.nan)
            if seq_s is not None and par_stds_dict is not None and par_version in par_stds_dict:
                par_s = np.asarray(par_stds_dict[par_version], dtype=float)
                # Relative errors are only defined where both means are > 0
                valid = (seq > 0) & (par > 0)
                rel_sq = (seq_s[valid] / seq[valid]) ** 2 + (par_s[valid] / par[valid]) ** 2
                speedup_err[valid] = np.abs(speedup[valid]) * np.sqrt(rel_sq)

            # Plot mean
            ax.plot(x, speedup, marker='o', linewidth=2, markersize=8,
                    label=par_version.replace('_', ' ').capitalize(), color=color)

            # Plot error band if available
            if np.any(np.isfinite(speedup_err)):
                ax.fill_between(x, speedup - speedup_err, speedup + speedup_err, alpha=0.2, color=color)
                # also show small errorbars on markers
                ax.errorbar(x, speedup, yerr=speedup_err, fmt='none', ecolor=color, capsize=4, alpha=0.8)

        ax.set_xlabel("Array Size", fontsize=11)
        ax.set_ylabel("Speedup", fontsize=11)
        ax.set_xticks(x)
        ax.set_xticklabels([size_label(s) for s in sizes])
        ax.grid(True, alpha=0.3)
        # Only build a legend when at least one labelled curve was drawn
        if ax.get_legend_handles_labels()[0]:
            ax.legend(fontsize=10)
        fig.tight_layout()

        fig.savefig(outpath, dpi=300, bbox_inches="tight")
        _saved_plot_paths.add(outpath)
        print(f"Saved: {outpath}")
    finally:
        # Release the figure even if plotting or saving raised
        plt.close(fig)

# --- Per-user plots ---
def _aligned_stats(stats, version, sizes):
    """Return the rows of *stats* for *version*, indexed by size and aligned to *sizes*.

    Sizes that were not measured for this version come back as NaN rows, so the
    resulting columns always have exactly ``len(sizes)`` entries.
    """
    return stats[stats["version"] == version].set_index("size").reindex(sizes)


# x positions are shared by every timing plot; each version is shifted slightly
# so overlapping error bars stay readable.
x_positions = np.arange(len(unique_sizes))
jitter_offset = np.linspace(-0.2, 0.2, len(versions))

parallel_versions = ["parallel_unpadded", "parallel_padded", "parallel_local_accum"]
# (metric, title fragment, output-file fragment) for the two speedup overlays
speedup_plots = [
    ("time_total", "Total-time", "speedup_overlay"),
    ("time_compute", "Compute-time", "compute_speedup_overlay"),
]

for user in unique_users:
    user_df = df[df["user"] == user]
    if user_df.empty:
        continue

    # Mean/std of every timing column per (version, size); the two-level column
    # index is flattened to names like "time_total_mean".
    stats = user_df.groupby(["version", "size"])[time_cols].agg(["mean", "std"]).reset_index()
    stats.columns = ["_".join(c).strip("_") for c in stats.columns]

    # Timing plots
    for metric in ["time_total", "time_compute"]:
        metric_title = metric.replace('time_', '').replace('_', ' ').title()
        fig, ax = plt.subplots(figsize=(12, 6))
        fig.suptitle(f"Array Analysis — {metric_title} — User: {user}", fontsize=14, fontweight="bold")

        for idx, version in enumerate(versions):
            if stats[stats["version"] == version].empty:
                continue
            # Align to the global size axis: a user/version that lacks some
            # sizes would otherwise give y arrays shorter than x and make
            # errorbar() raise a shape-mismatch error.
            aligned = _aligned_stats(stats, version, unique_sizes)
            ax.errorbar(x_positions + jitter_offset[idx],
                        aligned[f"{metric}_mean"].values,
                        yerr=aligned[f"{metric}_std"].values,
                        marker='o', label=version.replace('_', ' ').capitalize(),
                        linewidth=2, capsize=5, linestyle='-', alpha=0.8, color=colors.get(version, "#444"))

        ax.set_xlabel("Array Size", fontsize=11)
        ax.set_ylabel(f"{metric_title} (s)", fontsize=11)
        ax.set_xticks(range(len(unique_sizes)))
        ax.set_xticklabels([size_label(s) for s in unique_sizes])
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        outpath = f"plots/plot_1_3_{user}_{metric}.png"
        if outpath not in _saved_plot_paths:
            fig.savefig(outpath, dpi=300, bbox_inches="tight")
            _saved_plot_paths.add(outpath)
            print(f"Saved: {outpath}")
        else:
            print(f"Skipped duplicate save: {outpath}")
        plt.close(fig)

    # --- Overlay speedup plots (total + compute) ---
    # Speedup needs a sequential baseline, so only sizes measured sequentially are used.
    common_sizes = sorted(stats.loc[stats["version"] == "sequential", "size"].tolist())
    if not common_sizes:
        continue

    seq_stats = _aligned_stats(stats, "sequential", common_sizes)
    par_stats = {v: _aligned_stats(stats, v, common_sizes) for v in parallel_versions}

    for metric, title_part, file_part in speedup_plots:
        mean_col, std_col = f"{metric}_mean", f"{metric}_std"
        plot_speedup_overlay(
            seq_stats[mean_col].values,
            {v: frame[mean_col].values for v, frame in par_stats.items()},
            common_sizes,
            f"Array Analysis — {title_part} Speedup — User: {user}",
            f"plots/plot_1_3_{user}_{file_part}.png",
            seq_stds=seq_stats[std_col].values,
            par_stds_dict={v: frame[std_col].values for v, frame in par_stats.items()},
        )

print("\nOverlay speedup plots generated for individual users (total + compute).")

Saved: plots/plot_1_3_ea24205_time_total.png
Saved: plots/plot_1_3_ea24205_time_compute.png
Saved: plots/plot_1_3_ea24205_speedup_overlay.png
Saved: plots/plot_1_3_ea24205_compute_speedup_overlay.png
Saved: plots/plot_1_3_phoebus_time_total.png
Saved: plots/plot_1_3_phoebus_time_compute.png
Saved: plots/plot_1_3_phoebus_speedup_overlay.png
Saved: plots/plot_1_3_phoebus_compute_speedup_overlay.png

Overlay speedup plots generated for individual users (total + compute).
