In [None]:
import os 
from pathlib import Path

def find_dist_logs(base_dir):
    dist_log = []
    for root, _, files in os.walk(base_dir):
        for file in files:
            if file.startswith("dist") and file.endswith(".log"):
                full_path = os.path.join(root, file)
                dist_log.append(full_path)
    return dist_log

def keep_first_last_n_lines(filepath, n=10):
    with open(filepath, 'r') as f:
        lines = f.readlines()
    if len(lines) <= 2 * n:
        # File is already short, keep all lines
        return
    # Get the first n and last n lines
    new_lines = lines[:n] + lines[-n:]
    with open(filepath, 'w') as f:
        f.writelines(new_lines)

base_dir = Path("~/Projects/chameleon/EXPS").expanduser()
for filepath in find_dist_logs(base_dir):
    keep_first_last_n_lines(filepath, n=10)

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from pathlib import Path
import os, json, re
import numpy as np
import seaborn as sns
#from mpl_toolkits.axes_grid1 import host_subplot
#import mpl_toolkits.axisartist as AA

In [3]:
def find_stats_json_files(base_dir):
    """Recursively find all system stats JSONL files under base_dir."""
    #base_dir = os.path.expanduser(base_dir)
    stats_jsons = []
    for root, _, files in os.walk(base_dir):
        for file in files:
            if file.startswith("stats_") and file.endswith(".json"):
                full_path = os.path.join(root, file)
                stats_jsons.append(full_path)
    return stats_jsons

def parse_path_fields(path):
    parts = path.split(os.sep)
    #print(f"[DEBUG] parts: {parts}")
    filename = Path(path).name     # sys_stats_<port>_R<run>.jsonl
    stem = Path(filename).stem     # sys_stats_<port>_R<run>
    
    run = None
    for part in stem.split("_"):
        if part.startswith("R") and part[1:].isdigit():
            run = int(part[1:])
            break

    for i, p in enumerate(parts):
        #if p in ("mini-apps", "iperf"):
        if p in ("iperf", "mini-apps"):
            return {
                "app": p,
                #"date": parts[i+1],
                "proxy": parts[i+1],
                "congestion": parts[i+2],
                "parallel": parts[i+3],
                "duration": parts[i+4],
                "run": run
            }
    raise ValueError(f"Path {path} does not match expected pattern.")

def load_all_stats(base_dir):
    records = []
    for filepath in find_stats_json_files(base_dir):
        fields = parse_path_fields(filepath)
        with open(filepath) as f:
            first_line = f.readline()
            try:
                metadata = json.loads(first_line).get("metadata", {})
            except Exception as e:
                print(f"couldn't read the first line of {filepath}: {e}")
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except Exception:
                    continue
                if "timestamp" not in rec:
                    continue
                row = {**fields, **rec, **metadata}
                records.append(row)
    return pd.DataFrame(records)


In [None]:
BASE_DIR = Path("~/Projects/chameleon/EXPS/cons").expanduser()
df = load_all_stats(BASE_DIR)
csv_path = "../data/cons.csv"

if os.path.exists(csv_path):
    if "per_cpu" in update_df.columns and update_df["per_cpu"].apply(lambda x: isinstance(x, list)).any():
        update_df["per_cpu"] = update_df["per_cpu"].apply(str)
    old_df = pd.read_csv(csv_path)
    update_df = pd.concat([old_df, df], ignore_index=True)
    update_df = update_df.drop_duplicates()
else:
    update_df = df

#df.to_csv("../data/cons.csv", index=False, mode="a", header=False)
update_df.to_pickle("../data/df.pkl")
update_df.to_csv("../data/cons.csv", index=False)


TypeError: unhashable type: 'list'

In [None]:
#df = pd.read_pickle("data/df.pkl")
df = pd.read_csv("../data/cons.csv")


In [None]:
#print(f"DataFrame shape: {df.head(5)}{df.shape}")
#print(df.columns)
#print(df.sample(3))

In [None]:
# FOR NET_RX which is based on cons
#df["net_rx_Gbps"] = pd.to_numeric(df["net_rx_Gbps"], errors="coerce")

def top_n_net_rx(group):
    n = int(group['duration'].iloc[0][1:]) #Tx = x
    return group.sort_values('net_rx_Gbps', ascending=False).head(n)

"""# sort them by the net_rx_Gbps in descending order
df_sorted = df.sort_values(
    ["app", "proxy", "congestion", "parallel", "duration", "run", "net_rx_Gbps"], 
    #["app", "date", "proxy", "congestion", "flow", "duration", "run", "net_rx_Gbps"], 
    ascending=[True, True, True, True, True, True, False]
)"""

"""# just extract the top N rows per group (the N is the duration which the value is more than 0))
topN_per_group = (
    df_sorted.groupby(
        ["app", "proxy", "congestion", "parallel", "duration", "run"], group_keys=False
    ).apply(top_n_net_rx)
    .reset_index(drop=True)
)"""

topN_per_group = (
    df.groupby(
        ['app', 'proxy', 'congestion', 'parallel', 'duration', 'run'], group_keys=False
    ).apply(top_n_net_rx)
    .reset_index(drop=True)
)

# add the second column (0, 1, ..., n-1) for each run
topN_per_group['second'] = topN_per_group.groupby(
    ['app', 'proxy', 'congestion', 'parallel', 'duration', 'run']
).cumcount()

topN_per_group = topN_per_group.sort_values(
    ['app', 'proxy', 'congestion', 'parallel', 'duration', 'run', 'second', 'timestamp']
)

# agg by the values in each file based on run
agg_df =(
    topN_per_group
    .groupby(['app', 'proxy', 'congestion', 'parallel', 'duration', 'run'])
    .agg(
        avg_net_rx_Gbps=('net_rx_Gbps', 'mean'),
        avg_net_tx_Gbps=('net_tx_Gbps', 'mean'),
        max_total_cpu=('total_cpu', 'max'),
        mean_total_cpu=('total_cpu', 'mean'),
        sum_disk_read_MB=('disk_read_MB', 'sum'),
        sum_disk_write_MB=('disk_write_MB', 'sum'),
        sum_total_rx_dropped=('total_rx_dropped', 'sum'),
        sum_total_tx_dropped=('total_tx_dropped', 'sum')
    )
    .reset_index()
)

agg_config_df = (
    agg_df.groupby(['app', 'proxy', 'congestion', 'parallel', 'duration'])
    .agg(
        mean_net_rx_Gbps=('avg_net_rx_Gbps', 'mean'),
        mean_net_tx_Gbps=('avg_net_tx_Gbps', 'mean'),
        max_total_cpu=('max_total_cpu', 'mean'),
        mean_total_cpu=('mean_total_cpu', 'mean'),
        sum_disk_read_MB=('sum_disk_read_MB', 'mean'),
        sum_disk_write_MB=('sum_disk_write_MB', 'mean'),
        sum_total_rx_dropped=('sum_total_rx_dropped', 'mean'),
        sum_total_tx_dropped=('sum_total_tx_dropped', 'mean'),
    )
    .reset_index()
)

# agg across runs for each second
agg_per_sec = (
    topN_per_group
    .groupby(['app', 'proxy', 'congestion', 'parallel', 'duration', 'second'])
    .agg(
        mean_net_rx_Gbps=('net_rx_Gbps', 'mean'),
        std_net_rx_Gbps=('net_rx_Gbps', 'std'),
        count=('net_rx_Gbps', 'count'),
    )
    .reset_index()
)

pd.set_option('display.max_columns', None)


In [None]:
#print(topN_per_group.sample(15))
#print(topN_per_group.head(15))
#print(f"[INFO] TopN-per-group DataFrame shape: {topN_per_group.shape}")

#print(agg_df.head(11))  # or iloc if you want exactly the first 10
#print(group_cols)
#print(f"[INFO] agg_config_df shape: {agg_df.shape}")

#print(agg_config_df.head(10))
#print(f"[INFO] agg_config_df shape: {agg_config_df.shape}")

#print(agg_config_df.head(10))
#print(agg_config_df['mean_net_tx_Gbps'].describe())

In [None]:
apps = agg_df["app"].unique()
proxies = agg_df["proxy"].unique()
congestions = agg_df["congestion"].unique()
parallels = sorted(agg_df["parallel"].unique(), key=lambda x: int(x[1:]))
durations = sorted(agg_df["duration"].unique(), key=lambda x: int(x[1:]))
runs = sorted(agg_df["run"].unique())


colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red']
proxy_linestyles = {
    "Nginx": '-',
    "HaproxySubprocess": '--',
    "StunnelSubprocess.v1.2": '-.',
    "StunnelSubprocess.v1.3": ':'
}

In [None]:
#parallels = ["P1", "P3", "P5"]
#proxies = ["HaproxySubprocess", "Nginx", "StunnelSubprocess.v1.2", "StunnelSubprocess.v1.3"]
#durations = sorted(agg_df["duration"].unique(), key=lambda x: int(x[1:]))



# to plot average of each run for a specific configuration
"""
subset = agg_df[
    (agg_df["app"] == "iperf") &
    (agg_df["proxy"] == "HaproxySubprocess") &
    (agg_df["congestion"] == "bbr") &
    (agg_df["parallel"] == "P1") &
    (agg_df["duration"] == "T10")
].sort_values("run")

# when cols is net_rx_Gbps, use that but if avg_net_rx_Gbps, use that
y_col = "avg_net_rx_Gbps" if "avg_net_rx_Gbps" in subset.columns else "net_rx_Gbps"

plt.figure(figsize=(12, 6))
plt.plot(subset["run"], subset[y_col], marker="o")
plt.xlabel("Run Number")
plt.ylabel("Avg Net RX Throughput (Gbps)")
plt.title("Throughput for 10 Runs\n(iperf, HaproxySubprocess, bbr, P1, T10)")
plt.xticks(subset["run"])
plt.grid(True)
plt.tight_layout()
plt.show()
"""
# to plot different times for one configuration
"""
parallels = ["P1", "P3", "P5"]
plt.figure(figsize=(8, 5))

for par in parallels:
    subset = agg_df[
        (agg_df["app"] == "iperf") &
        (agg_df["proxy"] == "HaproxySubprocess") &
        (agg_df["congestion"] == "bbr") &
        (agg_df["parallel"] == par) &
        (agg_df["duration"] == "T10")
    ].sort_values("run")
    plt.plot(subset["run"], subset["avg_net_rx_Gbps"], marker="o", label=f"{par}")

plt.xlabel("Run")
plt.ylabel("Avg Net RX Throughput (Gbps)")
plt.title("Per-run Throughput for Different Parallels (T10, bbr, HaproxySubprocess, iperf)")
plt.legend(title="Parallel")
plt.tight_layout()
plt.show()
"""

# 3 plots of per run throughput by proxy and parallel
"""
for par in parallels:
    plt.figure(figsize=(5, 3))
    for proxy in proxies:
        linestyle = proxy_linestyles.get(proxy, '-')
        subset = agg_df[
            (agg_df["app"] == "iperf") &
            (agg_df["proxy"] == proxy) &
            (agg_df["congestion"] == "bbr") &
            (agg_df["parallel"] == par) &
            (agg_df["duration"] == "T10")
        ].sort_values("run")
        if not subset.empty:
            plt.plot(
                subset["run"],
                subset["avg_net_rx_Gbps"],
                linestyle=linestyle,
                marker="*",
                label=f"{proxy}, {par}"
            )

    plt.xlabel("Run")
    plt.ylabel("Avg Net RX Throughput (Gbps)", fontsize='small')
    plt.title("Per-run Throughput by Proxy and Parallel (bbr, T10, iperf)", fontsize='small')
    plt.legend(title="Config/Parallel", fontsize='small')
    plt.tight_layout()
    plt.ylim(bottom=0)
    plt.show()
"""

# 3 subplots in one figure seperating by parallels 
"""
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(10, 9), sharex=True, gridspec_kw={'hspace': 0.3})
for idx, par in enumerate(parallels):
    ax = axes[idx]
    for i, proxy in enumerate(proxies):
        linestyle = proxy_linestyles.get(proxy, '-')
        subset = agg_df[
            (agg_df["app"] == "iperf") &
            (agg_df["proxy"] == proxy) &
            (agg_df["congestion"] == "bbr") &
            (agg_df["parallel"] == par) &
            (agg_df["duration"] == "T10")
        ].sort_values("run")
        if not subset.empty:
            ax.plot( subset["run"], subset["avg_net_rx_Gbps"], marker="o", linestyle=linestyle, 
                    color=colors[i], label=proxy if idx == 0 else "")
    
    ax.set_ylabel(f"{par} (Gbps)")
    #ax.set_ylim(subset["avg_net_rx_Gbps"].min() - 1, subset["avg_net_rx_Gbps"].max() + 1)
    #ax.set_ylabel(f"{par} (Gbps)", labelpad=30, rotation=0, ha='left', va='center')
    #ax.yaxis.set_label_position("right")
    if idx < len(parallels) - 1:
        ax.tick_params(labelbottom=False)  # show the  x labels only for the last subplot

axes[-1].set_xlabel("Run")
fig.suptitle("Throughput Per Run by Parallel (Discontinuous Y, Shared X)")
axes[0].legend(title="Proxy")  # Show legend only once
#plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()
"""

# 1 plot with 3 subplots in one figure, seperating by parallels
"""
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(10, 9), sharex=True, gridspec_kw={'hspace': 0.3})
for idx, par in enumerate(parallels):
    ax = axes[idx]
    for i, proxy in enumerate(proxies):
        linestyle = proxy_linestyles.get(proxy, '-')
        subset = agg_df[
            (agg_df["app"] == "iperf") &
            (agg_df["proxy"] == proxy) &
            (agg_df["congestion"] == "bbr") &
            (agg_df["parallel"] == par) &
            (agg_df["duration"] == "T10")
        ].sort_values("run")
        if not subset.empty:
            ax.plot(
                subset["run"], subset["avg_net_rx_Gbps"],
                marker="o", linestyle=linestyle,
                color=colors[i], label=proxy if idx == 0 else ""
            )

    # y-axis label to the right
    ax.set_ylabel(f"{par} (Gbps)", labelpad=35)
    ax.grid(True, axis='y', linestyle='--', alpha=0.5),
    ax.yaxis.set_label_position("right")
    if idx < len(parallels) - 1:
        ax.tick_params(labelbottom=False)

axes[-1].set_xlabel("Run")
fig.suptitle("Throughput Per Run by Parallel (Discontinuous Y, Shared X)", y=1.04)

# break marks // between subplots
d = .03  # size of break mark
for i in range(len(parallels) - 1):
    kwargs = dict(transform=axes[i].transAxes, color='k', clip_on=False)
    axes[i].plot([-d, +d], [-d, +d], **kwargs)                                      # bottom left
    axes[i].plot([1 - d, 1 + d], [-d, +d], **kwargs)                                # bottom right
    kwargs2 = dict(transform=axes[i + 1].transAxes, color='k', clip_on=False)
    axes[i + 1].plot([-d, +d], [1 - d, 1 + d], **kwargs2)                           # top left
    axes[i + 1].plot([1 - d, 1 + d], [1 - d, 1 + d], **kwargs2)                     # top right

# legend outside the plot
handles, labels = axes[0].get_legend_handles_labels()
fig.legend(handles, labels, title="Proxy", loc='center left', bbox_to_anchor=(1.01, 0.5))

#plt.tight_layout(rect=[0, 0, 0.87, 1])
plt.show()
"""

# all the plots  seperating by parallels and durations showing the relevant fluctuations using //
"""
for dur in durations:
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(10, 9), sharex=True, gridspec_kw={'hspace': 0.3})

    for idx, par in enumerate(parallels):
        ax = axes[idx]
        for i, proxy in enumerate(proxies):
            linestyle = proxy_linestyles.get(proxy, '-')
            subset = agg_df[
                (agg_df["app"] == "iperf") &
                (agg_df["proxy"] == proxy) &
                (agg_df["congestion"] == "bbr") &
                (agg_df["parallel"] == par) &
                (agg_df["duration"] == dur)
            ].sort_values("run")
            if not subset.empty:
                ax.plot(
                    subset["run"], subset["avg_net_rx_Gbps"],
                    marker="o", linestyle=linestyle,
                    color=colors[i], label=proxy if idx == 0 else ""
                )
        ax.set_ylabel(f"{par} (Gbps)", labelpad=10)
        ax.grid(True, axis='y', linestyle='--', alpha=0.5)
        ax.yaxis.set_label_position("right")
        if idx < len(parallels) - 1:
            ax.tick_params(labelbottom=False)

    axes[-1].set_xlabel("Run")
    fig.suptitle(f"Throughput Per Run by Parallel (Discontinuous Y, Shared X) | Duration: {dur}", y=.92)

    # break marks // between subplots
    d = .03
    for i in range(len(parallels) - 1):
        kwargs = dict(transform=axes[i].transAxes, color='k', clip_on=False)
        axes[i].plot([-d, +d], [-d, +d], **kwargs)                                      # bottom left
        axes[i].plot([1 - d, 1 + d], [-d, +d], **kwargs)                                # bottom right
        kwargs2 = dict(transform=axes[i + 1].transAxes, color='k', clip_on=False)
        axes[i + 1].plot([-d, +d], [1 - d, 1 + d], **kwargs2)                           # top left
        axes[i + 1].plot([1 - d, 1 + d], [1 - d, 1 + d], **kwargs2)                     # top right

    # legend outside the plot
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, title="Proxy", loc='center left', bbox_to_anchor=(1.01, 0.5))

    #plt.tight_layout(rect=[0, 0, 0.87, 1])
    plt.show()
    """


In [None]:
# 4 plots run based plots for throughput of different configs
for app in apps:
    for congestion in congestions:
        nrows = len(parallels)
        ncols = len(durations)

        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6 * ncols, 3 * nrows), sharex=True)
        # if only one duration then axes will be 1D, so make it 2D for easier indexing
        if ncols == 1:
            axes = axes[:, np.newaxis]
        if nrows == 1:
            axes = axes[np.newaxis, :]

        for row_idx, par in enumerate(parallels):
            for col_idx, dur in enumerate(durations):
                ax = axes[row_idx, col_idx]
                for i, proxy in enumerate(proxies):
                    linestyle = proxy_linestyles.get(proxy, '-')
                    subset = agg_df[
                        (agg_df["app"] == app) &
                        (agg_df["proxy"] == proxy) &
                        (agg_df["congestion"] == congestion) &
                        (agg_df["parallel"] == par) &
                        (agg_df["duration"] == dur)
                    ].sort_values("run")
                    if not subset.empty:
                        ax.plot(
                            subset["run"], subset["avg_net_rx_Gbps"],
                            marker="o", linestyle=linestyle,
                            color=colors[i], label=proxy if (row_idx == 0 and col_idx == 0) else ""
                        )
                ax.set_ylim(bottom=0)
                # titles and labels
                if row_idx == 0:
                    ax.set_title(f"Duration: {dur}", fontsize=14)
                if col_idx == 3:
                    ax.set_ylabel(f"{par} (Gbps)", labelpad=10)
                ax.grid(True, axis='y', linestyle='--', alpha=0.5)
                ax.yaxis.set_label_position("right")
                if row_idx < len(parallels) - 1:
                    ax.tick_params(labelbottom=False)
                if row_idx == len(parallels) - 1:
                    ax.set_xlabel("Run")

        #legend outside the first subplot
        handles, labels = axes[0,0].get_legend_handles_labels()
        fig.legend(handles, labels, title=f"{congestion.upper()} - {app.upper()}", loc='center left', bbox_to_anchor=(.90, 0.5))
        fig.suptitle("Throughput: Run | Parallel & Duration", y=1.04, fontsize=16)
        plt.tight_layout(rect=[0, 0, 0.87, 1])
        plt.show()

In [None]:
"""df = df.copy()
df = df.sort_values(["app", "proxy", "congestion", "parallel", "duration", "run"]) 



def top_n_net_rx(group):
    n = int(group['duration'].iloc[0][1:])  # e.g., "T10" -> 10
    return group.sort_values('net_rx_Gbps', ascending=False).head(n)

topN_per_group = (
    df.groupby(['app', 'proxy', 'congestion', 'parallel', 'duration', 'run'], group_keys=False)
      .apply(top_n_net_rx)
      .reset_index(drop=True)
)

# 2. Add the second column (0, 1, ..., n-1) for each run
topN_per_group['second'] = topN_per_group.groupby(
    ['app', 'proxy', 'congestion', 'parallel', 'duration', 'run']
).cumcount()

# 3. Aggregate across runs for each second
group_cols = ['app', 'proxy', 'congestion', 'parallel', 'duration', 'second']
agg_per_sec = (
    topN_per_group
    .groupby(group_cols)
    .agg(
        mean_net_rx_Gbps=('net_rx_Gbps', 'mean'),
        std_net_rx_Gbps=('net_rx_Gbps', 'std'),
        count=('net_rx_Gbps', 'count'),
    )
    .reset_index()
)"""


for app in apps:
    for congestion in congestions:
        nrows = len(parallels)
        ncols = len(durations)
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6 * ncols, 3 * nrows))
        if ncols == 1:
            axes = axes[:, np.newaxis]
        if nrows == 1:
            axes = axes[np.newaxis, :]

        for row_idx, par in enumerate(parallels):
            for col_idx, dur in enumerate(durations):
                ax = axes[row_idx, col_idx]
                for i, proxy in enumerate(proxies):
                    linestyle = proxy_linestyles.get(proxy, '-')
                    color = colors[i]
                    subset = agg_per_sec[
                        (agg_per_sec["app"] == app) &
                        (agg_per_sec["proxy"] == proxy) &
                        (agg_per_sec["congestion"] == congestion) &
                        (agg_per_sec["parallel"] == par) &
                        (agg_per_sec["duration"] == dur)
                    ].sort_values("second")
                    # Only plot up to n seconds (duration)
                    n = int(str(dur)[1:])
                    subset = subset[subset["second"] < n]
                    if subset.empty:
                        continue
                    
                    x = subset["second"]
                    y = subset["mean_net_rx_Gbps"]
                    #std = subset["std_net_rx_Gbps"]
                    #ax.plot(x, y, marker="o", linestyle=linestyle, color=color, label=proxy if (row_idx == 0 and col_idx == 0) else "")
                    if not subset.empty:
                        ax.plot(
                            subset["second"], subset["mean_net_rx_Gbps"],
                            marker="+", linestyle=linestyle,
                            color=colors[i], label=proxy if (row_idx == 0 and col_idx == 0) else ""
                        )
                    
                    # add std band
                    #ax.fill_between(x, y - std, y + std, color=color, alpha=0.15)
                
                ax.set_xlim(left=0, right=n-1)
                ax.set_ylim(bottom=0)
                if row_idx == 0:
                    ax.set_title(f"Duration: {dur}", fontsize=14)
                if col_idx == 3:
                    ax.set_ylabel(f"{par} (Gbps)", labelpad=10)
                ax.grid(True, axis='y', linestyle='--', alpha=0.5)
                ax.yaxis.set_label_position("right")
                if row_idx < len(parallels) - 1:
                    ax.tick_params(labelbottom=False)
                if row_idx == len(parallels) - 1:
                    ax.set_xlabel("Second")

        handles, labels = axes[0,0].get_legend_handles_labels()
        fig.legend(handles, labels, title=f"{congestion.upper()} - {app.upper()}", loc='center left', bbox_to_anchor=(.90, 0.5))
        fig.suptitle("Mean Throughput per Second: Parallel & Duration", y=1.04, fontsize=16)
        plt.tight_layout(rect=[0, 0, 0.87, 1])
        plt.show()


In [None]:

if "second" not in df.columns:
    df["second"] = df.groupby(['app', 'proxy', 'congestion', 'parallel', 'duration', 'run']).cumcount()
#durations = sorted(df["duration"].unique(), key=lambda x: int(x[1:])) 

for dur in durations:
    # filter for one app, congestion, duration
    facet_df = df[
        (df["app"] == "iperf") &
        (df["congestion"] == "bbr") &
        (df["duration"] == dur)
    ].copy()

    # mean throughput per config per second
    facet_df_avg = (
        facet_df
        .groupby(['proxy', 'parallel', 'second'], as_index=False)["net_rx_Gbps"]
        .mean()
    )

    # rows = parallel, cols = proxy
    g = sns.FacetGrid(
        facet_df_avg, row="parallel", col="proxy", 
        margin_titles=True, height=3, aspect=2
    )
    for ax in g.axes.flat:
        ax.grid(True, axis='y', linestyle='--', alpha=0.5)
    g.map_dataframe(sns.lineplot, x="second", y="net_rx_Gbps", color='red')
    g.set_axis_labels("Second", "Avg Net RX Throughput (Gbps)")
    g.fig.subplots_adjust(top=0.9)
    g.fig.suptitle(f"Per-Second Avg Throughput by Parallel and Proxy\n(app=iperf, congestion=bbr, duration={dur})")
    plt.show()


In [None]:
#sets:  deep, muted, bright, pastel, dark, colorblind, husl, Set1, Set2, Set3, sns.color_palette("ch:s=-.2,r=.6", as_cmap=True)


g = sns.FacetGrid(
    agg_config_df, row="app", col="proxy", hue="congestion", 
    margin_titles=True, height=4, palette="husl"
)
for ax in g.axes.flat:
    ax.grid(True, axis='y', linestyle='--', alpha=0.5)
g.map(sns.barplot, "parallel", "mean_net_rx_Gbps", order=sorted(agg_config_df["parallel"].unique()), alpha=0.6, errorbar=None)
g.add_legend()
plt.show()


In [None]:
for dur in durations:
    subset = agg_config_df[agg_config_df["duration"] == dur]
    g = sns.FacetGrid(
        subset, 
        row="app", col="proxy", hue="congestion",
        margin_titles=True, height=4, palette="Set1"
    )
    for ax in g.axes.flat:
        ax.grid(True, axis='y', linestyle='--', alpha=0.5)
    g.map(sns.barplot, "parallel", "mean_net_tx_Gbps", order=sorted(subset["parallel"].unique()), alpha=0.6)
    g.add_legend()
    plt.suptitle(f"Duration: {dur}", y=1.02)
    plt.show()

In [None]:
for dur in durations:
    subset = agg_config_df[agg_config_df["duration"] == dur]
    g = sns.FacetGrid(
        subset, 
        row="app", col="proxy", hue="congestion",
        margin_titles=True, height=4
    )
    g.map(sns.barplot, "parallel", "mean_total_cpu", order=sorted(subset["parallel"].unique()), alpha=0.6)
    g.add_legend()
    g.set_axis_labels("Parallel Streams", "Mean Total CPU (%)")
    g.fig.suptitle(f"Mean Total CPU Usage (Duration: {dur})", y=1.03)
    plt.show()

In [None]:
g = sns.FacetGrid(
    agg_config_df, 
    row="app", col="proxy", hue="congestion", 
    margin_titles=True, height=4
)
g.map(
    sns.barplot,
    "parallel", "mean_total_cpu",
    order=sorted(agg_config_df["parallel"].unique()),
    alpha=0.6
)
g.add_legend()
g.set_axis_labels("Parallel Streams", "Mean Total CPU (%)")
g.fig.suptitle("Mean Total CPU Usage by Parallel, App, Proxy, Congestion", y=1.03)
plt.show()

In [None]:
durations = sorted(agg_config_df["duration"].unique(), key=lambda x: int(x[1:]))

for dur in durations:
    subset = agg_config_df[agg_config_df["duration"] == dur]
    g = sns.FacetGrid(subset, row="app", col="proxy", hue="congestion", margin_titles=True, height=4, aspect=1)
    g.map(sns.barplot, "parallel", "mean_total_cpu", order=sorted(subset["parallel"].unique()), alpha=0.6)
    
    g.add_legend()
    g.set_axis_labels("Parallel Streams", "Mean Total CPU (%)")
    g.fig.suptitle(f"Mean Total CPU Usage by Parallel, App, Proxy, Congestion\nDuration: {dur}", y=1.06)
    #plt.tight_layout()
    plt.show()


In [None]:
g = sns.FacetGrid(
    agg_config_df,
    row="congestion", col="proxy", hue="app",
    margin_titles=True, height=4
)
g.map(sns.barplot, "parallel", "mean_net_tx_Gbps", order=sorted(agg_config_df["parallel"].unique()), alpha=0.6)
g.add_legend()
plt.show()


In [None]:
g = sns.FacetGrid(
    agg_config_df,
    row="app", col="congestion", hue="proxy",
    margin_titles=True, height=3
)
g.map(sns.barplot, "parallel", "mean_net_tx_Gbps", order=sorted(agg_config_df["parallel"].unique()), alpha=0.6)
g.add_legend()
plt.show()


In [None]:
# 
g = sns.FacetGrid(
    agg_config_df,
    row="app", col="duration", hue="proxy",
    margin_titles=True, height=4
)
g.map(sns.barplot, "parallel", "mean_net_tx_Gbps", order=sorted(agg_config_df["parallel"].unique()), alpha=0.6)
g.add_legend()
plt.show()


In [None]:
g = sns.FacetGrid(
    agg_config_df,
    row="app", col="proxy", hue="parallel",
    margin_titles=True, height=4, palette="husl"
)
g.map(sns.barplot, "duration", "mean_net_tx_Gbps", order=sorted(agg_config_df["duration"].unique()), alpha=0.4)
g.add_legend()
plt.show()


In [None]:
"""
configs_to_plot = [
    ("HaproxySubprocess", "bbr"),
    ("HaproxySubprocess", "cubic"),
    ("StunnelSubprocess.tls.v1.2", "bbr"),
    ("StunnelSubprocess.tls.v1.2", "cubic"),
    ("StunnelSubprocess.tls.v1.3", "bbr"),   
    ("StunnelSubprocess.tls.v1.3", "cubic"),  
]

labels = []
avg_rx = []

for proxy, congestion in configs_to_plot:
    # str.startswith or strip() 
    subset = agg_config_df[
        (agg_config_df["proxy"] == proxy) &
        (agg_config_df["congestion"] == congestion)
    ]
    if not subset.empty:
        avg_value = subset["mean_net_tx_Gbps"].mean()
        avg_rx.append(avg_value)
    else:
        avg_rx.append(0)
    labels.append(f"{proxy}\n{congestion}")

plt.figure(figsize=(10,6))
bars = plt.bar(labels, avg_rx)
plt.ylabel("Avg Net RX Throughput (Gbps)")
plt.title("Avg Net RX Throughput by Proxy & Congestion")
plt.xticks(rotation=15, ha='right')
#plt.tight_layout()
#plt.show()"""


In [None]:
"""
agg_df["run"] = pd.to_numeric(agg_df["run"], errors="coerce")

example_config = {
    "app": "iperf",
    "proxy": "HaproxySubprocess",
    "congestion": "bbr",
    "parallel": "P1",
    "duration": "T10"
}

mask = (
    (agg_df["app"] == example_config["app"]) &
    (agg_df["proxy"] == example_config["proxy"]) &
    (agg_df["congestion"] == example_config["congestion"]) &
    (agg_df["parallel"] == example_config["parallel"]) &
    (agg_df["duration"] == example_config["duration"])
)

per_run = (
    agg_df[mask]
    .groupby("run")["avg_net_rx_Gbps"]
    .mean()
    .reset_index()
    .sort_values("run")
)


plt.figure(figsize=(8,5))
plt.plot(per_run["run"], per_run["avg_net_rx_Gbps"], marker='o', linestyle='-')
plt.xlabel("Run Number")
plt.ylabel("Avg Net RX Throughput (Gbps)")
plt.title("Per-Run Throughput (Averaged Over TopN Per Run)")
plt.grid(True)
plt.tight_layout()
plt.show()"""


group_cols = ["proxy", "congestion", "run"]
mean_per_run = (
    agg_df
    .groupby(group_cols)["avg_net_rx_Gbps"]
    .mean()
    .reset_index()
    .sort_values("run")
)

plt.figure(figsize=(12,6))
for (proxy, congestion), group in mean_per_run.groupby(["proxy", "congestion"]):
    plt.plot(
        group["run"], group["avg_net_rx_Gbps"], marker='o', label=f"{proxy} / {congestion}"
    )
plt.xlabel("Run Number")
plt.ylabel("Avg Net RX Throughput (Gbps)")
plt.title("Throughput Across Runs (All Proxies & Congestion)")
plt.legend()
plt.tight_layout()
plt.show()


In [None]:

proxies = [
    "Nginx",
    "HaproxySubprocess",
    "StunnelSubprocess.v1.2",
    "StunnelSubprocess.v1.3"
]
congestions = ["bbr", "cubic"]

groups = [
    ("mini-apps", "Nginx"),
    ("mini-apps", "HaproxySubprocess"),
    ("mini-apps", "StunnelSubprocess.v1.2"),
    ("mini-apps", "StunnelSubprocess.v1.3"),
    ("iperf", "Nginx"),
    ("iperf", "HaproxySubprocess"),
    ("iperf", "StunnelSubprocess.v1.2"),
    ("iperf", "StunnelSubprocess.v1.3"),
]

congestions = ["bbr", "cubic"]
colors = ['tab:blue', 'tab:orange']


bar_data = []
for proxy in proxies:
    vals = []
    for congestion in congestions:
        subset = agg_config_df[
            (agg_config_df["proxy"] == proxy) &
            (agg_config_df["congestion"] == congestion)
        ]
        vals.append(subset["mean_net_tx_Gbps"].mean() if not subset.empty else 0)
    bar_data.append(vals)

bar_data = np.array(bar_data) 

bar_width = 0.20
x = np.arange(len(congestions)) 

fig, ax = plt.subplots(figsize=(10,6))
colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red']

for i, (proxy, color) in enumerate(zip(proxies, colors)):
    ax.bar(x + i * bar_width, bar_data[i], width=bar_width, label=proxy, color=color)

ax.set_xticks(x + bar_width)
ax.set_xticklabels(congestions)
ax.set_ylabel("Avg Net RX Throughput (Gbps)")
ax.set_xlabel("Congestion Control")
ax.set_title("Throughput")
ax.legend(title="Proxy")
plt.tight_layout()
plt.show()


In [None]:
"""rx_bar_data = []
tx_bar_data = []
for proxy in proxies:
    rx_vals, tx_vals = [], []
    for congestion in congestions:
        subset = agg_config_df[
            (agg_config_df["proxy"] == proxy) &
            (agg_config_df["congestion"] == congestion)
        ]
        rx_vals.append(subset["sum_total_rx_dropped"].sum() if not subset.empty else 0)
        tx_vals.append(subset["sum_total_tx_dropped"].sum() if not subset.empty else 0)
    rx_bar_data.append(rx_vals)
    tx_bar_data.append(tx_vals)

rx_bar_data = np.array(rx_bar_data)
tx_bar_data = np.array(tx_bar_data)

bar_width = 0.18
x = np.arange(len(congestions))

fig, ax = plt.subplots(figsize=(12,6))
colors = ['tab:blue', 'tab:orange', 'tab:green']

for i, (proxy, color) in enumerate(zip(proxies, colors)):
    ax.bar(x + i * 2 * bar_width, rx_bar_data[i], width=bar_width, label=f"{proxy} RX", color=color, alpha=0.7)
    ax.bar(x + i * 2 * bar_width + bar_width, tx_bar_data[i], width=bar_width, label=f"{proxy} TX", color=color, hatch='//', alpha=0.7)

ax.set_xticks(x + bar_width)
ax.set_xticklabels(congestions)
ax.set_ylabel("Retransmissions (sum)")
ax.set_xlabel("Congestion Control")
ax.set_title("RX & TX (Retransmissions")
ax.legend(title="Legend", ncol=2)
plt.tight_layout()
plt.show()"""


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Define groups as tuples: (app, proxy)
"""groups = [
    ("mini-apps", "Nginx"),
    ("mini-apps", "HaproxySubprocess"),
    ("mini-apps", "StunnelSubprocess.v1.2"),
    ("mini-apps", "StunnelSubprocess.v1.3"),
    ("iperf", "Nginx"),
    ("iperf", "HaproxySubprocess"),
    ("iperf", "StunnelSubprocess.v1.2"),
    ("iperf", "StunnelSubprocess.v1.3"),
]"""
groups = [
    ("mini-apps", "Nginx"),
    ("iperf", "Nginx"),
    
    ("mini-apps", "HaproxySubprocess"),
    ("iperf", "HaproxySubprocess"),
    
    ("mini-apps", "StunnelSubprocess.v1.2"),
    ("iperf", "StunnelSubprocess.v1.2"),
    
    ("mini-apps", "StunnelSubprocess.v1.3"),
    ("iperf", "StunnelSubprocess.v1.3"),
]

congestions = ["bbr", "cubic"]
colors = ['tab:blue', 'tab:orange']

bar_width = 0.35
x = np.arange(len(groups))

# Prepare bar heights
bbr_vals, cubic_vals = [], []
for app, proxy in groups:
    for congestion in congestions:
        subset = agg_config_df[
            (agg_config_df["app"] == app) &
            (agg_config_df["proxy"] == proxy) &
            (agg_config_df["congestion"] == congestion)
        ]
        value = subset["mean_net_tx_Gbps"].mean() if not subset.empty else 0
        if congestion == "bbr":
            bbr_vals.append(value)
        else:
            cubic_vals.append(value)

fig, ax = plt.subplots(figsize=(12,6))

# Plot bars side-by-side for each group
ax.bar(x - bar_width/2, bbr_vals, width=bar_width, color=colors[0], label='bbr')
ax.bar(x + bar_width/2, cubic_vals, width=bar_width, color=colors[1], label='cubic')

# Make x-tick labels as "app\nproxy"
xtick_labels = [f"{app}\n{proxy}" for (app, proxy) in groups]
ax.set_xticks(x)
ax.set_xticklabels(xtick_labels, rotation=15, ha='center')

ax.set_ylabel("Avg Net RX Throughput (Gbps)")
ax.set_xlabel("Experiment Group")
ax.set_title("Throughput by App, Proxy, and Congestion Control")
ax.legend(title="Congestion")
plt.tight_layout()
plt.show()


In [None]:
apps = agg_config_df["app"].unique()
proxies = agg_config_df["proxy"].unique()
congestions = ["bbr", "cubic"]

bar_values = []
labels = []
for proxy in proxies:
    for congestion in congestions:
        for app in apps:
            subset = agg_config_df[
                (agg_config_df["proxy"] == proxy) &
                (agg_config_df["congestion"] == congestion) &
                (agg_config_df["app"] == app)
            ]
            value = subset["mean_net_tx_Gbps"].mean() if not subset.empty else 0
            bar_values.append(value)
            labels.append(f"{proxy}-{app}-{congestion}")
            

fig, ax = plt.subplots(figsize=(12,6))


# Plot bars side-by-side for each group
ax.bar(x - bar_width/2, bbr_vals, width=bar_width, color=colors[0], label='bbr')
ax.bar(x + bar_width/2, cubic_vals, width=bar_width, color=colors[1], label='cubic')

# Make x-tick labels as "app\nproxy"
xtick_labels = [f"{app}\n{proxy}" for (app, proxy) in groups]
ax.set_xticks(x)
ax.set_xticklabels(xtick_labels, rotation=15, ha='center')

ax.set_ylabel("Avg Net RX Throughput (Gbps)")
ax.set_xlabel("Experiment Group")
ax.set_title("Throughput by App, Proxy, and Congestion Control")
ax.legend(title="Congestion")
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

apps = ["mini-apps", "iperf"]
congestions = ["bbr", "cubic"]
"""proxies = [
    "Nginx",
    "HaproxySubprocess",
    "StunnelSubprocess.v1.2",
    "StunnelSubprocess.v1.3"
]"""
proxies = agg_config_df["proxy"].unique()
colors = ['tab:blue', 'tab:orange', 'tab:green']

# All group combinations
groups = [(app, cong) for app in apps for cong in congestions]
n_groups = len(groups)
n_proxies = len(proxies)
bar_width = 0.2
x = np.arange(n_groups)  # group locations

# Prepare bar heights: shape [n_proxies, n_groups]
bar_data = np.zeros((n_proxies, n_groups))
for group_idx, (app, cong) in enumerate(groups):
    for proxy_idx, proxy in enumerate(proxies):
        subset = agg_config_df[
            (agg_config_df["app"] == app) &
            (agg_config_df["congestion"] == cong) &
            (agg_config_df["proxy"] == proxy)
        ]
        bar_data[proxy_idx, group_idx] = subset["mean_net_tx_Gbps"].mean() if not subset.empty else 0

# Plot
fig, ax = plt.subplots(figsize=(12,6))
for proxy_idx, (proxy, color) in enumerate(zip(proxies, colors)):
    ax.bar(
        x + proxy_idx * bar_width,
        bar_data[proxy_idx],
        width=bar_width,
        label=proxy,
        color=color
    )

# X-tick labels: app\ncongestion
xtick_labels = [f"{app}\n{cong}" for (app, cong) in groups]
ax.set_xticks(x + (n_proxies-1) * bar_width / 2)
ax.set_xticklabels(xtick_labels, rotation=0)

ax.set_ylabel("Avg Net RX Throughput (Gbps)")
ax.set_xlabel("App & Congestion")
ax.set_title("Throughput: Proxy Comparison by App and Congestion")
ax.legend(title="Proxy")
plt.tight_layout()
plt.show()


In [None]:
durations = sorted(agg_config_df["duration"].unique(), key=lambda x: int(x[1:]))

group_cols = ['app', 'proxy', 'parallel','congestion']
groups = agg_config_df[group_cols].drop_duplicates().values.tolist()

bar_data = np.zeros((len(durations), len(groups)))
for group_idx, (app, proxy, parallel, congestion) in enumerate(groups):
    for dur_idx, duration in enumerate(durations):
        subset = agg_config_df[
            (agg_config_df["app"] == app) &
            (agg_config_df["proxy"] == proxy) &
            (agg_config_df["parallel"] == parallel) &
            (agg_config_df["congestion"] == congestion) &
            (agg_config_df["duration"] == duration)
        ]
        bar_data[dur_idx, group_idx] = subset["mean_net_tx_Gbps"].mean() if not subset.empty else 0

import matplotlib.pyplot as plt
import numpy as np

n_groups = len(groups)
n_durations = len(durations)
bar_width = 0.8 / n_durations   # Make bars not overflow
x = np.arange(n_groups)
duration_colors = plt.cm.viridis(np.linspace(0, 1, n_durations))  # or use your own color list

fig, ax = plt.subplots(figsize=(14, 6))

for dur_idx, duration in enumerate(durations):
    ax.bar(
        x + dur_idx * bar_width,
        bar_data[dur_idx],
        width=bar_width,
        label=duration,
        color=duration_colors[dur_idx]
    )


xtick_labels = [f"{app}\n{proxy}\n{parallel}\n{congestion}" for (app, proxy, parallel, congestion) in groups]
ax.set_xticks(x + bar_width * (n_durations - 1) / 2)
ax.set_xticklabels(xtick_labels, rotation=15, ha='center')

ax.set_ylabel("Avg Net RX Throughput (Gbps)")
ax.set_xlabel("App, Proxy, Parallel, Congestion")
ax.set_title("Throughput by Duration, App, Proxy, Congestion")
ax.legend(title="Duration")
plt.tight_layout()
plt.show()



In [None]:

durations = sorted(agg_config_df["duration"].unique(), key=lambda x: int(x[1:]))
parallels = sorted(agg_config_df["parallel"].unique(), key=lambda x: int(x[1:]))
apps = agg_config_df["app"].unique()
proxies = agg_config_df["proxy"].unique()
congestions = agg_config_df["congestion"].unique()

proxy_colors = dict(zip(proxies, plt.cm.tab10.colors)) 

for duration in durations:
    fig, ax = plt.subplots(figsize=(12, 5))
    x = np.arange(len(congestions))
    bar_width = 0.15
    for i, proxy in enumerate(proxies):
        heights = []
        for congestion in congestions:
            subset = agg_config_df[
                (agg_config_df["proxy"] == proxy) &
                (agg_config_df["congestion"] == congestion) &
                (agg_config_df["duration"] == duration)
            ]
            heights.append(subset["mean_net_tx_Gbps"].mean() if not subset.empty else 0)
        ax.bar(x + i*bar_width, heights, width=bar_width, label=proxy, color=proxy_colors[proxy])
    ax.set_xticks(x + bar_width * (len(proxies)-1)/2)
    ax.set_xticklabels(congestions)
    ax.set_ylabel("Avg Net RX Throughput (Gbps)")
    ax.set_title(f"Throughput by Proxy and Congestion ({duration})")
    ax.legend(title="Proxy")
    plt.tight_layout()
    plt.show()


In [None]:
parallels = sorted(agg_config_df["parallel"].unique(), key=lambda x: int(x[1:]))
apps = agg_config_df["app"].unique()
proxies = agg_config_df["proxy"].unique()
congestions = agg_config_df["congestion"].unique()

for parallel in parallels:
    fig, ax = plt.subplots(figsize=(12, 5))
    x = np.arange(len(congestions))
    bar_width = 0.15
    for i, proxy in enumerate(proxies):
        heights = []
        for congestion in congestions:
            subset = agg_config_df[
                (agg_config_df["proxy"] == proxy) &
                (agg_config_df["congestion"] == congestion) &
                (agg_config_df["parallel"] == parallel)
            ]
            heights.append(subset["mean_net_tx_Gbps"].mean() if not subset.empty else 0)
        ax.bar(x + i*bar_width, heights, width=bar_width, label=proxy, color=proxy_colors[proxy])
    ax.set_xticks(x + bar_width * (len(proxies)-1)/2)
    ax.set_xticklabels(congestions)
    ax.set_ylabel("Avg Net RX Throughput (Gbps)")
    ax.set_title(f"Throughput by Proxy and Congestion (Parallel {parallel})")
    ax.legend(title="Proxy")
    plt.tight_layout()
    plt.show()


In [None]:
duration_colors = dict(zip(durations, plt.cm.viridis(np.linspace(0,1,len(durations)))))

for proxy in proxies:
    fig, ax = plt.subplots(figsize=(12, 5))
    x = np.arange(len(congestions))
    bar_width = 0.15
    for i, duration in enumerate(durations):
        heights = []
        for congestion in congestions:
            subset = agg_config_df[
                (agg_config_df["proxy"] == proxy) &
                (agg_config_df["congestion"] == congestion) &
                (agg_config_df["duration"] == duration)
            ]
            heights.append(subset["mean_net_tx_Gbps"].mean() if not subset.empty else 0)
        ax.bar(x + i*bar_width, heights, width=bar_width, label=duration, color=duration_colors[duration])
    ax.set_xticks(x + bar_width * (len(durations)-1)/2)
    ax.set_xticklabels(congestions)
    ax.set_ylabel("Avg Net RX Throughput (Gbps)")
    ax.set_title(f"Throughput by Duration and Congestion ({proxy})")
    ax.legend(title="Duration")
    plt.tight_layout()
    plt.show()


---------------

In [None]:

apps = agg_config_df["app"].unique()
proxies = agg_config_df["proxy"].unique()
congestions = agg_config_df["congestion"].unique()

for app in apps:
    fig, ax = plt.subplots(figsize=(10,6))
    bar_width = 0.2
    x = np.arange(len(congestions))
    for i, proxy in enumerate(proxies):
        vals = []
        for cong in congestions:
            subset = agg_config_df[
                (agg_config_df["app"] == app) &
                (agg_config_df["proxy"] == proxy) &
                (agg_config_df["congestion"] == cong)
            ]
            vals.append(subset["mean_net_tx_Gbps"].mean() if not subset.empty else 0)
        ax.bar(x + i*bar_width, vals, width=bar_width, label=proxy)
    ax.set_xticks(x + bar_width*(len(proxies)-1)/2)
    ax.set_xticklabels(congestions)
    ax.set_ylabel("Avg Net RX Throughput (Gbps)")
    ax.set_title(f"Throughput by Proxy & Congestion ({app})")
    ax.legend()
    plt.tight_layout()
    plt.show()

In [None]:
"""for app in apps:
    for proxy in proxies:
        fig, ax = plt.subplots(figsize=(6,4))
        for cong in congestions:
            subset = agg_config_df[
                (agg_config_df["app"] == app) &
                (agg_config_df["proxy"] == proxy) &
                (agg_config_df["congestion"] == cong)
            ]
            if not subset.empty:
                ax.plot(
                    subset["parallel"], subset["mean_net_tx_Gbps"], 
                    marker='o', label=f"{cong}"
                )
        ax.set_xlabel("Parallel Streams")
        ax.set_ylabel("Avg Net RX Throughput (Gbps)")
        ax.set_title(f"Throughput by Parallel Streams ({app}, {proxy})")
        ax.legend(title="Congestion")
        plt.tight_layout()
        plt.show()"""


apps = agg_config_df["app"].unique()
proxies = agg_config_df["proxy"].unique()
congestions = agg_config_df["congestion"].unique()

# different line style to each proxy
proxy_linestyles = {
    "Nginx": '-',
    "HaproxySubprocess": '--',
    "StunnelSubprocess.v1.2": '-.',
    "StunnelSubprocess.v1.3": ':'
}

for app in apps:
    fig, ax = plt.subplots(figsize=(10, 6))
    for proxy in proxies:
        linestyle = proxy_linestyles.get(proxy, '-')
        for cong in congestions:
            subset = agg_config_df[
                (agg_config_df["app"] == app) &
                (agg_config_df["proxy"] == proxy) &
                (agg_config_df["congestion"] == cong)
            ].sort_values("parallel")
            if not subset.empty:
                ax.plot(
                    subset["parallel"], subset["mean_net_tx_Gbps"],
                    marker='o',
                    linestyle=linestyle,
                    label=f"{proxy} - {cong}"
                )
    ax.set_xlabel("Parallel Streams")
    ax.set_ylabel("Avg Net RX Throughput (Gbps)")
    ax.set_title(f"Throughput by Parallel Streams ({app})")
    ax.legend(title="Proxy / Congestion")
    plt.tight_layout()
    plt.show()


In [None]:
"""for app in apps:
    for proxy in proxies:
        for cong in congestions:
            subset = agg_config_df[
                (agg_config_df["app"] == app) &
                (agg_config_df["proxy"] == proxy) &
                (agg_config_df["congestion"] == cong)
            ]
            if not subset.empty:
                plt.figure(figsize=(8,4))
                plt.plot(
                    subset["duration"].apply(lambda x: int(x[1:])), 
                    subset["mean_net_tx_Gbps"], marker='o'
                )
                plt.xlabel("Duration (seconds)")
                plt.ylabel("Avg Net RX Throughput (Gbps)")
                plt.title(f"Throughput vs Duration ({app}, {proxy}, {cong})")
                plt.tight_layout()
                plt.show()"""


In [None]:
"""for app in apps:
    fig, ax = plt.subplots(figsize=(10,6))
    bar_width = 0.2
    x = np.arange(len(congestions))
    for i, proxy in enumerate(proxies):
        vals = []
        for cong in congestions:
            subset = agg_config_df[
                (agg_config_df["app"] == app) &
                (agg_config_df["proxy"] == proxy) &
                (agg_config_df["congestion"] == cong)
            ]
            vals.append(subset["sum_total_rx_dropped"].sum() if not subset.empty else 0)
        ax.bar(x + i*bar_width, vals, width=bar_width, label=proxy)
    ax.set_xticks(x + bar_width*(len(proxies)-1)/2)
    ax.set_xticklabels(congestions)
    ax.set_ylabel("Total RX Retransmissions")
    ax.set_title(f"Retransmissions by Proxy & Congestion ({app})")
    ax.legend()
    plt.tight_layout()
    plt.show()"""


In [None]:

plt.figure(figsize=(14, 6))
sns.boxplot(
    data=df, 
    x="proxy", 
    y="net_rx_Gbps", 
    hue="congestion"
)
plt.title("Run-to-Run Throughput Variability")
plt.tight_layout()
plt.show()


In [None]:
"""plt.figure(figsize=(14, 6))
sns.boxplot(
    data=df, 
    x="proxy", 
    y="total_rx_dropped", 
    hue="congestion"
)
plt.title("Run-to-Run Retransmission Variability")
plt.tight_layout()
plt.show()
"""

In [None]:

"""run_subset = df[
    (df["app"] == "mini-apps") & 
    (df["proxy"] == "Nginx") & 
    (df["congestion"] == "bbr") &
    (df["run"] == 10)
]
plt.plot(run_subset["timestamp"], run_subset["net_rx_Gbps"])
plt.xlabel("Time")
plt.ylabel("Net RX Throughput (Gbps)")
plt.title("Throughput Over Time (Example Run)")
plt.tight_layout()
plt.show()
"""

In [None]:


# (Optional) Sort or filter your DataFrame as needed
# agg_config_df = agg_config_df[agg_config_df["app"] == "mini-apps"] # example

# Create a column with combined label for faceting (optional, or use row/col separately)
agg_config_df["facet"] = agg_config_df["proxy"] + "\n" + agg_config_df["congestion"]

# Get all unique proxies and congestions for faceting
proxies = agg_config_df["proxy"].unique()
congestions = agg_config_df["congestion"].unique()

# Set up the FacetGrid
g = sns.FacetGrid(
    agg_config_df,
    row="proxy", col="congestion",
    margin_titles=True, height=3,
    sharex=True, sharey=True
)

# Define a function to draw a heatmap on each facet
def draw_heatmap(data, **kwargs):
    pivot = data.pivot_table(
        index="parallel", columns="duration", values="mean_net_tx_Gbps", aggfunc="mean"
    )
    sns.heatmap(
        pivot,
        annot=True, fmt=".2f", cmap="viridis",
        cbar=False,  # Only add colorbar to one facet for clarity
        **kwargs
    )

# Map the function onto the FacetGrid
g.map_dataframe(draw_heatmap)

# Set overall title and axis labels
plt.subplots_adjust(top=0.85)
g.fig.suptitle("Throughput by Parallel, Duration (Faceted by Proxy and Congestion)")
g.set_axis_labels("Duration", "Parallel Streams")
plt.show()


In [None]:
best_df = (
    agg_config_df
    .groupby(['app', 'proxy', 'congestion'])
    .agg(max_rx=('mean_net_tx_Gbps', 'max'))
    .reset_index()
)
plt.figure(figsize=(12,6))
sns.barplot(
    data=best_df, 
    x="proxy", y="max_rx", hue="congestion"
)
plt.title("Best Throughput per App, Proxy, Congestion")
plt.tight_layout()
plt.show()


In [None]:
summary = (
    agg_config_df
    .groupby(['app', 'proxy', 'congestion'])
    .agg(
        mean_rx=('mean_net_tx_Gbps', 'mean'),
        mean_retrans=('sum_total_rx_dropped', 'mean'),
        max_cpu=('max_total_cpu', 'max')
    )
    .reset_index()
)
print(summary)
