In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import re
import os
import json
from datetime import datetime
from pathlib import Path

extract 1:

In [2]:
BASE_DIR = Path("/home/seena/Projects/chameleon/chi_mnt/experiments/exp1")

In [3]:
def human_readable_bytes(num):
    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
        if abs(num) < 1024.0:
            return f"{num:.2f} {unit}"
        num /= 1024.0
    return f"{num:.2f} PB"

def human_readable_bps(num):
    for unit in ['bps', 'Kbps', 'Mbps', 'Gbps', 'Tbps']:
        if abs(num) < 1000.0:
            return f"{num:.2f} {unit}"
        num /= 1000.0
    return f"{num:.2f} Pbps"


def cons_iperf_jsons(file_path, base_dir):
    try:
        with open(file_path, 'r') as f:
            data = json.load(f)

        start = data.get("start", {})
        end = data.get("end", {})
        test_start = start.get("test_start", {})
        cpu = end.get("cpu_utilization_percent", {})

        timestamp = start.get("timestamp", {}).get("time", "")
        timestamp = datetime.strptime(timestamp, "%a, %d %b %Y %H:%M:%S %Z").isoformat()

        rel_path = str(Path(file_path).relative_to(base_dir))

        sent = end.get("sum_sent", {})
        received = end.get("sum_received", {})

        connected = start.get("connected", [{}])[0]  # local/remote host info

        return {
            "path": rel_path,
            "timestamp": timestamp,

            "local_host": connected.get("local_host", ""),
            "remote_host": connected.get("remote_host", ""),
            "tcp_mss_default": start.get("tcp_mss_default"),
            #"sndbuf_actual": start.get("sndbuf_actual"),
            #"rcvbuf_actual": start.get("rcvbuf_actual"),

            "blksize": test_start.get("blksize"),
            "duration": (test_start.get("duration")),
            "omit": test_start.get("omit"),
            "protocol": test_start.get("protocol"),
            "num_streams": test_start.get("num_streams"),
            "reverse": test_start.get("reverse"),
            "port": start.get("connecting_to", {}).get("port"),
            "congestion": end.get("sender_tcp_congestion"),

            "bytes_sent": human_readable_bytes(sent.get("bytes", 0)),
            "bps_sent": human_readable_bps(sent.get("bits_per_second", 0)),
            "retransmissions": sent.get("retransmits", 0),

            "bytes_received": human_readable_bytes(received.get("bytes", 0)),
            "bps_received": human_readable_bps(received.get("bits_per_second", 0)),

            "host_cpu_total": round(cpu.get("host_total", 0), 1),
            "host_user": round(cpu.get("host_user", 0), 1),
            "host_system": round(cpu.get("host_system", 0), 1),
            "remote_cpu_total": round(cpu.get("remote_total", 0), 1),
            "remote_user": round(cpu.get("remote_user", 0), 1),
            "remote_system": round(cpu.get("remote_system", 0), 1),
        }

    except Exception as e:
        print(f"[ERROR] Failed to parse {file_path}: {e}")
        return None


"""def find_cons_iperf_jsons(base_dir):
    files = []
    for root, _, filenames in os.walk(base_dir):
        for name in filenames:
            if name.endswith(".json") and "iperf" in name:
                files.append(os.path.join(root, name))
    return files"""

def find_cons_iperf_jsons(base_dir):
    """Find all iperf JSON files within the experiment directory."""
    base_dir = os.path.expanduser(base_dir)
    iperf_jsons = []
    for root, _, files in os.walk(base_dir):
        for file in files:
            if file.startswith("iperf_") and file.endswith(".json"):
                full_path = os.path.join(root, file)
                print(f"[DEBUG] Found iperf JSON: {full_path}")
                iperf_jsons.append(full_path)
    return iperf_jsons


def cons_all_iperf_jsons(base_dir):
    iperf_files = find_cons_iperf_jsons(base_dir)
    print(f"[INFO] Discovered {len(iperf_files)} iperf JSON files.")

    records = []
    for i, file in enumerate(iperf_files, 1):
        print(f"[{i}/{len(iperf_files)}] Processing: {file}")
        record = cons_iperf_jsons(file, base_dir)
        if record:
            records.append(record)

    print(f"[INFO] Successfully parsed {len(records)} records.")

    df = pd.DataFrame(records)
    if not df.empty:
        df = df.sort_values(by="path").reset_index(drop=True)
        #print("\n[PREVIEW] Sorted DataFrame:")
        print(df.head(20))
    #else:
        print("\n[WARNING] No valid iperf records found.")

    print("\n[FINISHED] Extraction complete.")
    return df


"""if __name__ == "__main__":
    BASE_DIR = Path("/home/seena/Projects/chameleon/chi_mnt/exps/exp2/cons")
    df_iperf = cons_all_iperf_jsons(BASE_DIR)

    OUTPUT_DIR = BASE_DIR / "datas"
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    df_iperf.to_csv(OUTPUT_DIR / "extract_data_1.csv", index=False)
    print(f"[INFO] Saved full DataFrame to {OUTPUT_DIR / 'extract_data_1.csv'}")"""


'if __name__ == "__main__":\n    BASE_DIR = Path("/home/seena/Projects/chameleon/chi_mnt/exps/exp2/cons")\n    df_iperf = cons_all_iperf_jsons(BASE_DIR)\n\n    OUTPUT_DIR = BASE_DIR / "datas"\n    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n\n    df_iperf.to_csv(OUTPUT_DIR / "extract_data_1.csv", index=False)\n    print(f"[INFO] Saved full DataFrame to {OUTPUT_DIR / \'extract_data_1.csv\'}")'

In [4]:
#BASE_DIR = Path("/home/seena/Projects/chameleon/chi_mnt/exps/exp1/cons")
df_iperf = cons_all_iperf_jsons(BASE_DIR)

OUTPUT_DIR = BASE_DIR / "datas"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

df_iperf.to_csv(OUTPUT_DIR / "extract_data_1.csv", index=False)
print(f"[INFO] Saved full DataFrame to {OUTPUT_DIR / 'extract_data_1.csv'}")

[DEBUG] Found iperf JSON: /home/seena/Projects/chameleon/chi_mnt/experiments/exp1/cubic_StunnelSubprocess/P3/T30_R2/iperf_5100.json
[DEBUG] Found iperf JSON: /home/seena/Projects/chameleon/chi_mnt/experiments/exp1/cubic_StunnelSubprocess/P3/T30_R3/iperf_5100.json
[DEBUG] Found iperf JSON: /home/seena/Projects/chameleon/chi_mnt/experiments/exp1/cubic_StunnelSubprocess/P3/T30_R5/iperf_5100.json
[DEBUG] Found iperf JSON: /home/seena/Projects/chameleon/chi_mnt/experiments/exp1/cubic_StunnelSubprocess/P3/T30_R4/iperf_5100.json
[DEBUG] Found iperf JSON: /home/seena/Projects/chameleon/chi_mnt/experiments/exp1/cubic_StunnelSubprocess/P3/T30_R1/iperf_5100.json
[DEBUG] Found iperf JSON: /home/seena/Projects/chameleon/chi_mnt/experiments/exp1/cubic_StunnelSubprocess/P1/T30_R2/iperf_5100.json
[DEBUG] Found iperf JSON: /home/seena/Projects/chameleon/chi_mnt/experiments/exp1/cubic_StunnelSubprocess/P1/T30_R3/iperf_5100.json
[DEBUG] Found iperf JSON: /home/seena/Projects/chameleon/chi_mnt/experiments

extract 2:

In [5]:
def parse_path_info(path_str):
    parts = Path(path_str).parts
    congestion_proxy = parts[0].split("_")
    parallel = parts[1]
    test_info = parts[2]
    port_file = parts[3]

    proxy = congestion_proxy[1]
    p_val = int(parallel[1:])

    match = re.match(r"T(\d+)_R(\d+)", test_info)
    run = int(match.group(2))

    return {
        "proxy": proxy,
        "parallel": p_val,
        "run": run
    }


def parse_bytes(val):
    if isinstance(val, str):
        num, unit = val.split()
        num = float(num)
        return num * {"B": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3}.get(unit, 1)
    return val


def parse_bps(val):
    if isinstance(val, str):
        num, unit = val.split()
        num = float(num)
        return num * {"bps": 1, "Kbps": 1e3, "Mbps": 1e6, "Gbps": 1e9}.get(unit, 1)
    return val


def human_readable_bytes(num):
    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
        if abs(num) < 1024.0:
            return f"{num:.2f} {unit}"
        num /= 1024.0
    return f"{num:.2f} PB"


def human_readable_bps(num):
    for unit in ['bps', 'Kbps', 'Mbps', 'Gbps', 'Tbps']:
        if abs(num) < 1000.0:
            return f"{num:.2f} {unit}"
        num /= 1000.0
    return f"{num:.2f} Pbps"


def summarize_iperf_dataframe(csv_path):
    df = pd.read_csv(csv_path)

    # Extract structured metadata from path
    path_info_df = df["path"].apply(parse_path_info).apply(pd.Series)
    df[path_info_df.columns] = path_info_df

    # Parse numeric bytes/bps
    for col in ["bytes_sent", "bytes_received"]:
        df[col] = df[col].apply(parse_bytes)
    for col in ["bps_sent", "bps_received"]:
        df[col] = df[col].apply(parse_bps)

    # Group
    df_grouped = (
        df.groupby(["congestion", "proxy", "parallel", "duration", "run"])
        .agg({
            "bytes_sent": "sum",
            "bps_sent": "sum",
            "retransmissions": "sum",
            "bytes_received": "sum",
            "bps_received": "sum",
            "host_cpu_total": "mean",
            "host_user": "mean",
            "host_system": "mean",
            "remote_cpu_total": "mean",
            "remote_user": "mean",
            "remote_system": "mean",
        })
        .reset_index()
    )

    # Format: bytes, bps → human readable; float cols → 1 decimal
    df_grouped["bytes_sent"] = df_grouped["bytes_sent"].apply(human_readable_bytes)
    df_grouped["bps_sent"] = df_grouped["bps_sent"].apply(human_readable_bps)
    df_grouped["bytes_received"] = df_grouped["bytes_received"].apply(human_readable_bytes)
    df_grouped["bps_received"] = df_grouped["bps_received"].apply(human_readable_bps)

    for col in [
        "host_cpu_total", "host_user", "host_system",
        "remote_cpu_total", "remote_user", "remote_system"
    ]:
        df_grouped[col] = df_grouped[col].round(1)

    return df_grouped


"""if __name__ == "__main__":
    BASE_DIR = Path("/home/seena/Projects/chameleon/chi_mnt/exps/exp2/cons")
    csv_input_path = BASE_DIR / "datas" / "extract_data_1.csv"

    df_summary = summarize_iperf_dataframe(csv_input_path)

    OUTPUT_DIR = BASE_DIR / "datas"
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    output_path = OUTPUT_DIR / "extract_data_2.csv"
    df_summary.to_csv(output_path, index=False)

    print(f"[INFO] Saved readable summary to {output_path}")"""

'if __name__ == "__main__":\n    BASE_DIR = Path("/home/seena/Projects/chameleon/chi_mnt/exps/exp2/cons")\n    csv_input_path = BASE_DIR / "datas" / "extract_data_1.csv"\n\n    df_summary = summarize_iperf_dataframe(csv_input_path)\n\n    OUTPUT_DIR = BASE_DIR / "datas"\n    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n\n    output_path = OUTPUT_DIR / "extract_data_2.csv"\n    df_summary.to_csv(output_path, index=False)\n\n    print(f"[INFO] Saved readable summary to {output_path}")'

In [6]:
#BASE_DIR = Path("/home/seena/Projects/chameleon/chi_mnt/exps/exp2/cons")
csv_input_path = BASE_DIR / "datas" / "extract_data_1.csv"

df_summary = summarize_iperf_dataframe(csv_input_path)

OUTPUT_DIR = BASE_DIR / "datas"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

output_path = OUTPUT_DIR / "extract_data_2.csv"
df_summary.to_csv(output_path, index=False)

print(f"[INFO] Saved readable summary to {output_path}")

EmptyDataError: No columns to parse from file

Plot:

In [None]:
def parse_bps(val):
    if isinstance(val, str):
        parts = val.split()
        if len(parts) == 2:
            num, unit = parts
            num = float(num)
            return num * {"bps": 1, "Kbps": 1e3, "Mbps": 1e6, "Gbps": 1e9}.get(unit, 1)
    return pd.to_numeric(val, errors="coerce")


In [None]:
"""def plot_metric(metric, ylabel):
    for duration in durations:
        for parallel in parallels:
            plt.figure(figsize=(10, 6))
            subset = df[(df["duration"] == duration) & (df["parallel"] == parallel)]

            for (proxy, congestion), group in subset.groupby(["proxy", "congestion"]):
                group = group.sort_values("run")
                #label = f"{proxy} + {congestion}"
                avg_val = group[metric].mean()
                label = f"{proxy} + {congestion} (avg={avg_val:.2f})"
                plt.plot(group["run"], group[metric], marker="o", label=label)

            plt.title(f"{ylabel} - Duration {duration}s - Parallel {parallel}")
            plt.xlabel("Run")
            plt.ylabel(ylabel)
            plt.grid(True)
            plt.legend()
            plt.tight_layout()
            plt.show()"""
def plot_metric(metric, ylabel):
    for duration in durations:
        plt.figure(figsize=(10, 6))
        subset = df[df["duration"] == duration]

        for (proxy, congestion, parallel), group in subset.groupby(["proxy", "congestion", "parallel"]):
            group = group.sort_values("run")
            avg_val = group[metric].mean()
            label = f"{proxy} + {congestion} P{parallel} (avg={avg_val:.2f})"
            plt.plot(group["run"], group[metric], marker="o", label=label)

        plt.title(f"{ylabel} - Duration {duration}s")
        plt.xlabel("Run")
        plt.ylabel(ylabel)
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()

In [None]:
#df = pd.read_csv("/home/seena/Projects/chameleon/chi_mnt/exps/exp2/cons/datas/extract_data_2.csv")
df = pd.read_csv(output_path)


df["bps_received"] = df["bps_received"].apply(parse_bps)
df["throughput_gbps"] = df["bps_received"] / 1e9
df["retransmissions"] = pd.to_numeric(df["retransmissions"], errors="coerce")

durations = sorted(df["duration"].dropna().unique())
parallels = sorted(df["parallel"].dropna().unique())

plot_metric("throughput_gbps", "Throughput (Gbps)")
#plot_metric("retransmissions", "Retransmissions")