# Table 3: Comparing data augmentation functions applied in supervised training.

[:simple-jupyter: :material-download:](../../paper_tables_and_figures/table3_ucdavis-icdm19_comparing_data_augmentations_functions/table3_ucdavis-icdm19_comparing_data_augmentations_functions.ipynb)

In [1]:
import numpy as np
import pandas as pd
import statsmodels.stats.api as sms

In [2]:
import itertools
import pathlib
import tempfile

In [None]:
def compute_ci95(ser):
    """Return the half-width of the 95% confidence interval of the mean.

    The interval comes from the t-distribution based
    ``DescrStatsW.tconfint_mean`` with ``alpha=0.05``; the value returned is
    the distance between the upper bound and the sample mean of ``ser``.

    Parameters
    ----------
    ser : pandas.Series
        Numeric samples (its ``.values`` and ``.mean()`` are used).

    Returns
    -------
    float
        Upper confidence bound minus the mean of ``ser``.
    """
    # Only the upper bound is needed; the lower one is discarded.
    _, upper = sms.DescrStatsW(ser.values).tconfint_mean(alpha=0.05)
    return upper - ser.mean()

In [7]:
# Folder containing the per-resolution "runsinfo_flowpic_dim_*.parquet" files
# loaded in the next cell. The path is relative, so it is resolved against the
# current working directory of the notebook.
folder_campaign_summary = pathlib.Path(
    "campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/1684447037/"
)

In [8]:
# load results
# Each parquet file listed below is read from the campaign summary folder and
# the resulting frames are stacked row-wise (pd.concat default axis) into `df`.
runsinfo_files = (
    "runsinfo_flowpic_dim_32.parquet",
    "runsinfo_flowpic_dim_64.parquet",
    "runsinfo_flowpic_dim_1500.parquet",
)
df = pd.concat(
    [
        pd.read_parquet(folder_campaign_summary / parquet_name)
        for parquet_name in runsinfo_files
    ]
)

In [15]:
# Summary statistics of the "acc" column, one table per flowpic dimension.
# Each table is indexed by (test_split_name, aug_name) and has the columns
# "count", "mean", "std" and "ci95" (half-width of the 95% CI of the mean,
# as computed by compute_ci95).
df_agg_dict = dict()
for flowpic_dim in (32, 64, 1500):
    df_tmp = df[df["flowpic_dim"] == flowpic_dim]
    # Named aggregation produces flat, explicitly named columns directly, so
    # no droplevel/rename step tied to the aggregation function's __name__ is
    # needed afterwards.
    df_agg = df_tmp.groupby(["test_split_name", "aug_name"]).agg(
        count=("acc", "count"),
        mean=("acc", "mean"),
        std=("acc", "std"),
        ci95=("acc", compute_ci95),
    )
    df_agg_dict[flowpic_dim] = df_agg

In [140]:
# loading imc22-paper results
# (these are obviously copied)
import io

IMC22_TABLE_TEST_SCRIPT = """
aug_name,32,64,1500
No augmentation,98.67,99.1,96.22
Rotate,98.6,98.87,94.89
Horizontal flip,98.93,99.27,97.33
Color jitter,96.73,96.4,94.0
Packet loss,98.73,99.6,96.22
Time shift,99.13,99.53,97.56
Change rtt,99.4,100.0,98.44
"""

IMC22_TABLE_TEST_HUMAN = """
aug_name,32,64,1500
No augmentation,92.4,85.6,73.3
Rotate,93.73,87.07,77.3
Horizontal flip,94.67,79.33,87.9
Color jitter,82.93,74.93,68.0
Packet loss,90.93,85.6,84.0
Time shift,92.8,87.33,77.3
Change rtt,96.4,88.6,90.7
"""


def _load_imc22_table(csv_text):
    """Parse one embedded IMC22 CSV table into a DataFrame.

    The text is parsed from an in-memory buffer, so nothing is written to
    disk and no temporary file has to be reopened by name while it is still
    open (which is not portable across platforms).

    Parameters
    ----------
    csv_text : str
        CSV content with an ``aug_name`` column followed by one column per
        flowpic dimension. Leading blank lines are skipped by ``read_csv``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``aug_name``; columns are a 3-level MultiIndex
        ``("imc22-paper", <dimension as str>, "mean")``.
    """
    df_table = pd.read_csv(io.StringIO(csv_text)).set_index("aug_name")
    # The CSV header yields string column labels ("32", "64", "1500"); they
    # are kept as strings and wrapped between a source and a metric level.
    df_table.columns = pd.MultiIndex.from_product(
        [["imc22-paper"], df_table.columns, ["mean"]]
    )
    return df_table


df_imc22_table_test_script = _load_imc22_table(IMC22_TABLE_TEST_SCRIPT)
df_imc22_table_test_human = _load_imc22_table(IMC22_TABLE_TEST_HUMAN)

In [141]:
# Raw identifiers found in the aggregated results -> display labels.
# The same mapping is applied to both the split names and the augmentation
# names (the two sets of keys do not overlap).
RENAMING = {
    "test-human": "human",
    "test-script": "script",
    "test-train-val-leftover": "leftover",
    "noaug": "No augmentation",
    "changertt": "Change rtt",
    "colorjitter": "Color jitter",
    "horizontalflip": "Horizontal flip",
    "packetloss": "Packet loss",
    "rotate": "Rotate",
    "timeshift": "Time shift",
}

# Row order used for every per-split table.
AUG_NAME_ORDER = [
    "No augmentation",
    "Rotate",
    "Horizontal flip",
    "Color jitter",
    "Packet loss",
    "Time shift",
    "Change rtt",
]

# partial_dfs[split][flowpic_dim] -> table indexed by augmentation label with
# the rounded "mean" and "ci95" columns.
partial_dfs = {split: {} for split in ("human", "script", "leftover")}

for flowpic_dim in (32, 64, 1500):
    df_summary = df_agg_dict[flowpic_dim][["mean", "ci95"]].round(2).reset_index()
    for label_col in ("test_split_name", "aug_name"):
        df_summary[label_col] = df_summary[label_col].replace(RENAMING)
    df_by_split = df_summary.set_index("test_split_name", drop=True)

    for split_name, per_dim in partial_dfs.items():
        # Select the rows of this split, re-index them by augmentation label
        # and put them in the reference row order.
        per_dim[flowpic_dim] = (
            df_by_split.loc[split_name]
            .set_index("aug_name", drop=True)
            .loc[AUG_NAME_ORDER]
        )

In [142]:
# For each test split, place the per-resolution tables side by side (the dict
# keys 32 / 64 / 1500 become the first column level) and then nest the result
# under a top-level "ours" column.
#
# The extra level is prepended through the `pd.concat` keys rather than by
# rebuilding the columns with MultiIndex.from_product(columns.levels): the
# latter relabels columns positionally and would silently mislabel them if the
# stored level order ever differed from the actual column order.
df_ours_script = pd.concat(
    {"ours": pd.concat(partial_dfs["script"], axis=1)}, axis=1
)

df_ours_human = pd.concat(
    {"ours": pd.concat(partial_dfs["human"], axis=1)}, axis=1
)

df_ours_leftover = pd.concat(
    {"ours": pd.concat(partial_dfs["leftover"], axis=1)}, axis=1
)

In [143]:
print("=== test on script ===")
# Paper numbers and ours side by side, aligned on the augmentation name.
df_tmp = pd.concat((df_imc22_table_test_script, df_ours_script), axis=1)

# Extra all-NaN row; only the "ours"/"mean" cells are filled in below.
df_tmp.loc["mean_diff", :] = np.nan
# Our tables key the resolution by int, the paper table (parsed from CSV
# headers) by str, hence the paired labels.
for ours_dim, paper_dim in ((32, "32"), (64, "64"), (1500, "1500")):
    ours_col = ("ours", ours_dim, "mean")
    paper_col = ("imc22-paper", paper_dim, "mean")
    # Average (ours - paper) gap over the augmentations; the NaN placeholder
    # row is skipped by mean().
    gap = df_tmp[ours_col] - df_tmp[paper_col]
    df_tmp.loc["mean_diff", ours_col] = gap.mean().round(2)
# Blank out NaNs for display only (the result is not assigned back).
df_tmp.fillna("")

=== test on script ===


Unnamed: 0_level_0,imc22-paper,imc22-paper,imc22-paper,ours,ours,ours,ours,ours,ours
Unnamed: 0_level_1,32,64,1500,32,32,64,64,1500,1500
Unnamed: 0_level_2,mean,mean,mean,mean,ci95,mean,ci95,mean,ci95
aug_name,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3
No augmentation,98.67,99.1,96.22,95.73,0.27,95.96,0.29,94.44,0.9
Rotate,98.6,98.87,94.89,96.36,0.39,96.89,0.39,95.47,0.47
Horizontal flip,98.93,99.27,97.33,95.11,0.41,95.96,0.49,95.11,0.68
Color jitter,96.73,96.4,94.0,97.87,0.45,97.42,0.67,94.89,0.83
Packet loss,98.73,99.6,96.22,96.98,0.48,96.89,0.53,95.96,0.7
Time shift,99.13,99.53,97.56,96.71,0.51,97.11,0.36,96.8,0.32
Change rtt,99.4,100.0,98.44,97.33,0.39,97.29,0.35,96.8,0.35
mean_diff,,,,-2.01,,-2.18,,-0.74,


In [144]:
print("=== test on human ===")
# Paper numbers and ours side by side, aligned on the augmentation name.
df_tmp = pd.concat((df_imc22_table_test_human, df_ours_human), axis=1)

# Extra all-NaN row; only the "ours"/"mean" cells are filled in below.
df_tmp.loc["mean_diff", :] = np.nan
# Our tables key the resolution by int, the paper table (parsed from CSV
# headers) by str, hence the paired labels.
for ours_dim, paper_dim in ((32, "32"), (64, "64"), (1500, "1500")):
    ours_col = ("ours", ours_dim, "mean")
    paper_col = ("imc22-paper", paper_dim, "mean")
    # Average (ours - paper) gap over the augmentations; the NaN placeholder
    # row is skipped by mean().
    gap = df_tmp[ours_col] - df_tmp[paper_col]
    df_tmp.loc["mean_diff", ours_col] = gap.mean().round(2)
# Blank out NaNs for display only (the result is not assigned back).
df_tmp.fillna("")

=== test on human ===


Unnamed: 0_level_0,imc22-paper,imc22-paper,imc22-paper,ours,ours,ours,ours,ours,ours
Unnamed: 0_level_1,32,64,1500,32,32,64,64,1500,1500
Unnamed: 0_level_2,mean,mean,mean,mean,ci95,mean,ci95,mean,ci95
aug_name,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3
No augmentation,92.4,85.6,73.3,69.48,1.17,69.88,1.26,68.67,1.07
Rotate,93.73,87.07,77.3,71.57,1.95,71.0,1.35,67.87,0.86
Horizontal flip,94.67,79.33,87.9,69.8,1.39,70.92,1.83,73.82,0.82
Color jitter,82.93,74.93,68.0,68.84,2.59,71.33,1.86,68.59,1.76
Packet loss,90.93,85.6,84.0,71.0,1.02,73.17,0.89,72.13,1.04
Time shift,92.8,87.33,77.3,70.36,1.65,72.53,1.02,70.84,1.34
Change rtt,96.4,88.6,90.7,70.04,2.44,72.05,1.16,72.69,1.48
mean_diff,,,,-21.82,,-12.51,,-9.13,


In [145]:
# Only our own numbers are displayed for this split: the IMC22 reference
# tables embedded in this notebook cover the "script" and "human" splits only.
print("=== test on leftover ===")
df_ours_leftover

=== test on leftover ===


Unnamed: 0_level_0,ours,ours,ours,ours,ours,ours
Unnamed: 0_level_1,32,32,64,64,1500,1500
Unnamed: 0_level_2,mean,ci95,mean,ci95,mean,ci95
aug_name,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
No augmentation,96.05,0.19,96.22,0.31,95.62,0.51
Rotate,97.01,0.24,97.28,0.34,95.93,0.41
Horizontal flip,95.88,0.25,96.38,0.5,96.47,0.57
Color jitter,97.46,0.33,96.82,0.41,95.79,0.5
Packet loss,97.47,0.35,97.48,0.28,97.29,0.27
Time shift,97.44,0.42,97.78,0.38,97.94,0.19
Change rtt,98.24,0.31,98.29,0.39,98.43,0.12
