# Tables

Prepare results.csv

In [None]:
import pandas as pd
import numpy as np

import os
from datetime import datetime

# Locations: results.csv is read from run_dir, which sits under project_dir.
project_dir = "/content"
run_dir = os.path.join(project_dir, "results")
results_csv = os.path.join(run_dir, "results.csv")

# Load the raw results and report their size.
df = pd.read_csv(results_csv)
print("Loaded:", results_csv)
print("Rows:", len(df), "Cols:", len(df.columns))

# Metric columns that must be numeric. Columns absent from the file are
# skipped; values that cannot be parsed become NaN (errors="coerce").
num_cols = [
    "n_eval",
    "n_ok",
    "fail_rate",
    "changed_rate",
    "avg_similarity",
    "invariance_rate",
    "acc",
    "f1",
    "acc_clean",
    "f1_clean",
    "drop_acc",
    "drop_f1",
    "time_sec",
    "peak_rss_mb",
    "attack_success_rate",
]
_present_metrics = [name for name in num_cols if name in df.columns]
for _metric in _present_metrics:
    df[_metric] = pd.to_numeric(df[_metric], errors="coerce")

# The dataset identifier lives in "dataset_key" when present, otherwise in
# "dataset"; without either column nothing below can be grouped.
dataset_col = next(
    (name for name in ("dataset_key", "dataset") if name in df.columns),
    None,
)
if dataset_col is None:
    raise ValueError("Не нашла колонку dataset_key или dataset в results.csv")

# Display names for dataset identifiers, covering both the short keys and
# the slash-qualified forms. Each key appears exactly once: a repeated key
# in a dict literal is silently overwritten by its last occurrence.
dataset_names = {
    "sst2": "SST-2",
    "imdb": "IMDb",
    "emotion": "Emotion",
    "glue/sst2": "SST-2",
    "dair-ai/emotion": "Emotion",
}


def pretty_dataset(x):
    """Return the display name for dataset identifier ``x``.

    ``x`` is converted with ``str`` before the lookup. Identifiers that have
    no entry in ``dataset_names`` are returned as that string, unchanged.
    """
    key = str(x)
    return dataset_names.get(key, key)

# List the distinct non-null dataset identifiers, then preview the first rows.
_present_datasets = sorted(df[dataset_col].dropna().unique().tolist())
print("Datasets:", _present_datasets)
df.head(5)

Create summary_by_test.csv

In [None]:
# One output row per (dataset, model, tool, test). The same key list is used
# for grouping and for the final ordering.
_test_keys = [dataset_col, "model", "tool", "test_id"]

# Output column -> (source column, aggregation). Counts and rates get a mean;
# accuracy/F1 drop, time, memory and attack success rate also get a std.
_test_level_spec = {
    "repeats": ("repeat_id", "nunique"),
    "n_eval_mean": ("n_eval", "mean"),
    "n_ok_mean": ("n_ok", "mean"),
    "fail_rate_mean": ("fail_rate", "mean"),
    "changed_rate_mean": ("changed_rate", "mean"),
    "sim_mean": ("avg_similarity", "mean"),
    "inv_mean": ("invariance_rate", "mean"),
    "drop_acc_mean": ("drop_acc", "mean"),
    "drop_acc_std": ("drop_acc", "std"),
    "drop_f1_mean": ("drop_f1", "mean"),
    "drop_f1_std": ("drop_f1", "std"),
    "time_mean": ("time_sec", "mean"),
    "time_std": ("time_sec", "std"),
    "peak_mb_mean": ("peak_rss_mb", "mean"),
    "peak_mb_std": ("peak_rss_mb", "std"),
    "asr_mean": ("attack_success_rate", "mean"),
    "asr_std": ("attack_success_rate", "std"),
}

_grouped_by_test = df.groupby(_test_keys).agg(**_test_level_spec)
summary_by_test = _grouped_by_test.reset_index().sort_values(_test_keys)

# Persist the table; summary_path is read back by a later cell.
summary_path = os.path.join(run_dir, "summary_by_test.csv")
summary_by_test.to_csv(summary_path, index=False)
print("saved:", summary_path)

summary_by_test.head(20)

Create table_augly.csv, table_checklist.csv, table_textattack.csv

In [None]:
def make_table_for_tool(tool_name: str):
    """Build the per-test summary table for a single tool.

    Rows of the module-level ``df`` whose ``tool`` equals ``tool_name`` are
    grouped by ``dataset_col``, ``model`` and ``test_id``, aggregated, and
    sorted by those same keys.

    Args:
        tool_name: value to match in the ``tool`` column.

    Returns:
        A DataFrame with one row per (dataset, model, test). For "augly" and
        "checklist" the ``asr_mean`` / ``asr_std`` columns are removed.
    """
    group_keys = [dataset_col, "model", "test_id"]

    # Output column -> (source column, aggregation).
    spec = {
        "repeats": ("repeat_id", "nunique"),
        "n_eval_mean": ("n_eval", "mean"),
        "n_ok_mean": ("n_ok", "mean"),
        "fail_rate_mean": ("fail_rate", "mean"),
        "changed_rate_mean": ("changed_rate", "mean"),
        "sim_mean": ("avg_similarity", "mean"),
        "inv_mean": ("invariance_rate", "mean"),
        "drop_acc_mean": ("drop_acc", "mean"),
        "drop_acc_std": ("drop_acc", "std"),
        "time_mean": ("time_sec", "mean"),
        "time_std": ("time_sec", "std"),
        "peak_mb_mean": ("peak_rss_mb", "mean"),
        "peak_mb_std": ("peak_rss_mb", "std"),
        "asr_mean": ("attack_success_rate", "mean"),
        "asr_std": ("attack_success_rate", "std"),
    }

    tool_rows = df[df["tool"] == tool_name].copy()
    aggregated = tool_rows.groupby(group_keys).agg(**spec)
    table = aggregated.reset_index().sort_values(group_keys)

    if tool_name not in ("augly", "checklist"):
        return table

    # augly and checklist have no ASR, so those columns are dropped.
    return table.drop(columns=["asr_mean", "asr_std"], errors="ignore")

# Build one table per tool.
tab_augly, tab_check, tab_ta = (
    make_table_for_tool(_tool)
    for _tool in ("augly", "checklist", "textattack")
)

# Output files, in the same order as the tables above.
p1, p2, p3 = (
    os.path.join(run_dir, _filename)
    for _filename in (
        "table_augly.csv",
        "table_checklist.csv",
        "table_textattack.csv",
    )
)

# Write every table first, then report the saved paths.
_tables_and_paths = ((tab_augly, p1), (tab_check, p2), (tab_ta, p3))
for _table, _csv_path in _tables_and_paths:
    _table.to_csv(_csv_path, index=False)
for _, _csv_path in _tables_and_paths:
    print("saved:", _csv_path)

tab_augly.head(10)


Create table_overall_tools_by_dataset.csv

In [None]:
# Keep only the rows that belong to the three compared tools.
_compared_tools = ["augly", "checklist", "textattack"]
d = df[df["tool"].isin(_compared_tools)].copy()

# Stage 1: collapse the rows of each (dataset, model, tool, test) group.
_per_test_spec = {
    "repeats": ("repeat_id", "nunique"),
    "drop_acc_mean": ("drop_acc", "mean"),
    "time_mean": ("time_sec", "mean"),
    "fail_rate_mean": ("fail_rate", "mean"),
    "changed_rate_mean": ("changed_rate", "mean"),
    "sim_mean": ("avg_similarity", "mean"),
    "inv_mean": ("invariance_rate", "mean"),
    "peak_mb_mean": ("peak_rss_mb", "mean"),
    "asr_mean": ("attack_success_rate", "mean"),
}
by_test = (
    d.groupby([dataset_col, "model", "tool", "test_id"])
    .agg(**_per_test_spec)
    .reset_index()
)

# Stage 2: average the per-test values within each (dataset, model, tool),
# so every test contributes equally regardless of how many rows it had.
_tool_keys = [dataset_col, "model", "tool"]
_per_tool_spec = {
    "n_tests": ("test_id", "nunique"),
    "drop_acc_mean": ("drop_acc_mean", "mean"),
    "time_mean_sec": ("time_mean", "mean"),
    "fail_rate_mean": ("fail_rate_mean", "mean"),
    "changed_rate_mean": ("changed_rate_mean", "mean"),
    "sim_mean": ("sim_mean", "mean"),
    "inv_mean": ("inv_mean", "mean"),
    "peak_mb_mean": ("peak_mb_mean", "mean"),
    "asr_mean": ("asr_mean", "mean"),
}
overall = (
    by_test.groupby(_tool_keys)
    .agg(**_per_tool_spec)
    .reset_index()
    .sort_values(_tool_keys)
)

# Decimal places per reported column; peak_mb_mean is left unrounded.
_decimals = {
    "drop_acc_mean": 4,
    "time_mean_sec": 2,
    "fail_rate_mean": 3,
    "changed_rate_mean": 3,
    "sim_mean": 3,
    "inv_mean": 3,
    "asr_mean": 3,
}
for _column, _digits in _decimals.items():
    overall[_column] = overall[_column].round(_digits)

out_path = os.path.join(run_dir, "table_overall_tools_by_dataset.csv")
overall.to_csv(out_path, index=False)
print("saved:", out_path)

overall.head(30)

Thesis table: create summary_for_tools.csv

In [None]:
# Reload the per-test summary written earlier.
# NOTE: this rebinds the module-level `df`, which until this cell held the
# raw results; cells above must reload results.csv before being re-run.
df = pd.read_csv(summary_path)

# Summary columns that must be numeric; columns absent from the file are
# skipped and unparseable values become NaN.
num_cols = [
    "fail_rate_mean",
    "changed_rate_mean",
    "sim_mean",
    "drop_acc_mean",
    "drop_acc_std",
    "drop_f1_mean",
    "drop_f1_std",
    "time_mean",
    "time_std",
    "peak_mb_mean",
    "peak_mb_std",
    "asr_mean",
    "asr_std",
]
_present_columns = [name for name in num_cols if name in df.columns]
for _column in _present_columns:
    df[_column] = pd.to_numeric(df[_column], errors="coerce")

# Tool identifier -> display name; `tools` lists the identifiers in the
# same order.
tool_names = dict(augly="AugLy", checklist="CheckList", textattack="TextAttack")
tools = list(tool_names)

def get_summary_for_tool(tool):
    """Aggregate the per-test summary rows of one tool into one row per model.

    Reads the module-level ``df`` (the per-test summary) and ``tool_names``.

    For each metric, ``<metric>_mean`` is the mean of the per-test means and
    ``<metric>_std`` is the sample standard deviation of those same per-test
    means, i.e. the spread across tests. Every ``*_std`` output is computed
    from the matching ``*_mean`` column, as ``fail_rate_std`` already was;
    taking the std of the per-test ``*_std`` columns would describe the
    spread of the spreads and would not pair with the reported mean.

    Args:
        tool: tool identifier as stored in the ``tool`` column; must be a
            key of ``tool_names``.

    Returns:
        A DataFrame with one row per model and a ``tool`` column holding the
        tool's display name.

    Raises:
        KeyError: if ``tool`` is not a key of ``tool_names``.
    """
    tool_data = df[df["tool"] == tool].copy()

    summary = tool_data.groupby("model").agg(
        n_eval_mean=("n_eval_mean", "mean"),
        fail_rate_mean=("fail_rate_mean", "mean"),
        fail_rate_std=("fail_rate_mean", "std"),
        drop_acc_mean=("drop_acc_mean", "mean"),
        drop_acc_std=("drop_acc_mean", "std"),
        drop_f1_mean=("drop_f1_mean", "mean"),
        drop_f1_std=("drop_f1_mean", "std"),
        time_mean=("time_mean", "mean"),
        time_std=("time_mean", "std"),
        peak_mb_mean=("peak_mb_mean", "mean"),
        peak_mb_std=("peak_mb_mean", "std"),
        asr_mean=("asr_mean", "mean"),
        asr_std=("asr_mean", "std"),
    ).reset_index()

    # Label the rows with the tool's display name.
    summary["tool"] = tool_names[tool]

    return summary

# Per-tool summaries, stacked in a fixed order: AugLy, CheckList, TextAttack.
augly_summary = get_summary_for_tool("augly")
checklist_summary = get_summary_for_tool("checklist")
textattack_summary = get_summary_for_tool("textattack")

# ignore_index=True gives the stacked table a fresh 0..n-1 index. Each
# per-tool frame is numbered from 0, so a plain concat would leave duplicate
# index labels and make later label-based access ambiguous.
summary_all = pd.concat(
    [augly_summary, checklist_summary, textattack_summary],
    ignore_index=True,
)

# Replace known model identifiers with display names; identifiers without
# an entry are kept as they are.
model_names = {"sk_tfidf_lr": "TF-IDF_LR", "hf_distilbert": "DistilBERT"}
summary_all["model"] = summary_all["model"].map(model_names).fillna(summary_all["model"])


summary_all_path = os.path.join(run_dir, "summary_for_tools.csv")
summary_all.to_csv(summary_all_path, index=False)

print("saved:", summary_all_path)
