# Basic diagram

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

# Locate the run directory and load the raw results table.
project_dir = "/content"
run_dir = os.path.join(project_dir, "results")

results_csv = os.path.join(run_dir, "results.csv")
df = pd.read_csv(results_csv)
print("Loaded:", results_csv)
print("Rows:", df.shape[0], "Cols:", df.shape[1])

# Metric columns that are coerced to numeric dtype when present;
# values that cannot be parsed become NaN (errors="coerce").
num_cols = [
    "n_eval", "n_ok", "fail_rate", "changed_rate", "avg_similarity",
    "invariance_rate",
    "acc", "f1", "acc_clean", "f1_clean", "drop_acc", "drop_f1",
    "time_sec", "peak_rss_mb", "attack_success_rate",
]
for c in num_cols:
    if c not in df.columns:
        continue
    df[c] = pd.to_numeric(df[c], errors="coerce")

# The dataset identifier column is "dataset_key" when available,
# otherwise "dataset"; the script cannot continue without one of them.
if "dataset_key" in df.columns:
    dataset_col = "dataset_key"
elif "dataset" in df.columns:
    dataset_col = "dataset"
else:
    dataset_col = None
if dataset_col is None:
    raise ValueError("Не нашла колонку dataset_key или dataset в results.csv")

# Display names for dataset identifiers. Short keys ("sst2") and prefixed
# identifiers ("glue/sst2") map to the same label. Each key appears once:
# a repeated key in a dict literal silently overrides the earlier entry.
dataset_names = {
    "sst2": "SST-2",
    "imdb": "IMDb",
    "emotion": "Emotion",
    "glue/sst2": "SST-2",
    "dair-ai/emotion": "Emotion",
}

def pretty_dataset(x):
    """Return the display name for dataset identifier ``x``.

    ``x`` is converted with ``str`` before the lookup. Identifiers missing
    from ``dataset_names`` are returned as that string, unchanged.
    """
    key = str(x)
    return dataset_names.get(key, key)

# List the dataset identifiers present in the results table.
print("Datasets:", sorted(df[dataset_col].dropna().unique().tolist()))
# A bare `df.head(5)` expression is only rendered when it is the last
# statement of a notebook cell; here more code follows, so the preview is
# printed explicitly instead of being discarded.
print(df.head(5))


# Display labels for the model and tool identifiers used in the tables.
model_names = dict(sk_tfidf_lr="TF-IDF_LR", hf_distilbert="DistilBERT")
tool_names = dict(augly="AugLy", checklist="CheckList", textattack="TextAttack")
# Sort priority of the tools when ordering tests (lower value comes first).
tool_rank = dict(augly=0, checklist=1)

# Greyscale bar fills and the hatch pattern used to tell bars apart.
C1 = "#E6E6E6"
C2 = "#B3B3B3"
HATCH2 = "///"

def short_test(t):
    """Return ``str(t)`` with every "augly_" and "checklist_" occurrence removed."""
    label = str(t)
    # Applied in this order, one marker after the other.
    for marker in ("augly_", "checklist_"):
        label = label.replace(marker, "")
    return label

def _apply_bar_hatches(ax, n_series=2):
    """Set the hatch of every bar in ``ax.containers`` by series position.

    The first series gets no hatch and the second gets ``HATCH2``; the list
    of patterns is cut to ``n_series`` entries, and any container beyond it
    gets no hatch.
    """
    patterns = ["", HATCH2][:n_series]
    for idx, group in enumerate(ax.containers):
        pattern = patterns[idx] if idx < len(patterns) else ""
        for patch in group:
            patch.set_hatch(pattern)

def _label_bars(ax, fmt="{:.3f}", fontsize=9):
    """Attach a value label to every bar of every container in ``ax``.

    Each value is rendered with ``fmt.format(float(value))``; missing values
    (``pd.isna``) get an empty label.
    """
    for group in ax.containers:
        texts = [
            "" if pd.isna(value) else fmt.format(float(value))
            for value in group.datavalues
        ]
        ax.bar_label(group, labels=texts, padding=2, fontsize=fontsize)

def plot_drop_acc_aug_check(summary_path, ds_key):
    """Plot the mean accuracy drop per AugLy/CheckList test as grouped bars.

    Reads the summary CSV, keeps the rows of dataset ``ds_key`` whose
    ``tool`` is ``augly`` or ``checklist``, and draws one bar per model for
    every test. Prints a message and returns without plotting when no rows
    match.

    Args:
        summary_path: Path to the summary CSV. The columns ``tool``,
            ``model``, ``test_id`` and ``drop_acc_mean`` are read, plus the
            dataset column.
        ds_key: Dataset identifier to select.
    """
    s = pd.read_csv(summary_path)
    # Fallback: the summary may store "dataset" instead of the column name
    # detected in results.csv.
    ds_col = dataset_col if dataset_col in s.columns else ("dataset" if "dataset" in s.columns else dataset_col)

    s = s[(s[ds_col] == ds_key) & (s["tool"].isin(["augly", "checklist"]))].copy()
    if len(s) == 0:
        print("No data for", ds_key)
        return

    s["Модель"] = s["model"].map(model_names).fillna(s["model"])
    s["Инструмент"] = s["tool"].map(tool_names).fillna(s["tool"])
    s["tool_rank"] = s["tool"].map(tool_rank).fillna(99)

    s["Тест"] = s["test_id"].apply(short_test) + " (" + s["Инструмент"] + ")"

    pivot = s.pivot(index="Тест", columns="Модель", values="drop_acc_mean")

    # Rows are ordered by tool, then by the DistilBERT drop (largest first).
    ranked = s[s["Модель"] == "DistilBERT"]
    if ranked.empty:
        # No DistilBERT rows for this dataset: rank by the rows that exist
        # instead of reindexing the pivot to an empty frame.
        ranked = s
    order_rows = (
        ranked[["Тест", "tool_rank", "drop_acc_mean"]]
        .sort_values(["tool_rank", "drop_acc_mean"], ascending=[True, False])["Тест"]
        .drop_duplicates()
        .tolist()
    )
    # Tests that only other models have would be dropped by reindex;
    # keep them after the ranked ones.
    ranked_names = set(order_rows)
    order_rows += [name for name in pivot.index if name not in ranked_names]
    pivot = pivot.reindex(order_rows)

    ax = pivot.plot(kind="bar", color=[C1, C2], edgecolor="black", figsize=(10, 4))
    _apply_bar_hatches(ax, n_series=len(pivot.columns))
    _label_bars(ax, fmt="{:.3f}", fontsize=9)

    ax.set_title(f"Падение точности по тестам AugLy и CheckList — {pretty_dataset(ds_key)}")
    ax.set_xlabel("Тест")
    ax.set_ylabel("drop_acc_mean (5 запусков)")
    ax.grid(True, axis="y", alpha=0.3)
    ax.legend(title="Модель")

    ymax = float(np.nanmax(pivot.values))
    ymin = float(np.nanmin(pivot.values))
    # The lower limit stays at 0 unless some drop is negative; in that case
    # it is extended so that those bars remain visible.
    ax.set_ylim(min(0.0, ymin * 1.25), max(0.001, ymax * 1.25))

    plt.xticks(rotation=20, ha="right")
    plt.tight_layout()
    plt.show()

def plot_similarity_aug_check(summary_path, ds_key):
    """Plot the mean text similarity per AugLy/CheckList test as a bar chart.

    Reads the summary CSV, keeps the rows of dataset ``ds_key`` whose
    ``tool`` is ``augly`` or ``checklist``, averages ``sim_mean`` per test
    and draws one bar per test. Prints a message and returns without
    plotting when no rows match.
    """
    frame = pd.read_csv(summary_path)
    if dataset_col in frame.columns:
        ds_col = dataset_col
    elif "dataset" in frame.columns:
        ds_col = "dataset"
    else:
        ds_col = dataset_col

    wanted = (frame[ds_col] == ds_key) & (frame["tool"].isin(["augly", "checklist"]))
    frame = frame[wanted].copy()
    if len(frame) == 0:
        print("No data for", ds_key)
        return

    frame["Инструмент"] = frame["tool"].map(tool_names).fillna(frame["tool"])
    frame["tool_rank"] = frame["tool"].map(tool_rank).fillna(99)
    frame["Тест"] = frame["test_id"].apply(short_test) + " (" + frame["Инструмент"] + ")"

    # One value per test: the mean of sim_mean, ordered by tool and then
    # by ascending similarity.
    per_test = frame.groupby(["Тест", "tool_rank"], as_index=False)["sim_mean"].mean()
    agg = per_test.sort_values(["tool_rank", "sim_mean"], ascending=[True, True])

    series = agg.set_index("Тест")["sim_mean"]
    ax = series.plot(kind="bar", color=C1, edgecolor="black", figsize=(10, 4))

    # Hatch the CheckList bars (tool rank 1).
    ranks = agg["tool_rank"].tolist()
    for patch, rank in zip(ax.patches, ranks):
        if rank == 1:
            patch.set_hatch(HATCH2)

    texts = ["" if not pd.notna(v) else f"{v:.3f}" for v in agg["sim_mean"].tolist()]
    ax.bar_label(ax.containers[0], labels=texts, padding=2, fontsize=9)

    ax.set_title(f"Схожесть текста после преобразований — {pretty_dataset(ds_key)}")
    ax.set_xlabel("Тест")
    ax.set_ylabel("sim_mean")
    ax.grid(True, axis="y", alpha=0.3)

    # Upper limit: 1.6 times the largest value, capped at 1.1.
    highest = float(np.nanmax(agg["sim_mean"].values))
    ax.set_ylim(0, min(1.1, highest * 1.60))

    plt.xticks(rotation=20, ha="right")
    plt.tight_layout()
    plt.show()

def plot_textattack_cov_asr(overall_path, ds_key):
    """Plot TextAttack coverage and attack success rate for two models.

    Reads the overall CSV, keeps the ``textattack`` rows of dataset
    ``ds_key`` and draws two bar groups ("Покрытие" and "ASR") with one bar
    per model. Coverage is ``(1 - fail_rate_mean) * 100`` and ASR is
    ``asr_mean * 100``. Prints a message and returns without plotting when
    no rows match or when the number of rows is not exactly two.
    """
    o = pd.read_csv(overall_path)
    ds_col = dataset_col if dataset_col in o.columns else ("dataset" if "dataset" in o.columns else dataset_col)

    o = o[(o[ds_col] == ds_key) & (o["tool"] == "textattack")].copy()
    if len(o) == 0:
        print("No TextAttack data for", ds_key)
        return

    o["Модель"] = o["model"].map(model_names).fillna(o["model"])
    o["Покрытие"] = (1.0 - pd.to_numeric(o["fail_rate_mean"], errors="coerce")) * 100.0
    o["ASR"] = pd.to_numeric(o["asr_mean"], errors="coerce") * 100.0

    # Preferred left-to-right order. The labels are taken from model_names
    # because a category that matches no row label would turn that model
    # into NaN when the column is made categorical.
    order_models = [model_names["sk_tfidf_lr"], model_names["hf_distilbert"]]
    # Models outside the preferred list keep their label and sort after it.
    order_models += sorted(set(o["Модель"].dropna()) - set(order_models))
    o["Модель"] = pd.Categorical(o["Модель"], categories=order_models, ordered=True)
    o = o.sort_values("Модель")

    cov = o["Покрытие"].tolist()
    asr = o["ASR"].tolist()
    models = o["Модель"].tolist()

    if len(models) != 2:
        print("Ожидала 2 модели, но нашла:", models)
        return

    def _percent(v):
        # Missing values get an empty label instead of "nan%".
        return f"{v:.1f}%" if pd.notna(v) else ""

    x = np.arange(2)
    width = 0.28

    fig, ax = plt.subplots(figsize=(7, 4))
    bars_m1 = ax.bar(x - width/2, [cov[0], asr[0]], width, label=models[0], color=C1, edgecolor="black")
    bars_m2 = ax.bar(x + width/2, [cov[1], asr[1]], width, label=models[1], color=C2, edgecolor="black")

    for b in bars_m2:
        b.set_hatch(HATCH2)

    ax.bar_label(bars_m1, labels=[_percent(v) for v in [cov[0], asr[0]]], padding=2, fontsize=9)
    ax.bar_label(bars_m2, labels=[_percent(v) for v in [cov[1], asr[1]]], padding=2, fontsize=9)

    ax.set_title(f"Покрытие и успешность атаки TextAttack — {pretty_dataset(ds_key)}")
    ax.set_xlabel("Показатель")
    ax.set_ylabel("Процент, %")
    ax.set_xticks(x)
    ax.set_xticklabels(["Покрытие", "ASR"])
    ax.set_ylim(0, 115)
    ax.grid(True, axis="y", alpha=0.3)
    ax.legend(title="Модель")
    plt.tight_layout()
    plt.show()

def plot_augly_time(table_augly_path, ds_key):
    """Plot the mean run time of each AugLy test per model on a log axis.

    Reads the AugLy table CSV, keeps the rows of dataset ``ds_key`` and
    draws ``time_mean`` as grouped bars with one group per ``test_id``.
    Prints a message and returns without plotting when no rows match.
    """
    table = pd.read_csv(table_augly_path)
    if dataset_col in table.columns:
        ds_col = dataset_col
    elif "dataset" in table.columns:
        ds_col = "dataset"
    else:
        ds_col = dataset_col

    table = table[table[ds_col] == ds_key].copy()
    if len(table) == 0:
        print("No AugLy table data for", ds_key)
        return

    pivot = table.pivot(index="test_id", columns="model", values="time_mean")
    pivot = pivot.sort_index().rename(columns=model_names)

    ax = pivot.plot(kind="bar", color=[C1, C2], edgecolor="black", figsize=(10, 4))
    _apply_bar_hatches(ax, n_series=len(pivot.columns))

    def _time_text(v):
        # Adaptive precision: fewer decimals as the value grows; missing
        # or non-positive values get no label.
        if pd.isna(v) or v <= 0:
            return ""
        if v < 1:
            return f"{v:.3f}"
        if v < 10:
            return f"{v:.2f}"
        if v < 100:
            return f"{v:.1f}"
        return f"{v:.0f}"

    for group in ax.containers:
        texts = [_time_text(v) for v in group.datavalues]
        ax.bar_label(group, labels=texts, padding=2, fontsize=9)

    ax.set_title(f"Среднее время выполнения тестов AugLy — {pretty_dataset(ds_key)}")
    ax.set_xlabel("Тест AugLy")
    ax.set_ylabel("Время, сек (среднее, log)")
    ax.set_yscale("log")
    ax.grid(True, axis="y", alpha=0.3)

    # Limits: half the smallest value (never below 1e-3) up to five times
    # the largest value (never below 1e-2).
    cells = pivot.values
    ymax = float(np.nanmax(cells))
    smallest = np.nanmin(cells)
    ymin = float(smallest) if np.isfinite(smallest) else 1e-3
    ax.set_ylim(bottom=max(1e-3, ymin / 2), top=max(1e-2, ymax * 5))

    ax.legend(title="Модель")
    plt.xticks(rotation=20, ha="right")
    plt.tight_layout()
    plt.show()

def plot_tool_time(overall_path, ds_key):
    """Plot the mean run time per tool and model on a log axis.

    Reads the overall CSV, keeps the rows of dataset ``ds_key`` and draws
    ``time_mean_sec`` as grouped bars with one group per tool. Prints a
    message and returns without plotting when no rows match.
    """
    overall = pd.read_csv(overall_path)
    if dataset_col in overall.columns:
        ds_col = dataset_col
    elif "dataset" in overall.columns:
        ds_col = "dataset"
    else:
        ds_col = dataset_col

    overall = overall[overall[ds_col] == ds_key].copy()
    if len(overall) == 0:
        print("No overall data for", ds_key)
        return

    overall["Инструмент"] = overall["tool"].map(tool_names).fillna(overall["tool"])
    overall["Модель"] = overall["model"].map(model_names).fillna(overall["model"])

    # Fixed left-to-right order of the tool groups.
    order_tools = ["CheckList", "AugLy", "TextAttack"]
    overall["Инструмент"] = pd.Categorical(
        overall["Инструмент"], categories=order_tools, ordered=True
    )
    overall = overall.sort_values(["Инструмент", "Модель"])

    pivot = overall.pivot(index="Инструмент", columns="Модель", values="time_mean_sec")

    ax = pivot.plot(kind="bar", color=[C1, C2], edgecolor="black", figsize=(7, 4))
    _apply_bar_hatches(ax, n_series=len(pivot.columns))

    def _time_text(v):
        # Missing or non-positive values get no label.
        if pd.isna(v) or v <= 0:
            return ""
        if v < 1:
            return f"{v:.3f}"
        if v < 10:
            return f"{v:.2f}"
        return f"{v:.0f}"

    for group in ax.containers:
        texts = [_time_text(v) for v in group.datavalues]
        ax.bar_label(group, labels=texts, padding=2, fontsize=9)

    ax.set_title(f"Среднее время выполнения по инструментам — {pretty_dataset(ds_key)}")
    ax.set_xlabel("Инструмент")
    ax.set_ylabel("Время, сек (среднее, log)")
    ax.set_yscale("log")
    ax.grid(True, axis="y", alpha=0.3)

    # Headroom at the top so the labels do not touch the frame; the
    # multiplier can be tuned within 1.8-3.0.
    tallest = float(np.nanmax(pivot.values))
    ax.set_ylim(top=max(1e-2, tallest * 2.2))

    plt.xticks(rotation=0)
    plt.tight_layout()
    plt.show()

def plot_sec_per_example(overall_path, summary_path, ds_key):
    """Plot the run time per processed example for each tool and model.

    For dataset ``ds_key`` the ``time_mean_sec`` of every (model, tool) row
    in the overall CSV is divided by the mean of ``n_ok_mean`` over that
    pair's rows in the summary CSV. The result is drawn as grouped bars on
    a log axis. Prints a message and returns without plotting when either
    table has no rows for the dataset.

    Args:
        overall_path: Path to the overall CSV (``model``, ``tool`` and
            ``time_mean_sec`` are read, plus the dataset column).
        summary_path: Path to the summary CSV (``model``, ``tool`` and
            ``n_ok_mean`` are read, plus the dataset column).
        ds_key: Dataset identifier to select.
    """
    o = pd.read_csv(overall_path)
    s = pd.read_csv(summary_path)

    ds_col_o = dataset_col if dataset_col in o.columns else ("dataset" if "dataset" in o.columns else dataset_col)
    ds_col_s = dataset_col if dataset_col in s.columns else ("dataset" if "dataset" in s.columns else dataset_col)

    o = o[o[ds_col_o] == ds_key].copy()
    s = s[s[ds_col_s] == ds_key].copy()
    if len(o) == 0 or len(s) == 0:
        print("No data for", ds_key)
        return

    # Mean n_ok per (model, tool), averaged over that tool's tests.
    nok = (
        s.groupby(["model", "tool"], as_index=False)["n_ok_mean"]
        .mean()
        .rename(columns={"n_ok_mean": "n_ok_mean_tool"})
    )

    m = o.merge(nok, on=["model", "tool"], how="left")
    # A zero count would make the ratio infinite, and an infinite value
    # reaches set_ylim below, which rejects non-finite limits. Treat a zero
    # count as missing so the bar is simply left out.
    n_ok = m["n_ok_mean_tool"].replace(0, np.nan)
    m["sec_per_example"] = m["time_mean_sec"] / n_ok

    m["Инструмент"] = m["tool"].map(tool_names).fillna(m["tool"])
    m["Модель"] = m["model"].map(model_names).fillna(m["model"])

    order_tools = ["CheckList", "AugLy", "TextAttack"]
    m["Инструмент"] = pd.Categorical(m["Инструмент"], categories=order_tools, ordered=True)
    m = m.sort_values(["Инструмент", "Модель"])

    pivot = m.pivot(index="Инструмент", columns="Модель", values="sec_per_example")

    ax = pivot.plot(kind="bar", color=[C1, C2], edgecolor="black", figsize=(7, 4))
    _apply_bar_hatches(ax, n_series=len(pivot.columns))

    for container in ax.containers:
        labels = []
        for v in container.datavalues:
            if pd.isna(v) or v <= 0:
                labels.append("")
            elif v < 0.01:
                labels.append(f"{v:.4f}")
            elif v < 1:
                labels.append(f"{v:.3f}")
            else:
                labels.append(f"{v:.2f}")
        ax.bar_label(container, labels=labels, padding=2, fontsize=9)

    ax.set_title(f"Время на 1 пример по инструментам (сек/пример) — {pretty_dataset(ds_key)}")
    ax.set_xlabel("Инструмент")
    ax.set_ylabel("Секунд на 1 пример (log)")
    ax.set_yscale("log")
    ax.grid(True, axis="y", alpha=0.3)
    # Headroom at the top so the labels do not touch the frame.
    ymax = float(np.nanmax(pivot.values))
    ax.set_ylim(top=max(1e-6, ymax * 2.2))
    plt.xticks(rotation=0)
    plt.tight_layout()
    plt.show()

In [None]:
import os
project_dir = "/content"
run_dir = os.path.join(project_dir, "results")
# Input tables for the plots, all located in run_dir.
summary_path, overall_path, table_augly_path = (
    os.path.join(run_dir, fname)
    for fname in (
        "summary_by_test.csv",
        "table_overall_tools_by_dataset.csv",
        "table_augly.csv",
    )
)

# Dataset identifiers present in the results table, in sorted order.
datasets = df[dataset_col].dropna().unique().tolist()
datasets.sort()

# Draw every figure for each dataset, one dataset at a time.
for ds_key in datasets:
    # Per-test figures (summary table).
    plot_drop_acc_aug_check(summary_path, ds_key)
    plot_similarity_aug_check(summary_path, ds_key)
    # TextAttack coverage / success rate (overall table).
    plot_textattack_cov_asr(overall_path, ds_key)
    # Timing figures.
    plot_augly_time(table_augly_path, ds_key)
    plot_tool_time(overall_path, ds_key)
    plot_sec_per_example(overall_path, summary_path, ds_key)