In [1]:
from lewidi_lib import load_dataset
import numpy as np

# Load the CSC training split and derive per-item annotation statistics.
df = load_dataset("CSC", "train")
df["n_annotations"] = df["annotations"].apply(len)
# Each "annotations" entry exposes .values(); keep those values as ints.
df["invidiual_annotations"] = df["annotations"].apply(
    lambda ann: list(map(int, ann.values()))
)
# Mean and variance of the ratings of each item (np.var uses ddof=0 by default).
for stat_column, reducer in (
    ("avg_invidiual_annotations", np.mean),
    ("annotation_variance", np.var),
):
    df[stat_column] = df["invidiual_annotations"].apply(reducer)

In [None]:
# Restrict to items that have exactly six annotations, then keep the item(s)
# whose annotation variance equals the maximum within that subset.
df_six_annotations = df.query("n_annotations == 6")
df_maxvar = df_six_annotations.query(
    "annotation_variance == annotation_variance.max()"
)
# Position 1 is the second of the tied rows, so at least two rows must share
# the maximum variance for this lookup to succeed.
row = df_maxvar.iloc[1]
row.to_dict()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt


def plot_sarcasm_annotations(
    annotations: list[int] | list[float],
    is_distr: bool = False,
    ylim: float | None = None,
    n_classes: int = 6,
):
    """Draw a bar chart of sarcasm ratings and return the figure.

    Args:
        annotations: With ``is_distr=False``, the individual integer ratings;
            they are counted per rating value before plotting. With
            ``is_distr=True``, the bar heights themselves, where position
            ``i`` holds the value for rating ``i``.
        is_distr: Whether ``annotations`` already is a distribution. Selects
            the y-axis label ("Probability" vs. "Count") and disables the
            integer y-ticks that are only set for counts.
        ylim: Upper limit of the y-axis; ``None`` lets matplotlib choose it.
        n_classes: Highest rating on the scale. Counts are padded up to this
            rating so that ratings nobody chose still get an (empty) bar and
            count plots share the x-axis of the distribution plots.

    Returns:
        The matplotlib figure holding the plot. It is closed with
        ``plt.close`` before being returned (presumably so the notebook does
        not render it a second time on its own).
    """
    if is_distr:
        ys = annotations
        y_label = "Probability"
    else:
        # Without minlength, bincount stops at the largest observed rating,
        # which would silently drop the bars of the higher, unused ratings.
        ys = np.bincount(annotations, minlength=n_classes + 1)
        y_label = "Count"

    fig, ax = plt.subplots(figsize=(4, 3))
    ax = sns.barplot(ys, ax=ax)
    # Bars sit at integer positions starting at 0; a left limit of 0.5 crops
    # the bar at position 0 out of the visible range.
    ax.set_xlim(0.5, None)
    if not is_distr:
        # Counts are whole numbers, so use one tick per integer.
        ax.set_yticks(range(int(ys.max()) + 1))
    ax.set_xlabel("Sarcasm Rating")
    ax.set_ylabel(y_label)
    ax.grid(axis="y", alpha=0.5)
    ax.set_ylim(0, ylim)
    plt.close(fig)
    return fig


# Count plot of the human ratings stored in `row`, saved under its dataset index.
ds_idx = row["dataset_idx"]
pdf_path = f"imgs/special_examples/dsidx_{ds_idx}_human_annotations.pdf"
fig = plot_sarcasm_annotations(row["invidiual_annotations"])
fig.savefig(pdf_path, bbox_inches="tight")
fig

In [4]:
# Items with exactly six annotations, ordered from highest to lowest mean rating.
df_high_sarcasm = (
    df.query("n_annotations == 6")
    .sort_values("avg_invidiual_annotations", ascending=False)
)
# The first row after sorting has the highest mean rating.
high_scarsm_row = df_high_sarcasm.iloc[0].to_dict()

In [None]:
# Count plot of the human ratings of the highest-mean-rating example.
ds_idx = high_scarsm_row["dataset_idx"]
pdf_path = f"imgs/special_examples/dsidx_{ds_idx}_human_annotations.pdf"
fig2 = plot_sarcasm_annotations(high_scarsm_row["invidiual_annotations"])
fig2.savefig(pdf_path, bbox_inches="tight")
fig2

In [None]:
# Hard-coded distribution over ratings (index = rating).
# NOTE(review): presumably the Gemini 2.5 prediction for `row`, judging by the
# output file name — confirm where these numbers come from.
gemini_distribution = [0.0, 0.1, 0.2, 0.3, 0.25, 0.1, 0.05]
fig3 = plot_sarcasm_annotations(gemini_distribution, is_distr=True)
pdf_path = (
    f"imgs/special_examples/dsidx_{row['dataset_idx']}_gemini25_sarcasm_annotation.pdf"
)
fig3.savefig(pdf_path, bbox_inches="tight")
fig3

In [None]:
# Hard-coded distribution over ratings (index = rating).
# NOTE(review): presumably the Gemini 2.5 prediction for `high_scarsm_row`,
# judging by the output file name — confirm where these numbers come from.
gemini_distribution = [0.0, 0.01, 0.02, 0.08, 0.3, 0.44, 0.15]
fig4 = plot_sarcasm_annotations(gemini_distribution, is_distr=True)
pdf_path = (
    f"imgs/special_examples/dsidx_{high_scarsm_row['dataset_idx']}_gemini25_sarcasm_annotation.pdf"
)
fig4.savefig(pdf_path, bbox_inches="tight")
fig4

# How Good Is This?

In [None]:
from lewidi_lib import compute_majority_baseline


# Plot the first "pred" entry of the majority baseline as a distribution.
maj_baseline = compute_majority_baseline(df)
majority_distribution = maj_baseline["pred"].values[0]
fig_mb = plot_sarcasm_annotations(majority_distribution, is_distr=True)
fig_mb.savefig("imgs/special_examples/csc-majority_baseline.pdf", bbox_inches="tight")
fig_mb

In [None]:
from lewidi_lib import baseline_pred


# Prepend a zero for position 0 so the baseline lines up with the other
# 7-slot distributions plotted in this notebook.
uniform_distribution = [0.0] + list(baseline_pred(n_classes=6))
fig = plot_sarcasm_annotations(uniform_distribution, is_distr=True, ylim=0.4)
fig.savefig("imgs/special_examples/csc-uniform_baseline.pdf", bbox_inches="tight")
fig