In [1]:
# Load IPython's autoreload extension; mode 2 re-imports edited modules before
# each cell executes, so changes to local library code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from lewidi_lib import load_dataset, enable_logging
import pandas as pd

# Switch on logging via the project helper before any data is loaded.
enable_logging()

# Dataset and split under study; the results-loading cell filters on the same
# two names, so they must stay module-level variables.
dataset, split = "CSC", "train"

ddf = load_dataset(dataset, split=split)
print(len(ddf))
ddf.head(2)

In [None]:
import duckdb
from lewidi_lib import process_rdf


# Which model runs to analyse.
model_ids = ["Qwen/Qwen3-14B", "Qwen/Qwen3-32B"]
gen_kwargs = "thinking"

# Read every result parquet into a single pandas frame through DuckDB.
con = duckdb.connect()
rdf = con.sql("SELECT * FROM read_parquet('../parquets/*.parquet')").df()

# Keep only rows matching the dataset/split chosen earlier and the models
# listed above. `@name` inside the expression refers to the Python variable
# of that name.
row_filter = " and ".join(
    [
        "dataset == @dataset",
        "split == @split",
        "model_id in @model_ids",
        "gen_kwargs == @gen_kwargs",
    ]
)
rdf.query(row_filter, inplace=True)

rdf = process_rdf(rdf, discard_invalid_pred=True)
rdf.head(2)

In [None]:
from lewidi_lib import assign_col_ws_loss
from lewidi_lib import entropy

# Attach each request's target to the model outputs for that request.
join_keys = ["dataset", "split", "request_idx"]
targets = ddf[join_keys + ["target"]]
joint_df = targets.merge(rdf, on=join_keys)

# Per-row loss column, then entropy of both the target and the prediction.
joint_df = assign_col_ws_loss(joint_df)
for source_col in ("target", "pred"):
    joint_df[f"{source_col}_entropy"] = entropy(joint_df[source_col])

# Sanity check: every joined row must have a truthy `success` flag.
assert joint_df["success"].all()
joint_df.head(2)

In [None]:
joint_df.columns

In [None]:
# Mean loss per (request, model) pair, averaged over all rows in each group.
cols = ["request_idx", "model_id"]
perf_df = (
    joint_df[cols + ["run_idx", "ws_loss"]]
    .groupby(cols, as_index=False)
    .agg(avg_ws_loss=("ws_loss", "mean"))
)
perf_df

In [None]:
# Wide layout: one row per request, one column per model holding its mean loss.
perf_df_wide = perf_df.pivot(
    index="request_idx", columns="model_id", values="avg_ws_loss"
)

# Take the two models to compare from `model_ids` instead of repeating the
# literal names, so changing the configuration cell cannot leave this cell
# pointing at columns that no longer exist. `model_ids` lists the smaller model
# first and the larger one last.
small_model, large_model = model_ids[0], model_ids[-1]

# Positive diff: the larger model's mean loss exceeds the smaller model's on
# that request. Sorting descending puts those requests at the top.
perf_df_wide["diff"] = perf_df_wide[large_model] - perf_df_wide[small_model]
perf_df_wide.sort_values(by="diff", ascending=False)

# Entropy
Perhaps the larger model is degrading because it's **too** cautious with its distribution, i.e. it tends to output flat (high-entropy) distributions more often.

This does not seem to be the case. In fact, the larger model outputs more concentrated (low-entropy) distributions.

In [None]:
# Mean predicted-distribution entropy per model, plus the number of rows behind it.
entropy_stats = {"pred_entropy": ["mean", "count"]}
joint_df.groupby("model_id").agg(entropy_stats)

Perhaps the larger model is taking more risks (low entropy) and losing more often than a more cautious smaller model (high entropy).

I tried several different visualizations, but none were very informative.

In [None]:
# Narrow view with just the columns needed to relate entropy to loss per model.
ent_cols = ["model_id", "pred_entropy", "target_entropy", "ws_loss"]
ent_df = joint_df[ent_cols]
ent_df