## Weight Difference

In [None]:
import pandas as pd
import seaborn as sns
import torch
import matplotlib.pyplot as plt

torch.set_grad_enabled(False)


def _layername_to_type(s: str) -> str:
    if s.startswith("layers"):
        return ".".join(s.split(".")[2:])
    else:
        return ".".join(s.split(".")[0:])


def _layername_to_layer(s: str) -> int:
    if s.startswith("layers"):
        return int(s.split(".")[1])
    else:
        return -1


def plot_diffs(df: pd.DataFrame, title: str = "", y: str = "diff", **kwargs):
    """Plot ``y`` against the layer index with one facet per layer type.

    Args:
        df: Frame with ``name1`` and ``name2`` columns plus the column named by ``y``.
        title: Figure-level title.
        y: Name of the column drawn on the y-axis.
        **kwargs: Forwarded to ``sns.relplot``.

    Raises:
        ValueError: If ``name1`` and ``name2`` differ in any row.
    """
    if (df.name1 != df.name2).any():
        raise ValueError("Layers do not match.")

    # Work on a new frame so the caller's DataFrame is left untouched.
    plot_df = df.assign(
        layer_type=df["name1"].apply(_layername_to_type),
        layer_idx=df["name1"].apply(_layername_to_layer),
    )

    grid = sns.relplot(
        data=plot_df,
        x="layer_idx",
        y=y,
        col="layer_type",
        col_wrap=5,
        height=2,
        aspect=1.6,
        **kwargs,
    )
    grid.figure.suptitle(title, y=1.02)


# Groups of Hugging Face model ids that are compared with each other.
# NOTE(review): presumably the `group` column of weight_diff.csv indexes into this
# list — confirm against the script that produced the CSV.
model_groups = [
    [
        "inclusionAI/AReaL-boba-2-8B-Open",
        "nvidia/AceReason-Nemotron-7B",
        "qihoo360/Light-R1-7B-DS",
        "Skywork/Skywork-OR1-7B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    ],
    [
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "nvidia/AceReason-Nemotron-1.1-7B",
        "open-r1/OpenR1-Distill-7B",
        "sail/Qwen2.5-Math-7B-Oat-Zero",
        "Qwen/Qwen2.5-Math-7B",
        "open-r1/Qwen2.5-Math-7B-RoPE-300k",
        "Nickyang/ConciseR-Zero-7B",
    ],
    [
        "agentica-org/DeepCoder-1.5B-Preview",
        "zwhe99/DeepMath-1.5B",
        "agentica-org/DeepScaleR-1.5B-Preview",
        "Nickyang/FastCuRL-1.5B-V3",
        "oumi-ai/MiniMath-R1-1.5B",
        "nvidia/Nemotron-Research-Reasoning-Qwen-1.5B",
        "GD-ML/Open-RS1",
        "knoveleng/Open-RS3",
        "RUC-AIBOX/STILL-3-1.5B-preview",
        "Zyphra/ZR1-1.5B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    ],
]
# Load the weight differences; both unnamed columns must exist in the CSV, otherwise
# `drop` raises (presumably they are leftover index columns — confirm).
data = pd.read_csv("weight_diff.csv").drop(columns=["Unnamed: 0.1", "Unnamed: 0"])
# Identifier of the compared pair: "<model1>__<model2>".
data["pair"] = data["model1"] + "__" + data["model2"]
# Parameter name without its "layers.<idx>." prefix, and the layer index
# (-1 for parameters whose name does not start with "layers").
data["layer_type"] = data["name1"].apply(_layername_to_type)
data["layer_idx"] = data["name1"].apply(_layername_to_layer)

data.head()

In [None]:
# List the distinct "<model1>__<model2>" pairs present in the data.
data.pair.unique()

In [None]:
# group, basemodel, col_order = 0, "DeepSeek-R1-Distill-Qwen-7B", None
# group, basemodel, col_order = (
#     1,
#     "Qwen/Qwen2.5-Math-7B",
#     [
#         "sail/Qwen2.5-Math-7B-Oat-Zero__Qwen/Qwen2.5-Math-7B",
#         "nvidia/AceReason-Nemotron-1.1-7B__Qwen/Qwen2.5-Math-7B",
#         "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B__Qwen/Qwen2.5-Math-7B",
#         "open-r1/OpenR1-Distill-7B__Qwen/Qwen2.5-Math-7B",
#     ],
# )
# group, basemodel, col_order = 2, "DeepSeek-R1-Distill-Qwen-1.5B", None


# subdf = data.loc[
#     (data.group == group)
#     & data.pair.str.contains(basemodel)
#     & ~data.pair.str.contains(model_to_remove)
#     & (data.layer_idx >= 0)
# ].copy()

# Model that is excluded from every comparison below.
model_to_remove = "open-r1/Qwen2.5-Math-7B-RoPE-300k"
# (group id, base model): within each group only pairs involving its base model are kept.
groups_and_basemodels = [
    (0, "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"),
    (1, "Qwen/Qwen2.5-Math-7B"),
    (2, "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"),
]
subdfs = []
for group, basemodel in groups_and_basemodels:
    subdf = data.loc[
        (data.group == group)
        # regex=False: model names contain "." (e.g. "Qwen2.5", "1.5B"), which
        # would otherwise be interpreted as a regex wildcard.
        & data.pair.str.contains(basemodel, regex=False)
        & ~data.pair.str.contains(model_to_remove, regex=False)
        # Keep only parameters that belong to a numbered layer (layer_idx is -1 otherwise).
        & (data.layer_idx >= 0)
    ].copy()
    subdfs.append(subdf)
# Stack the per-group selections into the single frame used by the plots below.
subdf = pd.concat(subdfs)


def _agg_layer_types(s: str):
    if "self_attn" in s:
        if "bias" not in s:
            return "Self-Attention Weights"
        else:
            return "Self-Attention Biases"
    if "mlp" in s:
        return "MLP Weights"
    return "Norm Weights"


def _pair_to_descendant(data: dict, remove_hfowner: bool = True):
    parts_full = data["pair"].split("__")
    parts_short = [name.split("/")[-1] for name in data["pair"].split("__")]

    group = data["group"]
    basemodel = [x[1] for x in groups_and_basemodels][group]
    if parts_full[0] == basemodel:
        return parts_short[1] if remove_hfowner else parts_full[1]
    else:
        return parts_short[0] if remove_hfowner else parts_full[0]


# Collapse the detailed layer types into the coarse categories returned by _agg_layer_types.
subdf["layer_type_agg"] = subdf["layer_type"].apply(_agg_layer_types)
# Row-wise: name of the non-base model of each pair.
subdf["Descendant"] = subdf.apply(_pair_to_descendant, axis="columns")

# Abbreviate this one long model name (presumably to keep the plot legend compact).
subdf.loc[subdf["Descendant"] == "Nemotron-Research-Reasoning-Qwen-1.5B", "Descendant"] = (
    "Nemotron-Res.-Reas.-Qwen-1.5B"
)

# sns.relplot(
#     subdf,
#     x="layer_idx",
#     y="normalized_diff",
#     col="pair",
#     row="group",
#     hue="layer_type_agg",
#     kind="line",
#     # sharey=True,
#     col_order=col_order,
# )

# 1. Define a base color for each group
#    (blue for group 0, maroon for group 1, darkcyan for group 2)
group_base_colors = {0: "blue", 1: "maroon", 2: "darkcyan"}

# 2. Create a palette dictionary that maps each descendant name to a color
custom_palette = {}
for group_id, base_color in group_base_colors.items():
    # Find all unique descendants within the current group
    # (the palette is keyed by the "Descendant" column, not by "pair")
    pairs_in_group = subdf[subdf.group == group_id]["Descendant"].unique()

    # Generate a palette of shades for this group's base color
    # Two more shades than descendants are requested so both ends of the ramp stay unused
    shades = sns.light_palette(base_color, n_colors=len(pairs_in_group) + 2)

    # Assign a unique shade to each descendant in the group
    # We skip the very lightest shade (at index 0) as it can be hard to see;
    # the last shade is never reached either
    for i, pair in enumerate(pairs_in_group):
        custom_palette[pair] = shades[i + 1]

# subdf = subdf.loc[~subdf.Descendant.isin(["qihoo360/Light-R1-7B-DS", "oumi-ai/MiniMath-R1-1.5B"])]

# Switch seaborn to its "paper" plotting context for the figures below.
sns.set_context("paper")

# One line per descendant, one facet per aggregated layer type, two facets per row.
g = sns.relplot(
    subdf,
    x="layer_idx",
    y="normalized_diff",
    col="layer_type_agg",
    hue="Descendant",
    kind="line",
    palette=custom_palette,
    height=1.875,
    aspect=1.61,
    col_wrap=2,
    # legend=False
)
# Facet titles show only the layer-type name.
g.set_titles("{col_name}")
g.set_ylabels("Normalized\nWeight Difference")
# g.set(yscale='log')
# NOTE: the outputs/figs directory is not created here and has to exist already.
g.savefig("outputs/figs/weightdiff.pdf")

### Further possible plots

Let's compare the lines in each facet to their mean. This gives a unified scale (change as a fraction of the mean change) and should make it easier to see where the changes are localized.

Result: nothing interesting is visible.

In [None]:
# Mean normalized difference per (descendant, aggregated layer type).
meandiff = (
    subdf.groupby(["Descendant", "layer_type_agg"])["normalized_diff"]
    .mean()
    .reset_index()
    .rename(columns={"normalized_diff": "normalized_diff_mean"})
)

# Attach the group mean to every row and express each value relative to it.
# NOTE(review): a group mean of 0 would produce inf/NaN here — confirm it cannot occur.
subdf_w_meandiffcol = subdf.merge(meandiff, how="left", on=["Descendant", "layer_type_agg"])
subdf_w_meandiffcol["scaled_normalized_diff"] = (
    subdf_w_meandiffcol["normalized_diff"] / subdf_w_meandiffcol["normalized_diff_mean"]
)
# NOTE: bare expression in the middle of the cell; it has no effect.
subdf_w_meandiffcol

# One separate figure per aggregated layer type, so each gets its own y-axis.
for agg_layer_type in subdf_w_meandiffcol.layer_type_agg.unique():
    g = sns.relplot(
        subdf_w_meandiffcol[subdf_w_meandiffcol.layer_type_agg == agg_layer_type],
        x="layer_idx",
        y="scaled_normalized_diff",
        col="layer_type_agg",
        hue="Descendant",
        kind="line",
        palette=custom_palette,
        height=3,
        aspect=1.6,
        legend=False,
    )
    g.set_titles("{col_name}")


Convert the named group colors to RGB tuples for use in LaTeX.

In [None]:
# Base colors of the three groups: blue, maroon, darkcyan

import matplotlib

# RGB tuple (floats in [0, 1]) of the named color; change the name to convert the others.
matplotlib.colors.to_rgb("darkcyan")

Whether the norm of the weight difference is normalized by model1 or by model2 makes no difference.

In [None]:
# NOTE: this rebinds `subdf`, replacing the frame that was used for the plots above.
subdf = data.loc[data.group == 2]
# Summary statistics of the gap between the two normalization variants in group 2.
(subdf["normalized_diff1"] - subdf["normalized_diff2"]).describe()

## CKA

In [None]:
import pandas as pd

# Load the combined CKA results, indexed by the CSV's "index" column; the
# "Unnamed: 0" column must exist in the file, otherwise `drop` raises.
df = pd.read_csv("combined_cka_results.csv", index_col="index").drop(columns="Unnamed: 0")
df.head(3)

In [None]:
# Maps the `descendant` values of the CKA results to the display names used as
# palette keys (e.g. "1-5B" -> "1.5B").
descendant_mapping = {
    "Nemotron-Research-Reasoning-Qwen-1-5B": "Nemotron-Res.-Reas.-Qwen-1.5B",
    "ZR1-1-5B": "ZR1-1.5B",
    "DeepScaleR-1-5B-Preview": "DeepScaleR-1.5B-Preview",
    "DeepCoder-1-5B-Preview": "DeepCoder-1.5B-Preview",
    "Open-RS1": "Open-RS1",
    "DeepMath-1-5B": "DeepMath-1.5B",
    "FastCuRL-1-5B-V3": "FastCuRL-1.5B-V3",
    "MiniMath-R1-1-5B": "MiniMath-R1-1.5B",
    "Open-RS3": "Open-RS3",
    "STILL-3-1-5B-preview": "STILL-3-1.5B-preview",
    "AceReason-Nemotron-1.1-7B": "AceReason-Nemotron-1.1-7B",
    "OpenR1-Distill": "OpenR1-Distill-7B",
    "DeepSeek-R1-Distill-Qwen-7B": "DeepSeek-R1-Distill-Qwen-7B",
    "Qwen2.5-Math-7B-Oat-Zero": "Qwen2.5-Math-7B-Oat-Zero",
    "ConciseR-Zero-7B": "ConciseR-Zero-7B",
    "AceReason-Nemotron-7B": "AceReason-Nemotron-7B",
    "Light-R1-7B-DS": "Light-R1-7B-DS",
    "Skywork-OR1-7B": "Skywork-OR1-7B",
}
# NOTE: `map` yields NaN for any descendant that is missing from the mapping.
df["Descendant"] = df["descendant"].map(descendant_mapping)

In [None]:
# List the distinct token origins available in the CKA results.
df.token_origin.unique()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

from matplotlib.lines import Line2D

# Restrict the CKA results to a single token origin.
plotdf = df[df.token_origin == "DeepSeek-R1-Distill-Qwen-7B"]
# plotdf = plotdf[plotdf.layer < plotdf.layer.max()]

# Fixed column order for the base models; a base model missing from the dict raises KeyError.
models = sorted(
    plotdf["base_model"].unique(),
    key=lambda x: {"Qwen2.5-Math-7B": 0, "DeepSeek-R1-Distill-Qwen-7B": 1, "DeepSeek-R1-Distill-Qwen-1-5B": 2}[x],
)

# 2 rows (linear, rbf), 3 columns (models)
fig, axes = plt.subplots(2, 3, figsize=(3 * 2 * 1.6, 2 * 2), sharex=False, sharey=False)

# Reuse the descendant -> color mapping built in the weight-difference section.
palette = custom_palette
custom_xticks = [1, 10, 12, 20, 27]

for col, model in enumerate(models):
    # NOTE: rebinds the notebook-level name `subdf`.
    subdf = plotdf[plotdf["base_model"] == model]

    # Top row: cka_linear
    # NOTE(review): markers/dashes belong to the `style` semantic, which is
    # commented out — they presumably have no effect here; confirm.
    sns.lineplot(
        data=subdf,
        x="layer",
        y="cka_linear",
        hue="Descendant",
        # style="Descendant",
        markers=True,
        dashes=False,
        palette=palette,
        ax=axes[0, col],
        legend=False,
    )
    axes[0, col].set_title(model)
    axes[0, col].set_xlabel("")
    # Only the left-most column carries a y-axis label.
    if col == 0:
        axes[0, col].set_ylabel("Linear CKA")
    else:
        axes[0, col].set_ylabel("")

    axes[0, col].set_xticks(custom_xticks)
    axes[0, col].set_xlim(1, 27)

    # Third column only: fix the y-range to (0.95, 1.002)
    if col == 2:
        axes[0, col].set_ylim(0.95, 1.002)

    # Bottom row: cka_rbf
    sns.lineplot(
        data=subdf,
        x="layer",
        y="cka_rbf",
        hue="Descendant",
        # style="Descendant",
        markers=True,
        dashes=False,
        palette=palette,
        ax=axes[1, col],
        legend=False,
    )
    axes[1, col].set_xlabel("Layer")
    axes[1, col].set_xticks(custom_xticks)
    axes[1, col].set_xlim(1, 27)

    if col == 0:
        axes[1, col].set_ylabel("RBF CKA")
    else:
        axes[1, col].set_ylabel("")


plt.tight_layout()

# The per-axes legends are disabled, so build one figure legend by hand; it
# lists every entry of the palette.
handles = [Line2D([0], [0], color=color, lw=2, label=name) for name, color in palette.items()]
plt.figlegend(handles=handles, bbox_to_anchor=(1.26, 0.95), loc="upper right")
# plt.savefig("outputs/figs/cka_linear_rbf_w_final_layer.pdf", bbox_inches="tight")
plt.savefig("outputs/figs/cka_linear_rbf.pdf", bbox_inches="tight")
plt.show()


#### For all token origins

In [None]:
# Same 2x3 figure as for the single token origin, produced and saved once per token origin.
for token_origin in df.token_origin.unique():
    plotdf = df[df.token_origin == token_origin]
    # NOTE(review): this comparison holds for every non-NaN layer, so all layers
    # (including the last one) are kept — confirm that `<=` rather than `<` is intended.
    plotdf = plotdf[plotdf.layer <= plotdf.layer.max()]

    # Fixed column order for the base models; a base model missing from the dict raises KeyError.
    models = sorted(
        plotdf["base_model"].unique(),
        key=lambda x: {"Qwen2.5-Math-7B": 0, "DeepSeek-R1-Distill-Qwen-7B": 1, "DeepSeek-R1-Distill-Qwen-1-5B": 2}[x],
    )

    # 2 rows (linear, rbf), 3 columns (models)
    fig, axes = plt.subplots(2, 3, figsize=(3 * 2 * 1.6, 2 * 2), sharex=False, sharey=False)

    # Reuse the descendant -> color mapping built in the weight-difference section.
    palette = custom_palette
    custom_xticks = [1, 10, 12, 20, 27]

    for col, model in enumerate(models):
        # NOTE: rebinds the notebook-level name `subdf`.
        subdf = plotdf[plotdf["base_model"] == model]

        # Top row: cka_linear
        sns.lineplot(
            data=subdf,
            x="layer",
            y="cka_linear",
            hue="Descendant",
            # style="Descendant",
            markers=True,
            dashes=False,
            palette=palette,
            ax=axes[0, col],
            legend=False,
        )
        axes[0, col].set_title(model)
        axes[0, col].set_xlabel("")
        # Only the left-most column carries a y-axis label.
        if col == 0:
            axes[0, col].set_ylabel("Linear CKA")
        else:
            axes[0, col].set_ylabel("")

        axes[0, col].set_xticks(custom_xticks)
        # axes[0, col].set_xlim(1, 27)

        # Axis limits are left to matplotlib here (the fixed limits are disabled)
        # if col == 2:
        # axes[0, col].set_ylim(0.95, 1.002)

        # Bottom row: cka_rbf
        sns.lineplot(
            data=subdf,
            x="layer",
            y="cka_rbf",
            hue="Descendant",
            # style="Descendant",
            markers=True,
            dashes=False,
            palette=palette,
            ax=axes[1, col],
            legend=False,
        )
        axes[1, col].set_xlabel("Layer")
        axes[1, col].set_xticks(custom_xticks)
        # axes[1, col].set_xlim(1, 27)

        if col == 0:
            axes[1, col].set_ylabel("RBF CKA")
        else:
            axes[1, col].set_ylabel("")

    plt.tight_layout()

    # The per-axes legends are disabled, so build one figure legend by hand; it
    # lists every entry of the palette.
    handles = [Line2D([0], [0], color=color, lw=2, label=name) for name, color in palette.items()]
    plt.figlegend(handles=handles, bbox_to_anchor=(1.26, 0.95), loc="upper right")
    # plt.suptitle(f"Reasoning Traces from {token_origin}", y=1.02)
    # The token origin becomes part of the output file name.
    plt.savefig(f"outputs/figs/cka_linear_rbf_{token_origin}.pdf", bbox_inches="tight")
    plt.show()


### CKA Before/After top proj removal

In [None]:
!ls outputs/cka_in_depth/*.csv -l

In [None]:
from pathlib import Path

# Collect every CSV in the directory whose file name starts with "cka_results".
root = Path("outputs/cka_in_depth")
partialdfs = []
for p in root.iterdir():
    if p.name.startswith("cka_results"):
        partialdf = pd.read_csv(p)
        partialdfs.append(partialdf)

# NOTE: rebinds `df`; pd.concat raises if no matching file was found.
df = pd.concat(partialdfs, ignore_index=True)
# Long format: the two score columns become rows, labelled by a "variable" column.
df = df.melt(
    id_vars=["base_model", "descendant_model", "sample_id", "layer"],
    value_vars=["linear_cka_score", "linear_cka_wo_highpc"],
)
# Replace the raw column names by the labels shown in the plot legend.
df.loc[:, "variable"] = df.loc[:, "variable"].map(
    {
        "linear_cka_score": "Linear CKA (all tokens)",
        "linear_cka_wo_highpc": "Linear CKA (w/o top 10 PC1\nprojecting tokens)",
    }
)
df = df.rename(columns={"variable": "Type", "descendant_model": "Descendant"})
# Keep only the part of the model name after the last "/".
df.loc[:, "Descendant"] = df["Descendant"].apply(lambda s: s.split("/")[-1])
print(len(df))
df.head(3)


In [None]:
# Line color encodes the descendant, line style the CKA variant ("Type").
# The colors come from the palette built in the weight-difference section.
g = sns.relplot(
    df,
    x="layer",
    hue="Descendant",
    y="value",
    style="Type",
    palette=custom_palette,
    aspect=1.6,
    height=2,
    kind="line",
    # Fixes the hue levels and their order in the legend.
    hue_order=[
        "ConciseR-Zero-7B",
        "Qwen2.5-Math-7B-Oat-Zero",
        "OpenR1-Distill-7B",
        "AceReason-Nemotron-1.1-7B",
        "DeepSeek-R1-Distill-Qwen-7B",
    ],
)
g.set_ylabels("Representational\nSimilarity")
g.set_xlabels("Layer")
g.savefig("outputs/figs/cka_wo_pc.pdf")

### Correlation between token norm and PC1 projection magnitude

In [None]:
from pathlib import Path

# Collect every CSV in the directory whose file name starts with "correlations".
root = Path("outputs/cka_in_depth")
partialdfs = []
for p in root.iterdir():
    if p.name.startswith("correlations"):
        partialdf = pd.read_csv(p)
        partialdfs.append(partialdf)

# NOTE: rebinds `df`; pd.concat raises if no matching file was found.
df = pd.concat(partialdfs, ignore_index=True)
print(len(df))
df.head(3)

In [None]:
# Inspect the raw overlap column.
df["overlap_descendant"]

In [None]:
# Mean overlap per layer. After groupby/describe, `layer` is the index of the frame.
# NOTE(review): relies on seaborn resolving x="layer" from the index name —
# confirm with the installed seaborn version.
sns.relplot(df.groupby(["layer"])["overlap_descendant"].describe(), x="layer", y="mean")

In [None]:
# Overlap over the layers, one facet per descendant model.
sns.relplot(df, x="layer", y="overlap_descendant", col="descendant_model", kind="line")

## Stitching

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the stitching results; NOTE: rebinds `df`.
df = pd.read_csv("data/stitching_performance.csv")
df

In [None]:
# Rows with traindata == "x" are drawn as horizontal baselines; all other rows
# are drawn as lines over the layers.
baselines = df.loc[df.traindata == "x"]
plotdata = df.loc[df.traindata != "x"].copy()

n_layers = 28
# Positive layer values are kept; non-positive ones are mapped to
# n_layers + layer + 1 (e.g. -1 -> 28).
# NOTE(review): a layer value of 0 would map to 29 — confirm that 0 never occurs.
plotdata["plot_layer"] = plotdata["layer"].apply(lambda x: x if x > 0 else n_layers + x + 1)

sns.set_context("paper")
g = sns.relplot(
    plotdata, x="plot_layer", y="accuracy", hue="targetmodel", kind="line", marker="o", height=3, aspect=1.61
)
ax = g.axes[0, 0]
for basemodel in baselines.basemodel.unique():
    # axhline/text expect a scalar y, so collapse the matching baseline rows to
    # one float (their mean if a base model has several baseline rows).
    acc = float(baselines.loc[baselines.basemodel == basemodel, "accuracy"].mean())
    ax.axhline(acc, linestyle="--", label=basemodel)
    # Label the baseline to the right of the last layer.
    ax.text(n_layers + 2, acc, basemodel)
# Set the title on the grid's own figure instead of on pyplot's current figure.
g.figure.suptitle("MATH 500")
g.savefig("outputs/figs/stitching.pdf")