In [None]:
# !pip install repeng accelerate datasets matplotlib seaborn

In [None]:
#!/usr/bin/env python
# RASPID prep – control vector + fluff-chunk classifier (seeded splits)
import os, math, random, warnings, re
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from repeng import ControlModel, ControlVector, DatasetEntry
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc
import warnings
from tqdm import tqdm

# ─── Reproducibility ───────────────────────────────────────────────────────
# One seed for Python's `random`, NumPy and torch. SEED is also passed as
# `random_state` to the train/val split and the SGD classifier further down.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# ─── Config ───────────────────────────────────────────────────────────────
MODEL_NAME   = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
DEVICE       = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE        = torch.float32
# Layer id handed to ControlModel and index into `hidden_states` used for
# the classifier features.
EMB_LAYER    = 20
# Cache file: the control vector is loaded from here when the file exists,
# otherwise it is trained and written here.
CTRL_VEC_PT  = "ctrl_vector.pt"
MAX_CV_EX    = 800          # contrastive examples for control vector
MAX_CLF_EX   = 200           # examples for chunk classifier
# Candidate chunk lengths (in tokens) tried by the grid search.
CHUNK_SIZES  = [16, 32, 64]
# Number of texts per forward pass in embed_texts.
BATCH_SIZE   = 32
MAX_CHUNKS   = 5000          # cap per chunk-size

print(f"Using first {MAX_CV_EX} examples for control vector creation")
print(f"Using first {MAX_CLF_EX} examples for chunk classifier dataset")

# ─── Load base model once ────────────────────────────────────────────────
tokenizer  = AutoTokenizer.from_pretrained(MODEL_NAME)
# device_map="auto" is only requested when CUDA is available; on CPU the
# model is loaded without a device map. `.eval()` switches off dropout etc.
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=DTYPE,
    device_map="auto" if DEVICE == "cuda" else None
).eval()
# NOTE(review): repeng's ControlModel is understood to wrap the selected
# layers of `base_model` in place, so `base_model` and `control_model` share
# the same (steerable) layers — confirm against the installed repeng version.
control_model = ControlModel(base_model, [EMB_LAYER]).to(DEVICE)

# ─── Control vector (PCA-center) ─────────────────────────────────────────
def _unit_rows(hiddens):
    """Scale every row of each layer's hidden-state matrix to unit L2 norm.

    ``hiddens`` maps a layer id to a 2-D array with one hidden-state vector
    per row. A small epsilon keeps all-zero rows from dividing by zero.
    """
    return {
        layer: mat / (np.linalg.norm(mat, axis=1, keepdims=True) + 1e-12)
        for layer, mat in hiddens.items()
    }


def train_or_load_ctrlvec():
    """Return the control vector, loading it from ``CTRL_VEC_PT`` if cached.

    When no cached file exists, a vector is trained with repeng's
    ``pca_center`` method on the first ``MAX_CV_EX`` rows of the
    ``rb/aime_reasoning`` train split, using ``refined_reasoning`` and
    ``reasoning_content`` as the two sides of each contrastive pair, and the
    result is saved to ``CTRL_VEC_PT``.

    Hidden states are L2-normalised per vector before PCA. The previous
    ``np.linalg.norm(v, 1)`` call computed the *matrix* 1-norm of the whole
    2-D array (a single scalar), which only rescaled all rows uniformly
    instead of normalising each hidden state.

    Returns:
        The loaded or freshly trained ``ControlVector``.
    """
    if os.path.exists(CTRL_VEC_PT):
        print("✅ loaded ctrl-vec")
        # weights_only=False unpickles arbitrary Python objects: only ever
        # point CTRL_VEC_PT at a file this script produced itself.
        return torch.load(CTRL_VEC_PT, map_location=DEVICE, weights_only=False)

    ds = load_dataset("rb/aime_reasoning", "default")["train"].select(range(MAX_CV_EX))
    print(f"Control-vector training on indices 0..{MAX_CV_EX-1}")
    pairs = [DatasetEntry(r["refined_reasoning"], r["reasoning_content"]) for r in ds]
    cv = ControlVector.train(control_model, tokenizer, pairs, batch_size=1,
                             method="pca_center",
                             transform_hiddens=_unit_rows)
    torch.save(cv, CTRL_VEC_PT)
    print("💾 saved ctrl-vec")
    return cv

# Module-level control vector; generate_raspid reads it when setting the
# steering coefficient.
ctrl_vec = train_or_load_ctrlvec()

# ─── Chunk-embedding helper ──────────────────────────────────────────────
def embed_texts(texts, max_len):
    """Mean-pool ``hidden_states[EMB_LAYER]`` of ``base_model`` for each text.

    Texts are tokenised in batches of ``BATCH_SIZE`` with padding and
    truncation to ``max_len`` tokens. The pooling is weighted by the
    attention mask so that padded positions do not contribute to the mean
    (a plain ``h.mean(1)`` would average the pad-token states in for every
    sequence shorter than the longest one in its batch).

    Args:
        texts: list of strings to embed.
        max_len: truncation length in tokens passed to the tokenizer.

    Returns:
        A 2-D NumPy array with one pooled feature row per input text.

    Raises:
        ValueError: if ``texts`` is empty (``np.vstack`` has nothing to stack).
    """
    feats = []
    for start in range(0, len(texts), BATCH_SIZE):
        toks = tokenizer(texts[start:start + BATCH_SIZE], padding=True, truncation=True,
                         max_length=max_len, return_tensors="pt").to(DEVICE)
        with torch.inference_mode():
            h = base_model(**toks, output_hidden_states=True).hidden_states[EMB_LAYER]
            # 1 for real tokens, 0 for padding; broadcast over the hidden dim.
            mask = toks["attention_mask"].to(device=h.device, dtype=h.dtype).unsqueeze(-1)
            # clamp guards against an all-padding row dividing by zero.
            pooled = (h * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1.0)
        feats.append(pooled.cpu().numpy())
    return np.vstack(feats)

# ─── Build chunk dataset once ────────────────────────────────────────────
# First MAX_CLF_EX rows of the train split, tokenised once up front so that
# make_chunks can slice token ids for every candidate chunk size.
ds_small = load_dataset("rb/aime_reasoning", "default")["train"].select(range(MAX_CLF_EX))
# "refined_reasoning" is the clean side (label 0 in make_chunks) ...
clean_ids = [tokenizer.encode(r["refined_reasoning"]) for r in ds_small]
# ... and "reasoning_content" is the fluff side (label 1 in make_chunks).
fluff_ids = [tokenizer.encode(r["reasoning_content"]) for r in ds_small]

def make_chunks(cs):
    """Cut the pre-tokenised corpora into decoded chunks of ``cs`` tokens.

    Every sequence in ``clean_ids`` (label 0) and then ``fluff_ids`` (label 1)
    is split into consecutive, non-overlapping windows of exactly ``cs``
    tokens; a trailing remainder shorter than ``cs`` is dropped. Each window
    is decoded back to text with special tokens skipped. If more than
    ``MAX_CHUNKS`` chunks result, a random subset of that size is drawn
    without replacement from NumPy's global RNG.

    Returns:
        ``(chunks, labels)`` — a list of strings and a NumPy label array.
    """
    texts, tags = [], []
    # clean=0 first, then fluff=1
    for tag, corpus in ((0, clean_ids), (1, fluff_ids)):
        for seq in corpus:
            pieces = [
                tokenizer.decode(seq[lo:lo + cs], skip_special_tokens=True)
                for lo in range(0, len(seq) - cs + 1, cs)
            ]
            texts.extend(pieces)
            tags.extend([tag] * len(pieces))

    # cap
    if len(texts) > MAX_CHUNKS:
        keep = np.random.choice(len(texts), MAX_CHUNKS, replace=False)
        texts = [texts[k] for k in keep]
        tags = [tags[k] for k in keep]
    return texts, np.array(tags)

# ─── Grid-search chunk size ──────────────────────────────────────────────
# For every candidate chunk size: build the chunk dataset, embed it, fit a
# logistic-loss SGD classifier and score it on a stratified 20 % hold-out.
# The data of the winning run is kept so the final plots describe exactly the
# classifier/split that was selected (previously the best chunk size was
# re-sampled, re-embedded and re-fitted a second time, which doubled the
# embedding cost and evaluated a different model than the one just scored).
# best_acc starts below any real accuracy so the first candidate always wins.
best_cs, best_acc, clf_best = None, -1.0, None
best_data = None

for cs in CHUNK_SIZES:
    X_text, y = make_chunks(cs)
    X = embed_texts(X_text, cs)
    Xtr, Xval, ytr, yval = train_test_split(X, y, test_size=0.2,
                                            random_state=SEED, stratify=y)
    clf = SGDClassifier(loss="log_loss", max_iter=500, tol=1e-3, random_state=SEED)
    clf.fit(Xtr, ytr)
    acc = accuracy_score(yval, clf.predict(Xval))
    print(f"chunk={cs:3d} → train {len(ytr)}, val {len(yval)}, val acc {acc*100:5.1f}%")
    if acc > best_acc:
        best_cs, best_acc, clf_best = cs, acc, clf
        best_data = (X_text, y, X, Xtr, Xval, ytr, yval)

if best_data is None:
    raise RuntimeError("CHUNK_SIZES is empty; no chunk classifier was trained")

print(f"\n▶ Best chunk = {best_cs} (val {best_acc*100:.1f}%)")

# ─── Final evaluation plots ──────────────────────────────────────────────
# Restore the winning run's arrays under the usual names.
X_text, y, X, Xtr, Xval, ytr, yval = best_data
pred   = clf_best.predict(Xval)
scores = clf_best.decision_function(Xval)

# labels=[0, 1] pins the matrix to 2x2 even if one class is absent from the
# hold-out predictions, so the annotation loop below cannot go out of range.
cm = confusion_matrix(yval, pred, labels=[0, 1])
plt.figure(figsize=(4,3))
plt.imshow(cm, cmap="Blues")
plt.colorbar()
plt.title("Confusion matrix")
plt.xticks([0,1],["clean","fluff"])
plt.yticks([0,1],["clean","fluff"])
half = cm.max() / 2
for i in range(2):
    for j in range(2):
        # white on dark cells, black on light ones, so every count is legible
        plt.text(j, i, cm[i,j], ha="center", va="center",
                 color="white" if cm[i,j] > half else "black")
plt.tight_layout()
plt.show()

fpr, tpr, _ = roc_curve(yval, scores)
plt.figure(figsize=(4,3))
plt.plot(fpr, tpr, label=f"AUC={auc(fpr,tpr):.3f}")
plt.plot([0,1], [0,1], "--")
plt.xlabel("FPR")
plt.ylabel("TPR")
plt.title("ROC curve")
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Default cap on newly generated tokens for generate_baseline.
MAX_TOKENS = 4096

In [None]:
# ─── Baseline generator ────────────────────────────────────────────────────
@torch.inference_mode()
def generate_baseline(prompt, max_tokens=MAX_TOKENS):
    """Sample a completion for ``prompt`` with ``base_model.generate``.

    Uses nucleus sampling (temperature 0.3, top-p 0.9) with a repetition
    penalty of 1.2 and the EOS token as pad token.

    Args:
        prompt: text to continue.
        max_tokens: maximum number of new tokens to generate.

    Returns:
        ``(text, n_new)`` — the decoded full sequence (prompt included,
        special tokens skipped) and the number of tokens generated beyond
        the prompt.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    sampling = dict(
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=0.3,
        top_p=0.9,
        repetition_penalty=1.2,
        pad_token_id=tokenizer.eos_token_id,
    )
    sequences = base_model.generate(**encoded, **sampling)

    prompt_len = encoded.input_ids.shape[1]
    n_new = sequences.shape[1] - prompt_len
    text = tokenizer.decode(sequences[0], skip_special_tokens=True)
    return text, n_new

In [None]:
import re, torch, math, numpy as np

@torch.inference_mode()
def generate_raspid(
    prompt: str,
    max_tokens: int = 2048,
    chunk_size: int = 64,            # best chunk size
    kp: float = 0.3,
    ki: float = 0.002,
    kd: float = 0.0005,
    max_alpha: float = 1.0,
    clf=clf_best,
    clf_layer: int = EMB_LAYER,
    fluff_star: float = 0.05,        # target fluff‐prob
    raw_scale: float = 10.0,         # scale raw score into sigmoid
    base_temp: float = 1.0,
    steer_temp: float = 0.3,
    init_free: int = 40,
    steer_window: int = 60,
    max_repeat: int = 8,
    stop_regex: str = r"\\boxed\{[^{}]{1,12}\}",
):
    """Generate token by token while a PID loop steers away from "fluff".

    Every ``chunk_size`` forward steps the mean hidden state at
    ``clf_layer`` is scored by ``clf``; the squashed score is compared with
    ``fluff_star`` and the error drives a PID controller whose output ``α``
    is used both as the control-vector coefficient (while steering is on)
    and to interpolate the sampling temperature between ``base_temp`` and
    ``steer_temp``.

    Fixes relative to the earlier version:
      * the first forward pass now feeds the *whole* prompt (previously only
        the last prompt token was fed with an empty cache, so the model
        never saw the prompt);
      * the stop pattern is matched against the decoded tail of the
        generated text instead of a single decoded token, which could never
        contain a multi-token ``\\boxed{...}``;
      * the control is reset when generation ends, so later calls that use
        the shared underlying model are not steered by a leftover ``α``;
      * the chunk feature is no longer L2-normalised, because ``clf`` is
        fitted on the raw mean-pooled features produced by ``embed_texts``;
      * the sigmoid is evaluated via ``tanh`` so large scores cannot
        overflow ``math.exp``, and the temperature mix tolerates
        ``max_alpha == 0`` and never drops to zero.

    Args:
        prompt: text to continue.
        max_tokens: maximum number of new tokens.
        chunk_size: number of forward steps averaged into one feature.
        kp, ki, kd: proportional / integral / derivative gains.
        max_alpha: upper clamp for the steering coefficient ``α``.
        clf: classifier exposing ``decision_function``; the default is the
            ``clf_best`` object that existed when this function was defined.
        clf_layer: index into the model's ``hidden_states`` tuple.
        fluff_star: set-point for the fluff probability.
        raw_scale: divisor applied to the decision score before the sigmoid.
        base_temp: sampling temperature at ``α = 0``.
        steer_temp: sampling temperature at ``α = max_alpha``.
        init_free: generated tokens before steering is first enabled.
        steer_window: tokens after which steering is switched off and the
            PID state is cleared.
        max_repeat: stop once the same token has been fed this many extra
            times in a row.
        stop_regex: generation stops when this pattern matches the decoded
            tail of the generated text.

    Returns:
        ``(text, n_new)`` — decoded prompt plus generation (special tokens
        skipped) and the number of generated tokens.
    """
    stop_re = re.compile(stop_regex)
    ids = tokenizer(prompt, return_tensors="pt").to(DEVICE).input_ids[0]
    out_ids, past = ids.clone(), None
    n_prompt = len(ids)
    # How many trailing generated tokens are decoded for the stop check. The
    # default pattern spans at most 20 characters, so 32 tokens is ample; a
    # much longer custom pattern would need a larger tail.
    stop_tail = 32

    def sampling_temp(alpha):
        # Linear blend base_temp → steer_temp as alpha goes 0 → max_alpha,
        # floored so logits are never divided by zero.
        mix = alpha / max_alpha if max_alpha > 0 else 0.0
        return max(base_temp * (1 - mix) + steer_temp * mix, 1e-6)

    α = I = D = prev_err = 0.0
    chunk_hidd = None
    tok_in_chunk = 0
    steering_on = False
    steer_start = 0
    repeat_ctr = 0
    last_tok = None

    print("\n*** RASPID LOG START ***")
    print(" chunk | p_fluff |  err   |   α    |  temp")

    try:
        while len(out_ids) - n_prompt < max_tokens:
            gen_len = len(out_ids) - n_prompt
            if not steering_on and gen_len >= init_free:
                steering_on, steer_start = True, gen_len
            # NOTE(review): once switched off here, the check above turns
            # steering back on in the next iteration, so in effect this is a
            # periodic reset of the PID state every steer_window tokens.
            if steering_on and gen_len - steer_start > steer_window:
                steering_on, α, I, D = False, 0.0, 0.0, 0.0

            control_model.set_control(ctrl_vec, coeff=α if steering_on else 0.0)

            # Prime the KV cache with the full prompt on the first step;
            # afterwards only the newest token has to be fed.
            step_ids = out_ids if past is None else out_ids[-1:]
            out = control_model(
                input_ids=step_ids.unsqueeze(0),
                past_key_values=past,
                use_cache=True,
                output_hidden_states=True,
            )
            past = out.past_key_values
            logits = out.logits[0, -1].clamp(-100, 100)
            h_last = out.hidden_states[clf_layer][0, -1]

            # repetition guard
            tok = out_ids[-1].item()
            if tok == last_tok:
                repeat_ctr += 1
                if repeat_ctr >= max_repeat:
                    break
            else:
                repeat_ctr, last_tok = 0, tok

            # accumulate hidden
            chunk_hidd = h_last if chunk_hidd is None else chunk_hidd + h_last
            tok_in_chunk += 1

            if tok_in_chunk >= chunk_size:
                # Raw mean-pooled feature, matching what clf was fitted on.
                feat = (chunk_hidd / chunk_size).cpu().unsqueeze(0).numpy()

                raw = clf.decision_function(feat)[0]
                # sigmoid(z) == 0.5 * (1 + tanh(z / 2)), without overflow
                p_fluff = 0.5 * (1.0 + math.tanh(0.5 * raw / raw_scale))
                err     = p_fluff - fluff_star

                # PID update (derivative term is exponentially smoothed)
                I += ki * err
                D = (1 - 0.1) * D + 0.1 * kd * (err - prev_err)
                prev_err = err
                α = max(0.0, min(max_alpha, kp * err + I + D))

                temp = sampling_temp(α)
                print(f"{chunk_size:6d} | {p_fluff:7.3f} | {err:+6.3f} | "
                      f"{α:7.3f} | {temp:6.3f}")

                # reset
                chunk_hidd = None
                tok_in_chunk = 0

            # sample next token
            temp = sampling_temp(α)
            probs = torch.softmax(logits / temp, dim=-1)
            nxt = torch.multinomial(probs, 1).item()
            out_ids = torch.cat([out_ids, torch.tensor([nxt], device=out_ids.device)])

            if nxt == tokenizer.eos_token_id:
                break
            # Only generated tokens are inspected, never the prompt itself.
            tail_text = tokenizer.decode(out_ids[n_prompt:][-stop_tail:].tolist())
            if stop_re.search(tail_text):
                break
    finally:
        # Return the shared model to unsteered behaviour.
        control_model.reset()

    print("*** RASPID LOG END ***\n")
    return tokenizer.decode(out_ids, skip_special_tokens=True), len(out_ids) - n_prompt

In [None]:
# ─── AIME benchmark ────────────────────────────────────────────────────────
def run_aime_benchmark(n: int = 1, max_tokens: int = 4096):
    """Compare baseline and RASPID generation on held-out AIME problems.

    Rows with index below ``max(MAX_CV_EX, MAX_CLF_EX)`` were used for the
    control vector / chunk classifier and are excluded. ``n`` problems are
    sampled from the remainder with Python's ``random`` module; each is
    answered by ``generate_baseline`` and ``generate_raspid`` and the
    extracted answers are compared with the reference.

    The question and reference now come from the sampled dataset row
    (``question`` / ``reference_answer``). Previously a hard-coded debug
    question ("square root of 256") overrode every sampled problem, so the
    benchmark never touched AIME data.

    NOTE(review): ``norm`` and ``extract_answer`` are not defined in the
    visible part of this file — presumably provided elsewhere; if they are
    missing, every problem is skipped via the handler below.

    Args:
        n: number of problems to evaluate (reduced, with a warning, if fewer
            are available).
        max_tokens: generation budget passed to both generators.

    Returns:
        A DataFrame with one row per successfully evaluated problem (also
        written to ``aime_results.csv``), or an empty DataFrame if none
        succeeded.
    """
    ds_all = load_dataset("rb/aime_reasoning", "default")["train"]
    # reserve indices used for control vector and chunk classifier
    n_reserved = max(MAX_CV_EX, MAX_CLF_EX)
    eval_indices = list(range(n_reserved, len(ds_all)))
    avail = len(eval_indices)
    if n > avail:
        warnings.warn(f"Requested {n} problems but only {avail} available for evaluation; reducing to {avail}.")
        n = avail

    # Sample row indices first and fetch only those rows, instead of
    # materialising the whole evaluation split as a Python list.
    problems = ds_all.select(random.sample(eval_indices, n))

    records = []
    for prob in tqdm(problems, desc="AIME benchmark"):
        # Bound up front so the warning below can always print something.
        q = "<question unavailable>"
        try:
            q   = prob["question"]
            ref = norm(str(prob["reference_answer"]))

            prompt = (
                f"{q}\n\n"
                "Answer step by step and end with: "
                "Final answer: \\boxed{numeric_value}"
            )

            b_txt, b_tok = generate_baseline(prompt, max_tokens)
            r_txt, r_tok = generate_raspid  (prompt, max_tokens)

            b_ans = norm(extract_answer(b_txt))
            r_ans = norm(extract_answer(r_txt))

            records.append({
                "question": q,
                "reference": ref,
                "baseline_txt": b_txt,
                "raspid_txt": r_txt,
                "baseline_tokens": b_tok,
                "raspid_tokens": r_tok,
                "baseline_answer": b_ans,
                "raspid_answer": r_ans,
                "baseline_correct": (b_ans == ref),
                "raspid_correct": (r_ans == ref),
                "token_saving_pct": (1 - r_tok / b_tok) * 100 if b_tok else 0
            })

        except Exception as e:
            # Deliberate best-effort: one failing problem must not abort the
            # whole benchmark run, so report it and move on.
            warnings.warn(f"⚠️  Error on AIME problem:\n{q}\n→ {e}\nSkipping.")
            continue

    if not records:
        print("No successful generations — nothing to report.")
        return pd.DataFrame()

    df = pd.DataFrame(records)
    print(
        f"\nAcc baseline: {df.baseline_correct.mean()*100:.1f}% | "
        f"RASPID: {df.raspid_correct.mean()*100:.1f}% | "
        f"Avg saving: {df.token_saving_pct.mean():.1f}%"
    )
    df.to_csv("aime_results.csv", index=False)
    return df

# Single-problem run when executed directly (not when imported as a module).
if __name__ == "__main__":
    run_aime_benchmark(n=1, max_tokens=4096)

In [None]:
# Reload the CSV written by run_aime_benchmark.
df = pd.read_csv('aime_results.csv')

In [None]:
# Full RASPID output for the first result row.
print(df.iloc[0]['raspid_txt'])

In [None]:
# Full baseline output for the first result row.
print(df.iloc[0]['baseline_txt'])

In [None]:
# NOTE(review): no earlier visible cell binds `ds` at top level (it is a
# local in train_or_load_ctrlvec; a later cell assigns it only when training
# a new vector) — this relies on notebook execution order, confirm.
ds[0]

In [None]:
#!/usr/bin/env python
import os
import numpy as np
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from repeng import ControlModel, ControlVector, DatasetEntry

# ─── CONFIG ─────────────────────────────────────────────────────────────────
# NOTE(review): in a shared notebook kernel these assignments rebind
# MODEL_NAME, MAX_CV_EX (800 → 1000) and BATCH_SIZE (32 → 1) from the first
# cell; functions defined earlier read these globals at call time.
MODEL_NAME   = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
EMB_LAYERS   = [20]              # layer ids handed to ControlModel
CTRL_OUT     = "ctrl_vector.pt"  # cache file for the trained control vector
MAX_CV_EX    = 1000              # number of contrastive examples used for training
BATCH_SIZE   = 1                 # batch size passed to ControlVector.train

# ─── NORMALIZATION HELPER ────────────────────────────────────────────────────
def normalize_hiddens(hiddens):
    """Return a new dict with each layer's rows scaled to unit L2 norm.

    ``hiddens`` maps a layer id to a 2-D array holding one hidden-state
    vector per row. A tiny epsilon is added to every row norm so an all-zero
    row stays zero instead of dividing by zero.
    """
    eps = 1e-12
    return {
        layer: rows / (np.linalg.norm(rows, axis=1, keepdims=True) + eps)
        for layer, rows in hiddens.items()
    }

# ─── LOAD MODEL ─────────────────────────────────────────────────────────────
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Loaded in float32 and put into eval mode; device placement is delegated to
# device_map="auto".
base = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",         # automatically shards weights to GPU/CPU
    torch_dtype=torch.float32
).eval()

# Wrap without moving to(device)—we rely on base’s own device_map
# (unlike the first cell, which calls .to(DEVICE) on the wrapper).
ctrl_model = ControlModel(base, EMB_LAYERS)

# ─── TRAIN OR LOAD CONTROL VECTOR ───────────────────────────────────────────
# Reuse the cached vector when CTRL_OUT exists; otherwise train on the first
# MAX_CV_EX contrastive pairs (refined vs. raw reasoning) and cache it.
if os.path.exists(CTRL_OUT):
    print("✅ loading existing control vector")
    # weights_only=False unpickles arbitrary objects: only load a file that
    # this script wrote itself.
    ctrl_vec = torch.load(CTRL_OUT, map_location="cpu", weights_only=False)
else:
    print(f"⚙️  training new control vector on first {MAX_CV_EX} examples…")
    ds = load_dataset("rb/aime_reasoning", "default")["train"].select(range(MAX_CV_EX))
    pairs = [DatasetEntry(r["refined_reasoning"], r["reasoning_content"]) for r in ds]
    # model, tokenizer and the pair list are passed positionally, exactly as
    # the first cell does: repeng names the third parameter `dataset`, so the
    # former `contrastive=pairs` keyword left it unfilled and the call failed.
    ctrl_vec = ControlVector.train(
        ctrl_model,
        tokenizer,
        pairs,
        batch_size=BATCH_SIZE,
        method="pca_center",
        transform_hiddens=normalize_hiddens,
    )
    torch.save(ctrl_vec, CTRL_OUT)
    print("💾 saved new control vector to", CTRL_OUT)

# ─── INSPECT ────────────────────────────────────────────────────────────────
# Summarise the vector: model type, covered layers, and the L2 norm of each
# layer's direction.
print("\nControlVector:")
print(" model_type:", ctrl_vec.model_type)
layer_ids = sorted(ctrl_vec.directions.keys())
print(" layers:", layer_ids)
for layer_id, direction in ctrl_vec.directions.items():
    l2 = np.linalg.norm(direction)
    print(f"  layer {layer_id}: ‖v‖₂ = {l2:.4f}")
