In [None]:
from steerit import *

if __name__ == "__main__":
    # End-to-end demo: load a causal LM, wrap it for steering, fit a steering
    # vector from contrastive prompt pairs, then compare baseline generation
    # with generation steered in the positive and negative directions.
    # Statements are kept flat (not wrapped in a function) so the resulting
    # objects stay available as globals when this runs as a notebook cell.

    # 1) Load DeepSeek-R1-Distill-Qwen-1.5B on CPU
    model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Reuse EOS as the padding token so batched tokenization can pad.
    tokenizer.pad_token = tokenizer.eos_token
    base_model = AutoModelForCausalLM.from_pretrained(model_name).to("cpu")

    # 2) Wrap with SteeringModel, intercepting the last 2 layers.
    # The depth is read from the model config instead of being hard-coded:
    # the previous [10, 11] was carried over from a 12-layer model and does
    # not point at the final layers of the model loaded above.
    # NOTE(review): assumes SteeringModel takes 0-based indices into the
    # decoder layer stack -- confirm against steerit.
    num_layers = base_model.config.num_hidden_layers
    chosen_layers = [num_layers - 2, num_layers - 1]
    model = SteeringModel(base_model, chosen_layers).to("cpu")

    # 3) Define a few contrastive prompt pairs.
    # Presumably (positive, negative) ordering, matching the "towards happy"
    # label used for the un-negated vector below -- verify against steerit.
    prompt_pairs = [
        ("I am extremely happy about life", "I am extremely sad about life"),
        ("You are a joyful person", "You are a depressed person"),
    ]

    # 4) Train the SteeringVector on the same layers that are intercepted.
    # show_progress=True displays progress bars during training.
    steering_vec = train_steering_vector(
        model,
        tokenizer,
        prompt_pairs,
        layer_ids=chosen_layers,
        method="pca_diff",
        batch_size=1,
        show_progress=True,
    )

    # 5) Compare unsteered vs. steered generation on one prompt
    prompt = "Tell me how you feel today:"

    print("\n--- Baseline Generation ---")
    # Clear any active steering so this run is the unmodified reference.
    model.reset_steering()
    baseline_text = model.generate_text(
        prompt, tokenizer, max_new_tokens=20, show_progress=True
    )
    print(baseline_text)

    print("\n--- Steered Generation (towards happy) ---")
    # Large coefficient so the effect is clearly visible in a short sample.
    model.set_steering(steering_vec, coeff=10.0)
    steered_text = model.generate_text(
        prompt, tokenizer, max_new_tokens=20, show_progress=True
    )
    print(steered_text)

    print("\n--- Steered Generation (towards sad) ---")
    # Negating the vector steers in the opposite direction at the same strength.
    model.set_steering(-steering_vec, coeff=10.0)
    steered_text_sad = model.generate_text(
        prompt, tokenizer, max_new_tokens=20, show_progress=True
    )
    print(steered_text_sad)
