In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face Hub checkpoint used for both the tokenizer and the model.
model_name = "Qwen/Qwen1.5-1.8B"

print("Loading tokenizer...")
# trust_remote_code is intentionally not passed: it permits Python code shipped
# in the Hub repository to execute locally, and Qwen1.5 checkpoints are handled
# by the architecture built into transformers, so the flag is not needed.
tokenizer = AutoTokenizer.from_pretrained(model_name)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",          # let the library decide device placement
    torch_dtype=torch.float16,  # load the weights in half precision
)

# Put the module in evaluation mode; this notebook only runs generation.
model.eval()
print("Qwen 1.5-1.8B ready")


Loading tokenizer...
Loading model...
Qwen 1.5-1.8B ready


In [2]:
# Check whether PyTorch in this kernel reports an available CUDA device.
import torch
torch.cuda.is_available()


True

In [5]:
# Show which `python` executable the shell resolves first on PATH.
# NOTE(review): this is the shell's view and may differ from the interpreter the
# kernel itself runs under (`sys.executable`) — confirm if the environment matters.
!which python

/taiga/illinois/eng/cs/juliahmr/jinulee/ojas/env/bin/python


In [3]:
from datasets import load_dataset

# MMLU subject "high_school_government_and_politics", test split only.
dataset = load_dataset("tasksource/mmlu", "high_school_government_and_politics", split="test")

# Show the first record to see which fields each example carries.
print(dataset[0])


{'question': 'Which of the following best describes the balance the Supreme Court has struck between the establishment clause and the free-exercise clause?', 'choices': ['Freedom of speech is protected except in certain situations, such as yelling "fire" in a crowded theater.', 'Once a church has been recognized by the federal government, its tax-exempt status can never be revoked.', 'Once Congress has created an administrative agency, that agency can be dissolved only by a constitutional amendment.', 'State-sponsored prayer during school hours is prohibited, but voluntary prayer by student groups before school is allowed.'], 'answer': 3}


In [6]:
def format_prompt(example):
    """Render a multiple-choice example as a plain-text prompt.

    Args:
        example: Mapping with a "question" string and a "choices" sequence of
            answer strings.

    Returns:
        A string with the question on the first line, one "<letter>. <choice>"
        line per choice (lettered A, B, C, ... in order), and a final
        "Answer:" line for the model to complete.

    Raises:
        ValueError: If there are more than 26 choices, since labels are the
            letters A-Z.
    """
    question = example["question"]
    choices = list(example["choices"])
    if len(choices) > 26:
        raise ValueError(f"format_prompt supports at most 26 choices, got {len(choices)}")

    # Derive the labels from the number of choices. A fixed A-D list zipped
    # with the choices would silently drop every choice after the fourth.
    lettered_choices = [
        f"{chr(ord('A') + position)}. {text}" for position, text in enumerate(choices)
    ]
    joined = "\n".join(lettered_choices)
    return f"{question}\n{joined}\nAnswer:"



In [7]:
# Preview the prompt text that format_prompt produces for the first example.
print(format_prompt(dataset[0]))


Which of the following best describes the balance the Supreme Court has struck between the establishment clause and the free-exercise clause?
A. Freedom of speech is protected except in certain situations, such as yelling "fire" in a crowded theater.
B. Once a church has been recognized by the federal government, its tax-exempt status can never be revoked.
C. Once Congress has created an administrative agency, that agency can be dissolved only by a constitutional amendment.
D. State-sponsored prayer during school hours is prohibited, but voluntary prayer by student groups before school is allowed.
Answer:


In [8]:
def generate_completion(prompt, max_new_tokens=20, temperature=1.0, seed=None):
    """Sample one continuation of `prompt` from the notebook-level `model`.

    Args:
        prompt: Text that is tokenized with the notebook-level `tokenizer` and
            passed to `model.generate`.
        max_new_tokens: Forwarded to `model.generate` as `max_new_tokens`.
        temperature: Forwarded to `model.generate` as `temperature`.
        seed: When not None, passed to `torch.manual_seed` before sampling so a
            given seed can be replayed.

    Returns:
        The first generated sequence, decoded with special tokens skipped. In
        this notebook's recorded outputs the decoded text starts with the
        prompt itself.
    """
    import torch

    # Re-seed only on request; otherwise sampling uses the current RNG state.
    if seed is not None:
        torch.manual_seed(seed)

    # Tokenize and move the encoded batch to the device the model lives on.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    sampling_kwargs = dict(
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,  # sample, so different seeds can take different paths
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,  # the EOS id is reused as the pad id
    )
    generated = model.generate(**encoded, **sampling_kwargs)

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)


In [9]:
# Sample the same prompt twice under different seeds to see whether, and where,
# the two completions diverge.
prompt = format_prompt(dataset[0])
output1 = generate_completion(prompt, seed=42)
output2 = generate_completion(prompt, seed=123)

# The "," separator makes print insert a space after the header's newline, so
# each completion is shown with one leading space.
print("Completion 1:\n", output1)
print("\nCompletion 2:\n", output2)


Completion 1:
 Which of the following best describes the balance the Supreme Court has struck between the establishment clause and the free-exercise clause?
A. Freedom of speech is protected except in certain situations, such as yelling "fire" in a crowded theater.
B. Once a church has been recognized by the federal government, its tax-exempt status can never be revoked.
C. Once Congress has created an administrative agency, that agency can be dissolved only by a constitutional amendment.
D. State-sponsored prayer during school hours is prohibited, but voluntary prayer by student groups before school is allowed.
Answer: A

Completion 2:
 Which of the following best describes the balance the Supreme Court has struck between the establishment clause and the free-exercise clause?
A. Freedom of speech is protected except in certain situations, such as yelling "fire" in a crowded theater.
B. Once a church has been recognized by the federal government, its tax-exempt status can never be revo

In [11]:

def find_forking_index(output1, output2):
    """Return the token index at which two completions first diverge.

    Both strings are tokenized with the notebook-level `tokenizer` and the
    resulting id sequences are compared position by position.

    Args:
        output1: First completion text.
        output2: Second completion text.

    Returns:
        The index of the first position where the token ids differ. If one
        sequence is a strict prefix of the other, the length of the shorter
        sequence is returned, because the longer one continues there while the
        shorter one has ended. Returns -1 only when both sequences are
        identical.
    """
    # NOTE(review): the ids come from re-tokenizing decoded text, which
    # presumably can differ from the ids the model actually generated — confirm
    # if exact generation-step indices matter.
    # Plain Python lists of ints are enough here; no tensors are needed.
    tokens1 = tokenizer(output1)["input_ids"]
    tokens2 = tokenizer(output2)["input_ids"]

    for i, (token1, token2) in enumerate(zip(tokens1, tokens2)):
        if token1 != token2:
            return i

    # Same ids over the shared length: a length mismatch is still a fork, at
    # the point where the shorter sequence stops.
    if len(tokens1) != len(tokens2):
        return min(len(tokens1), len(tokens2))
    return -1  # Sequences are identical
# One record per question; this list is what the export cell writes to disk.
results = []

for i in range(10):  # Start small with 10 examples
    ex = dataset[i]
    prompt = format_prompt(ex)

    # Same prompt, two fixed seeds: any difference comes from sampling alone.
    out1 = generate_completion(prompt, seed=42)
    out2 = generate_completion(prompt, seed=123)

    # Index is in tokens of the full text passed in.
    # NOTE(review): the completions shown earlier include the prompt, so the
    # index presumably counts prompt tokens too — confirm before interpreting it.
    fork_idx = find_forking_index(out1, out2)

    results.append({
        "question_id": i,
        "prompt": prompt,
        "completion_1": out1,
        "completion_2": out2,
        "forking_index": fork_idx,
        # "answer" is an index into "choices"; store the answer text itself.
        "correct_answer": ex["choices"][ex["answer"]]
    })

    print(f"[{i}] Fork at token:", fork_idx)


[0] Fork at token: 115
[1] Fork at token: 115
[2] Fork at token: 56
[3] Fork at token: -1
[4] Fork at token: 66
[5] Fork at token: 67
[6] Fork at token: 50
[7] Fork at token: 72
[8] Fork at token: 70
[9] Fork at token: -1


In [12]:
import json
import pandas as pd

# JSON export: the records are written as-is, completions keep their newlines.
with open("forking_results.json", "w") as f:
    json.dump(results, f, indent=2)

# CSV export: same fields in the same order, but newlines inside the two
# completions are replaced by spaces. The prompt is left unchanged.
csv_friendly = [
    dict(
        question_id=r["question_id"],
        prompt=r["prompt"],
        completion_1=r["completion_1"].replace("\n", " "),
        completion_2=r["completion_2"].replace("\n", " "),
        forking_index=r["forking_index"],
        correct_answer=r["correct_answer"],
    )
    for r in results
]

df = pd.DataFrame(csv_friendly)
df.to_csv("forking_results.csv", index=False)
