# Riddle LLama
This notebook trains a reasoning model to answer riddles. Riddles are reasoning-heavy tasks: to solve one, a model must learn to associate different facts and concepts and combine them coherently to arrive at the right answer.

In [1]:
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the riddle dataset by its Hugging Face Hub identifier. The result is
# indexed by split name below (only "train" is used in this notebook).
riddles_dataset = load_dataset("mlfoundations-dev/riddle_sense")

Downloading readme: 100%|██████████| 543/543 [00:00<00:00, 262kB/s]
Downloading data: 100%|██████████| 3.33M/3.33M [00:23<00:00, 143kB/s]
Downloading data files: 100%|██████████| 1/1 [00:23<00:00, 23.34s/it]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 14.62it/s]
Generating train split: 100%|██████████| 26502/26502 [00:00<00:00, 128858.18 examples/s]


In [15]:
# Peek at the "question" field of one arbitrary training row to see the raw
# schema before reformatting.
riddles_dataset["train"][7890]["question"]

{'stem': 'To avoid problems when going somewhere, whats a crucial step you should take?',
 'choices': [{'label': 'A', 'text': 'plan ahead'},
  {'label': 'B', 'text': 'find out where'},
  {'label': 'C', 'text': 'highway'},
  {'label': 'D', 'text': 'know where to go'},
  {'label': 'E', 'text': 'get going'}],
 'question_concept': 'go somewhere'}

In [17]:
# Prompt templates.
# SYSTEM_PROMPT is sent as the system message and tells the model to wrap its
# output in <reasoning> and <answer> tags. It starts and ends with a newline.
SYSTEM_PROMPT = (
    "\n"
    "Respond in the following format:\n"
    "<reasoning>\n"
    "...\n"
    "</reasoning>\n"
    "<answer>\n"
    "...\n"
    "</answer>\n"
)

# XML_COT_FORMAT is the matching str.format template with two placeholders,
# {reasoning} and {answer}. Unlike SYSTEM_PROMPT it has no leading newline.
XML_COT_FORMAT = (
    "<reasoning>\n"
    "{reasoning}\n"
    "</reasoning>\n"
    "<answer>\n"
    "{answer}\n"
    "</answer>\n"
)


In [18]:
# Convert every raw training row into the chat-style record used downstream:
#   {"prompt": [system message, user message], "answer": gold answer text}
reformatted_riddles_dataset = []
skipped_rows = 0
for doc in riddles_dataset["train"]:
    # Rebuild the label -> text lookup for each row. Sharing one dict across
    # rows lets entries from earlier rows linger, so a row whose answerKey is
    # not among its own choices would silently inherit a stale answer.
    labels = {choice["label"]: choice["text"] for choice in doc["question"]["choices"]}

    answer = labels.get(doc["answerKey"])
    if answer is None:
        # None of this row's choices carries the gold label, so there is no
        # trustworthy answer to train on; leave the row out and count it.
        skipped_rows += 1
        continue

    prompt = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": doc["question"]["stem"]},
    ]
    reformatted_riddles_dataset.append({"prompt": prompt, "answer": answer})

if skipped_rows:
    print(f"Skipped {skipped_rows} rows whose answerKey matched none of their choices")

In [19]:
from sklearn.model_selection import train_test_split
# 80/10/10 split: hold out 20% of the records first, then halve that hold-out
# pool into dev and test. Both calls use the same fixed seed.
train, not_train = train_test_split(
    reformatted_riddles_dataset,
    test_size=0.2,
    random_state=42,
)
dev, test = train_test_split(
    not_train,
    test_size=0.5,
    random_state=42,
)

In [20]:
## Convert list of dicts to a huggingface dataset
from datasets import Dataset
# Wrap each split (a list of dicts) in a Hugging Face Dataset, in the order
# train, dev, test.
train_dataset, dev_dataset, test_dataset = (
    Dataset.from_list(records) for records in (train, dev, test)
)

In [22]:
# Show the first converted training record to confirm the prompt/answer layout.
train_dataset[0]

{'prompt': [{'content': '\nRespond in the following format:\n<reasoning>\n...\n</reasoning>\n<answer>\n...\n</answer>\n',
   'role': 'system'},
  {'content': 'HOw do you carry potatos home?', 'role': 'user'}],
 'answer': 'grocery bag'}

In [16]:
# Report how many records ended up in each split, one line per split.
print(
    f"Train size: {len(train)}",
    f"Dev size: {len(dev)}",
    f"Test size: {len(test)}",
    sep="\n",
)

Train size: 21201
Dev size: 2650
Test size: 2651


In [None]:
%%capture
# Install Unsloth and vLLM; %%capture keeps the pip logs out of the notebook.
# Colab is detected by joining all environment-variable names into one string
# and looking for the substring "COLAB_".
import os
if "COLAB_" not in "".join(os.environ.keys()):
    # Not in Colab: regular install, letting pip resolve dependencies.
    !pip install unsloth vllm
else:
    # Colab only: install without dependencies. Everywhere else use
    # `pip install unsloth vllm` (the branch above).
    !pip install --no-deps unsloth vllm

In [None]:
#@title Colab Extra Install { display-mode: "form" }
%%capture
# Extended install cell. Outside Colab it performs the plain Unsloth + vLLM
# install; inside Colab it installs the stack piece by piece without pulling
# in dependencies automatically.
# NOTE(review): the pip installs below are mostly unpinned and the vLLM
# requirements file is fetched from the `main` branch, so results can change
# between runs.
import os
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth vllm
else:
    # Colab only; outside Colab use `pip install unsloth vllm` (branch above).
    !pip install --no-deps unsloth vllm
    # Skip restarting message in Colab: evict every already-imported module
    # whose name contains "PIL" or "google" from sys.modules.
    import sys, re, requests; modules = list(sys.modules.keys())
    for x in modules: sys.modules.pop(x) if "PIL" in x or "google" in x else None
    # Core training dependencies, installed with --no-deps.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft "trl==0.15.2" triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    
    # vLLM requirements - vLLM breaks Colab due to reinstalling numpy.
    # Download vLLM's common requirements list, strip the transformers / numpy /
    # xformers entries from it, and install what remains.
    f = requests.get("https://raw.githubusercontent.com/vllm-project/vllm/refs/heads/main/requirements/common.txt").content
    with open("vllm_requirements.txt", "wb") as file:
        file.write(re.sub(rb"(transformers|numpy|xformers)[^\n]{1,}\n", b"", f))
    !pip install -r vllm_requirements.txt

### Unsloth
Load up `Qwen 2.5 3B Instruct`, and set parameters

In [None]:
from unsloth import FastLanguageModel, is_bfloat16_supported
import torch
# Sequence-length budget passed to the loader; raise it for longer reasoning traces.
max_seq_length = 1024
# LoRA rank: a larger rank is smarter, but slower.
lora_rank = 64

# Load the base model and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Qwen/Qwen2.5-3B-Instruct",
    max_seq_length=max_seq_length,
    load_in_4bit=True,  # switch to False for 16-bit LoRA
    fast_inference=True,  # turns on vLLM fast inference
    max_lora_rank=lora_rank,
    gpu_memory_utilization=0.5,  # lower this if the GPU runs out of memory
)

# Projection layers that receive LoRA adapters. If memory is tight, the
# Q/K/V/O group is the one to drop.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

# Wrap the base model with LoRA adapters.
model = FastLanguageModel.get_peft_model(
    model,
    r=lora_rank,  # any value > 0 works; suggested 8, 16, 32, 64, 128
    target_modules=attention_projections + mlp_projections,
    lora_alpha=lora_rank,
    use_gradient_checkpointing="unsloth",  # enables long-context finetuning
    random_state=3407,
)