## Install Dependencies

In [None]:
"""

TODO: Make requirements.txt

"""
!pip install -qU bitsandbytes peft transformers accelerate
!pip install -q datasets sentencepiece

## Mount Drive

In [None]:
# Attach Google Drive so that the folders under /content/drive/MyDrive used
# later in the notebook are available in this Colab session.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


## Imports and Config

In [None]:
# --------------------------
# 1. Environment Setup
# --------------------------
import os

# Google Drive locations used by the rest of the notebook (offload space,
# logs/outputs, the saved LoRA adapter, training checkpoints, and the
# Hugging Face download cache).
essential_folders = {
    "offload": "/content/drive/MyDrive/math_model/offload",
    "outputs": "/content/drive/MyDrive/math_model/outputs",
    "lora_adapter": "/content/drive/MyDrive/math_model/lora_adapter",
    "checkpoints": "/content/drive/MyDrive/math_model/checkpoints",
    "cache": "/content/drive/MyDrive/math_model/hf_cache",
}

# Create with verification
for name, path in essential_folders.items():
    os.makedirs(path, exist_ok=True)
    if not os.path.exists(path):
        raise RuntimeError(f"Failed to create {name} folder at {path}")
    print(f"✅ {name} folder: {path}")

# These environment variables are set BEFORE the Hugging Face libraries are
# imported: the cache location is resolved when the libraries are first
# imported, so assigning HF_HOME afterwards does not redirect downloads.
# If the kernel has already imported transformers, restart it for this to apply.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["HF_HOME"] = essential_folders["cache"]

import torch
from google.colab import userdata
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
    GenerationConfig,
    pipeline
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from peft import PeftModel

✅ offload folder: /content/drive/MyDrive/math_model/offload
✅ outputs folder: /content/drive/MyDrive/math_model/outputs
✅ lora_adapters folder: /content/drive/MyDrive/math_model/lora_adapter
✅ checkpoints folder: /content/drive/MyDrive/math_model/checkpoints
✅ cache folder: /content/drive/MyDrive/math_model/hf_cache


## Load Model

In [None]:
# --------------------------
# 2. Model & Tokenizer Loading
# --------------------------
model_name = "zhentingqi/rStar"
# Hugging Face access token read from the Colab secrets store.
token = userdata.get('HF_TOKEN')

# 4-bit config for T4
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16  # float16 for T4 compatibility
)

# Load tokenizer; the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
tokenizer.pad_token = tokenizer.eos_token

# Load model. The offload directory comes from `essential_folders`; the
# previous bare name `offload_folder` was never defined and raised NameError.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
    token=token,
    offload_folder=essential_folders["offload"]
)

tokenizer_config.json:   0%|          | 0.00/1.14k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/594 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.6k [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

## LoRA Setup

In [None]:
# --------------------------
# 3. LoRA Configuration
# --------------------------
# Prepare the quantized model for k-bit training.
model = prepare_model_for_kbit_training(model)

# T4-optimized LoRA config
lora_config = LoraConfig(
    r=8,  # Lower rank for T4 memory
    lora_alpha=32,
    target_modules=["self_attn.q_proj", "self_attn.v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    modules_to_save=None
)

# Attach the adapters first: before this call the breakdown below reported
# 0.00M because no parameter had requires_grad=True yet.
model = get_peft_model(model, lora_config)

print("\n🔍 Trainable Parameters Breakdown:")
trainable_params = 0
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"- {name}: {param.numel()} params")
        trainable_params += param.numel()
print(f"Total trainable: {trainable_params/1e6:.2f}M")

model.print_trainable_parameters()  # Should show ~4M trainable params


🔍 Trainable Parameters Breakdown:
Total trainable: 0.00M
trainable params: 3,932,160 || all params: 7,333,728,256 || trainable%: 0.0536


## Training Arguments

In [None]:
# --------------------------
# 4. Training Setup
# --------------------------
# Checkpoints are written once per epoch (save_strategy="epoch"). The earlier
# `save_steps=200` contradicted that strategy and had no effect, so it is gone.
training_args = TrainingArguments(
    output_dir=essential_folders['checkpoints'],
    logging_dir=essential_folders['outputs'],
    per_device_train_batch_size=1,  # Critical for T4
    gradient_accumulation_steps=8,   # Compensate for small batch
    learning_rate=1e-5,              # Lower rate for stability
    num_train_epochs=2,              # Fewer epochs
    fp16=True,
    gradient_checkpointing=True,     # Save memory
    logging_steps=5,
    optim="paged_adamw_8bit",        # Better for T4
    save_strategy="epoch",
    report_to="none"
)

## Dataset Preparation

In [None]:
# --------------------------
# 5. Data Preparation
# --------------------------
# TODO - Expand training dataset
# Hand-written problem/solution pairs used as the fine-tuning set.
# Every "output" is a raw string (r"...") so that LaTeX such as \boxed and
# \dfrac keeps its backslash; in a normal literal "\b" is a backspace character.
# NOTE(review): some solutions look like placeholders (one is labelled
# "Hypothetical solution", and the hexagon entry boxes 504 after computing
# ≈ 2.598) — verify the answers before relying on this data.
train_dataset = [
    # From 2024 AIME I
    {
        "input": "Jen enters a lottery by picking 4 distinct numbers from S={1,2,...,10}. 4 numbers are randomly chosen from S. She wins a prize if at least two match, and wins the grand prize if all four match. Given she won a prize, find the probability of winning the grand prize.",
        "output": r"Using conditional probability: P(Grand|Prize) = 1/C(10,4) / [C(4,2)C(6,2)+C(4,3)C(6,1)+1]/C(10,4) = 1/115 → 1+115=116. Answer: \boxed{116}"
    },

    {
        "input": "Alice and Bob play a token game starting with n tokens. Players take turns removing 1 or 4 tokens. Last token remover wins. Find numbers ≤2024 where Bob can force a win.",
        "output": r"Bob wins when n ≡ 0 or 2 mod 5. Pattern repeats every 5 numbers. 2024 ÷ 5 = 404.8 → 405×2=810 winning numbers. Answer: \boxed{810}"
    },

    {
        "input": "Find the largest possible real part of (a+bi) where |a+bi|=1 and (a+bi)^3 is real.",
        "output": r"Express in polar form: z³ = e^{i3θ} ∈ ℝ ⇒ 3θ = kπ ⇒ θ=0,π/3. Max Re(z)=cos(π/3)=0.5. Answer: \boxed{\dfrac{1}{2}}"
    },

    # From 2024 AIME II
    {
        "input": "In a 2x3 grid, place digits to make horizontal sums 1000 and vertical sums 200. Find number of arrangements.",
        "output": r"Let columns be ABC DEF. A+B=1000, D+E=1000; C+F=200. Digit constraints ⇒ C=F=100 impossible. No solution exists. Answer: \boxed{0}"
    },

    {
        "input": "Find the side length of a convex equilateral hexagon with opposite sides parallel, where extended sides form a triangle with lengths 7, 8, 9.",
        "output": r"Using similar triangles and parallel properties: Hexagon side = (7×8×9)/(7²+8²+9²) = 504/194 ≈ 2.598. Answer: \boxed{504}"
    },

    {
        "input": "Let x,y,z >0 satisfy log₂(x/yz)=1/2, log₂(y/xz)=1/3, log₂(z/xy)=1/4. Find |log₂(x⁴y³z²)|.",
        "output": r"Convert to linear system: a-b-c=1/2, -a+b-c=1/3, -a-b+c=1/4. Solve: a=-7/24, b=-3/8, c=-5/12. |4a+3b+2c|=25/8 → 25+8=33. Answer: \boxed{33}"
    },

    {
        "input": "Eight radius-1 circles tangent to two lines. The inradius of formed triangle is m/n. Find m+n.",
        "output": r"Using tangent circle geometry: Inradius r = 1+2√2. Answer: 1+2+2=5 → \boxed{5} (Hypothetical solution)"
    },

    {
        "input": "Find number of 13th roots of unity ζ where ζ^2024 + ζ^-2024 is real.",
        "output": r"ζ^k = e^{2πik/13}. Real condition ⇒ cos(2024×2πk/13) ∈ ℝ. 2024 ≡ 2024 mod 13 = 9. Solutions when 9k ≡ 0 or 6.5 mod 13 ⇒ 6 solutions. Answer: \boxed{6}"
    },

    {
        "input": "In regular octagon, color vertices red/blue randomly. Find probability some rotation matches blue positions to original reds.",
        "output": r"Use Burnside's lemma: 8 rotations. Only identity and 180° contribute. (2^8 + 2^4)/8×256 = 288/2048 = 9/64 → 9+64=73. Answer: \boxed{73}"
    },

    {
        "input": "Torus with radius 3 revolved around axis 6 units away. Sphere radius 11 tangent internally/externally. Find difference in tangent circle radii.",
        "output": r"Using torus equations: r_in = 11-√(6²-3²)=11-√27, r_out=11+√27. Difference=2√27=6√3=6×3=18 → \boxed{18}"
    }
]

def tokenize_function(examples):
    """Tokenize one problem/solution record into a fixed-length encoding.

    Args:
        examples: A single record (despite the plural name) providing the
            'input' and 'output' keys.

    Returns:
        The result of calling the notebook-level `tokenizer` on the combined
        "Problem/Solution" text, truncated and padded to 1024 tokens.
    """
    prompt = "Problem: {}\nSolution: {}".format(examples['input'], examples['output'])
    # Fixed-length rows (shorter for T4 memory) so they can be stacked later.
    encode_options = dict(truncation=True, max_length=1024, padding="max_length")
    return tokenizer(prompt, **encode_options)

# Tokenize every record of the in-memory training set, preserving order.
tokenized_dataset = list(map(tokenize_function, train_dataset))

## Train Model

In [None]:
# --------------------------
# 6. Trainer Setup
# --------------------------
def collate_for_causal_lm(data):
    """Stack tokenized examples into a batch for causal-LM training.

    Args:
        data: List of tokenized examples, each providing equal-length
            'input_ids' and 'attention_mask' sequences.

    Returns:
        Dict with 'input_ids', 'attention_mask' and 'labels' tensors. Labels
        copy the input ids, except that padded positions (attention mask 0)
        are set to -100 so they are excluded from the loss. The mask is used
        instead of the pad token id because the pad token is the EOS token.
    """
    input_ids = torch.stack([torch.tensor(d["input_ids"]) for d in data])
    attention_mask = torch.stack([torch.tensor(d["attention_mask"]) for d in data])
    labels = input_ids.clone()
    labels[attention_mask == 0] = -100
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=collate_for_causal_lm
)

print("Training started... (This may take 1-2 hours)")
trainer.train()

Training started... (This may take 1-2 hours)


Step,Training Loss


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


TrainOutput(global_step=2, training_loss=34.132816314697266, metrics={'train_runtime': 102.2597, 'train_samples_per_second': 0.196, 'train_steps_per_second': 0.02, 'total_flos': 424814460272640.0, 'train_loss': 34.132816314697266, 'epoch': 1.0})

## Save Model

In [None]:
# --------------------------
# 7. Save model
# --------------------------
# Write the adapter to the 'lora_adapter' folder and the tokenizer files to the
# 'outputs' folder. (The tokenizer call was missing its closing parenthesis,
# which made the whole cell a SyntaxError.)
model.save_pretrained(essential_folders['lora_adapter'])
tokenizer.save_pretrained(essential_folders['outputs'])

NameError: name 'model' is not defined

## Inference

In [None]:
# --------------------------
# 8. Inference
# --------------------------
# Reload the quantized base model (same settings and token as the training
# load) and attach the adapter saved in the 'lora_adapter' folder.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
    token=token
)
trained_model = PeftModel.from_pretrained(base_model, essential_folders['lora_adapter'])

# T4-safe generation config. Greedy decoding (do_sample=False) is already
# deterministic; temperature only applies when sampling, so it is not set.
generation_config = GenerationConfig(
    max_new_tokens=512,  # Conservative for T4
    do_sample=False,
    pad_token_id=tokenizer.eos_token_id
)

# Create pipeline. No `device` argument: the model was placed by
# device_map="auto", and a pipeline cannot move such a model to another device.
math_pipeline = pipeline(
    "text-generation",
    model=trained_model,
    tokenizer=tokenizer,
    generation_config=generation_config
)

# Test (the per-call max_new_tokens overrides the config value)
prompt = "Problem: Prove that √3 is irrational.\nSolution:"
result = math_pipeline(prompt, max_new_tokens=256)[0]['generated_text']
print(result)




"""
TODO: Find optimal parameters for model


Generation Config:
--------------------------------------------------------------------------------
max_new_tokens: 2048
                Set to 2048 to ensure the model has enough space to generate a
                complete and detailed answer for complex proofs. May be a
                trade-off because of the model rambling; will have to test and see.
temperature: 0.1
                Set to 0.1 for a small amount of randomness/exploration. Due to
                the difficulty of these questions, I think that deterministic
                generation will not be PERFECT – we need the model to think.
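do_sample: TRUE
                Set to true to discourage deterministic thinking; temperature and
                top_p only take effect when sampling is enabled (with FALSE,
                decoding is greedy and both settings are ignored).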
pad_token_id: tokenizer.eos_token_id
                Necessary for batching
top_p: 0.9
                Set to 0.9 to focus on high probability tokens
repetition_penalty: 1.1
                Set to 1.1 to reduce repetition in long proofs
--------------------------------------------------------------------------------



"""