## Installation

In [61]:
# %%capture
import os, re

import torch; v = re.match(r"[0-9]{1,}\.[0-9]{1,}", str(torch.__version__)).group(0)
# xformers = "xformers==" + ("0.0.33.post1" if v=="2.9" else "0.0.32.post2" if v=="2.8" else "0.0.29.post3")
# !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
# !pip install sentencepiece protobuf "datasets==4.3.0" "huggingface_hub>=0.34.0" hf_transfer
# !pip install --no-deps unsloth
# !pip install transformers==4.56.2
# !pip install --no-deps trl==0.22.2
# !pip install protobuf==3.20.3

# !pip install  scikit-learn

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [62]:
# !pip install scikit-learn
# !pip install matplotlib
# !pip install ipywidgets

## Model Loading

In [None]:
from unsloth import FastQwen2Model
from unsloth import is_bfloat16_supported
from trl import SFTTrainer
from transformers import TrainingArguments
import json
import pandas as pd

max_seq_length = 512
dtype = None
load_in_4bit = True

model, tokenizer = FastQwen2Model.from_pretrained(
    model_name="unsloth/Qwen2.5-Coder-0.5B-Instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

model = FastQwen2Model.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    use_gradient_checkpointing=True,
    random_state=3407,
)

==((====))==  Unsloth 2026.1.4: Fast Qwen2 patching. Transformers: 4.56.2. vLLM: 0.14.1.
   \\   /|    NVIDIA GeForce RTX 3090. Num GPUs = 1. Max memory: 24.0 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 8.6. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [64]:
## Dataset Loading
from pathlib import Path

jsonl_file = Path('/home/larcanio/AIMO3_v2/data/datasets/splits/algebra_specialist/train.jsonl')

datapoints = []
with open(jsonl_file, 'r', encoding='utf-8') as f:
    for line_num, line in enumerate(f, 1):
        if line.strip():
            try:
                datapoint = json.loads(line)
                datapoints.append(datapoint)
            except json.JSONDecodeError as e:
                print(f"Error parsing line {line_num}: {e}")

print(f"Loaded {len(datapoints)} datapoints from dataset")

# Extract successful samples with code
columns = ['problem_id', 'problem', 'answer', 'domain', 'difficulty', 'pass@k', 'code']
items = []
for datapoint in datapoints:
    outcome = datapoint.get('outcome', {})
    if outcome.get('status') == 'success' and outcome.get('pass_at_k') is not None:
        classification = datapoint.get('classification', {})
        items.append([
            datapoint.get('problem_id'),
            datapoint.get('problem', {}).get('text'),
            datapoint.get('problem', {}).get('expected_answer'),
            classification.get('domain', 'unknown'),
            classification.get('difficulty', 'unknown'),
            outcome.get('pass_at_k'),
            datapoint.get('attempts', [])[outcome.get('pass_at_k') - 1].get('code') if datapoint.get('attempts') else None
        ])

dataset = pd.DataFrame(items, columns=columns)

print("\nDataset Statistics:")
print(f"  Total samples: {len(dataset)}")
print(f"\nPass@k Distribution:\n{dataset['pass@k'].value_counts()}")
print(f"\nDifficulty Distribution:\n{dataset['difficulty'].value_counts()}")
print(f"\nDomain Distribution:\n{dataset['domain'].value_counts()}")

Loaded 3049 datapoints from dataset

Dataset Statistics:
  Total samples: 3049

Pass@k Distribution:
pass@k
1    2791
2     229
3      24
4       5
Name: count, dtype: int64

Difficulty Distribution:
difficulty
unknown    3049
Name: count, dtype: int64

Domain Distribution:
domain
unknown    3049
Name: count, dtype: int64


In [65]:
import re as _re
## Dataset Preparation and Formatting

STRIP_COMMENTS = True  # Toggle: True to remove all Python comments from code, False to keep them

def strip_code_comments(code: str) -> str:
    """Remove Python comments from code.
    
    Handles:
      - Full-line comments (including # Goal: / # Plan: headers)
      - Inline comments (e.g.  x = 1  # some note)
      - Preserves strings containing '#'
      - Cleans up resulting blank lines
    """
    lines = code.split("\n")
    cleaned = []
    for line in lines:
        stripped = line.lstrip()
        # Skip full-line comments
        if stripped.startswith("#"):
            continue
        # Remove inline comments (naive but safe for math code without '#' in strings)
        # Walk the line respecting string literals
        result = []
        in_string = None
        i = 0
        while i < len(line):
            ch = line[i]
            if in_string:
                result.append(ch)
                if ch == '\\':
                    i += 1
                    if i < len(line):
                        result.append(line[i])
                elif ch == in_string:
                    in_string = None
            else:
                if ch in ('"', "'"):
                    # Check for triple quotes
                    if line[i:i+3] in ('"""', "'''"):
                        in_string = line[i:i+3]
                        result.append(line[i:i+3])
                        i += 3
                        # For triple-quoted strings we just keep the rest as-is
                        # (multi-line strings won't appear in single-line processing)
                        in_string = ch
                        continue
                    else:
                        in_string = ch
                        result.append(ch)
                elif ch == '#':
                    # Rest of line is a comment — trim trailing whitespace
                    break
                else:
                    result.append(ch)
            i += 1
        new_line = "".join(result).rstrip()
        # Skip lines that became empty after stripping
        if new_line.strip() == "":
            # Keep truly blank lines that were originally blank (preserve code structure)
            if line.strip() == "":
                cleaned.append("")
            continue
        cleaned.append(new_line)
    # Remove leading/trailing blank lines, collapse multiple blank lines
    text = "\n".join(cleaned)
    text = _re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()


def format_prompt(problem: str) -> str:
    return f"""Write a Python program that computes the correct answer to the following math problem.
Requirements:
- The program must compute the answer programmatically (do NOT hard-code the final value).
- The program must be fully self-contained and executable.
- The program must print ONLY the final numerical answer (no extra text).
Output format:
- Output exactly one Python code block, starting with ```python and ending with ```.
- Do not include any text outside the code block.
Problem:
{problem}"""

def format_dataset(df):
    """Format dataset samples using chat template."""
    def format_example(row):
        code = row['code']
        if STRIP_COMMENTS:
            code = strip_code_comments(code)
        messages = [
            {"role": "system", "content": "You are a mathematician writing Python code to solve problems."},
            {"role": "user", "content": format_prompt(row['problem'])},
            {"role": "assistant", "content": f"```python\n{code}\n```"}
        ]
        return tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False
        )
    return df.apply(format_example, axis=1).tolist()

# Format datasets
print("Formatting datasets...")
print(f"  STRIP_COMMENTS = {STRIP_COMMENTS}")
train_texts = format_dataset(dataset)
print(f"  Training samples: {len(train_texts)}")

# Filter by token length
def filter_by_token_length(texts, tokenizer, max_length):
    filtered_texts = []
    token_lengths = []
    for text in texts:
        tokens = tokenizer.encode(text, add_special_tokens=True)
        token_length = len(tokens)
        token_lengths.append(token_length)
        if token_length <= max_length:
            filtered_texts.append(text)
    
    filtered_out = len(texts) - len(filtered_texts)
    return filtered_texts, filtered_out, token_lengths

print(f"\nFiltering samples by max token length ({max_seq_length})...")
train_texts_filtered, train_filtered_out, train_token_lengths = filter_by_token_length(
    train_texts, tokenizer, max_seq_length
)
print(f"  Original: {len(train_texts)}, Filtered out: {train_filtered_out}, Remaining: {len(train_texts_filtered)}")
if train_token_lengths:
    print(f"  Avg tokens: {sum(train_token_lengths)/len(train_token_lengths):.0f}, "
          f"Max: {max(train_token_lengths)}, Min: {min(train_token_lengths)}")

# Use filtered training texts directly
combined_train_texts = train_texts_filtered

print("\nFinal Training Dataset:")
print(f"  Total samples: {len(combined_train_texts)}")
print(f"  Filtered out total: {train_filtered_out}")

Formatting datasets...
  STRIP_COMMENTS = True
  Training samples: 3049

Filtering samples by max token length (512)...
  Original: 3049, Filtered out: 21, Remaining: 3028
  Avg tokens: 249, Max: 1016, Min: 161

Final Training Dataset:
  Total samples: 3028
  Filtered out total: 21


In [66]:
# Preview 3 samples — extract just the code block from the formatted text
import random
random.seed(42)
preview_indices = random.sample(range(len(combined_train_texts)), 3)
for idx in preview_indices:
    sample = combined_train_texts[idx]
    # Extract the code between ```python and ```
    start = sample.find("```python\n")
    end = sample.find("\n```", start + 10)
    code_block = sample[start + len("```python\n"):end] if start != -1 and end != -1 else "(could not extract)"
    print(f"{'='*60}")
    print(f"Sample #{idx}")
    print(f"{'='*60}")
    print(code_block)
    print()

Sample #2619
import fractions
from fractions import Fraction

def P_at(x):
    total = Fraction(0,1)
    for i in range(1,11):
        yi = Fraction(2**i,1)
        num = Fraction(1,1)
        den = Fraction(1,1)
        for j in range(1,11):
            if j==i: continue
            num *= Fraction(x-j,1)
            den *= Fraction(i-j,1)
        total += yi * num / den
    return total

value = P_at(12)
assert value.denominator == 1
print(value.numerator)

Sample #456
from fractions import Fraction

LIMIT = 5

min_val_sq = None
min_triple = None

for a in range(-LIMIT, LIMIT+1):
    for b in range(-LIMIT, LIMIT+1):
        for c in range(-LIMIT, LIMIT+1):
            if a == b or b == c or a == c:
                continue
            x = 2*a - b - c
            y = b - c
            if x == 0 or y == 0 or x == y:
                continue
            val_sq = x*x + 3*y*y
            if min_val_sq is None or val_sq < min_val_sq:
                min_val_sq = val_sq
                min_

## Training

In [None]:
## Training Setup and Configuration
from unsloth import train_on_responses_only

from datasets import Dataset
from datetime import datetime

# Configuration
model_name = "Qwen2.5-Coder-0.5B-Instruct"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_base = Path("output")
run_name = f"run_{model_name}_{timestamp}"
output_dir = output_base / run_name
checkpoint_dir = output_dir / "checkpoints"
model_output_dir = output_dir / "model"
config_file = output_dir / "training_config.md"

# Create directories
output_dir.mkdir(parents=True, exist_ok=True)
checkpoint_dir.mkdir(parents=True, exist_ok=True)
model_output_dir.mkdir(parents=True, exist_ok=True)

# Create training dataset
train_dataset = Dataset.from_dict({"text": combined_train_texts})

# Initialize trainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    max_seq_length=max_seq_length,
    dataset_text_field="text",
    packing=True,
    args=TrainingArguments(
        per_device_train_batch_size=4,
        gradient_accumulation_steps=8,
        warmup_ratio=0.1,
        num_train_epochs=1,
        learning_rate=5e-5,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=50,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="cosine",
        seed=3407,
        output_dir=str(checkpoint_dir),
        save_strategy="epoch",
        save_total_limit=1,
        remove_unused_columns=True
    ),
)

trainer = train_on_responses_only(
    trainer,
    instruction_part="<|im_start|>user\n",
    response_part="<|im_start|>assistant\n",
)

print("Training configuration:\n")
print(f"  Model: {model_name}")
print(f"  Run: {run_name}")
print(f"  Output directory: {output_dir}")
print(f"  Checkpoint directory: {checkpoint_dir}")
print(f"  Training samples: {len(combined_train_texts)}")
print("  Batch size: 4 × 4 (gradient accumulation)")
print("  Total epochs: 1")

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/3028 [00:00<?, ? examples/s]

Map (num_proc=11):   0%|          | 0/3028 [00:00<?, ? examples/s]

Training configuration:

  Model: Qwen2.5-Coder-0.5B-Instruct
  Run: run_Qwen2.5-Coder-0.5B-Instruct_20260208_013101
  Output directory: output/run_Qwen2.5-Coder-0.5B-Instruct_20260208_013101
  Checkpoint directory: output/run_Qwen2.5-Coder-0.5B-Instruct_20260208_013101/checkpoints
  Training samples: 3028
  Batch size: 4 × 4 (gradient accumulation)
  Total epochs: 1


In [68]:
# batch = next(iter(trainer.get_train_dataloader()))
# batch = next(iter(trainer.get_train_dataloader()))
# batch = next(iter(trainer.get_train_dataloader()))
# print(batch["input_ids"][0])
# labels = batch["labels"][0]
# print(labels)
# print("trainable:", (labels != -100).sum().item(), " / total:", labels.numel())


In [None]:
# input_ids = batch["input_ids"][0].tolist()
# labels = batch["labels"][0].tolist()
# first_train = next(i for i,x in enumerate(labels) if x != -100)
# print("first_train:", first_train)
# print(tokenizer.decode([n for n in labels if n != -100], skip_special_tokens=True))

first_train: 188
```python
current_cost = 5 * 20
increase = current_cost * 25 // 100
print(increase)
```



In [70]:
## Execute Training

print(f"\n{'='*70}")
print(f"Starting training...")
print(f"{'='*70}\n")

trainer.train()

print(f"\n{'='*70}")
print(f"Training completed successfully!")
print(f"{'='*70}\n")


Starting training...



==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 3,028 | Num Epochs = 1 | Total steps = 95
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 8 x 1) = 32
 "-____-"     Trainable parameters = 8,798,208 of 502,830,976 (1.75% trained)


Step,Training Loss
50,0.7202



Training completed successfully!



In [71]:
## Save Final Model and Configuration

print(f"\nSaving final model to {model_output_dir}...")

# Save the model (including LoRA weights)
trainer.model.save_pretrained(str(model_output_dir))
tokenizer.save_pretrained(str(model_output_dir))

print("✓ Model saved successfully")
print(f"  Location: {model_output_dir}")
print("  Files: model weights, tokenizer config, special tokens map")

# Extract and save actual training configuration from trainer
print(f"\nExtracting and saving training configuration...")

args = trainer.args

# Save configuration as markdown
with open(config_file, 'w') as f:
    f.write("# Training Configuration\n\n")
    f.write(f"**Run Name**: {run_name}\n\n")
    f.write(f"**Model**: {model_name}\n\n")
    
    f.write("## Model Configuration\n\n")
    f.write(f"- Max Sequence Length: {max_seq_length}\n")
    f.write(f"- Data Type: {'auto' if dtype is None else str(dtype)}\n")
    f.write(f"- Load in 4bit: {load_in_4bit}\n\n")
    
    f.write("## LoRA Configuration\n\n")
    f.write("- R: 16\n")
    f.write("- LORA_ALPHA: 32\n")
    f.write("- LORA_DROPOUT: 0\n")
    f.write("- TARGET_MODULES: ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj']\n\n")
    
    f.write("## Dataset Configuration\n\n")
    f.write(f"- Total Samples: {len(train_dataset)}\n\n")
    
    f.write("## Training Arguments (from trainer.args)\n\n")
    f.write(f"- per_device_train_batch_size: {args.per_device_train_batch_size}\n")
    f.write(f"- gradient_accumulation_steps: {args.gradient_accumulation_steps}\n")
    f.write(f"- warmup_ratio: {args.warmup_ratio}\n")
    f.write(f"- warmup_steps: {args.warmup_steps}\n")
    f.write(f"- num_train_epochs: {args.num_train_epochs}\n")
    f.write(f"- learning_rate: {args.learning_rate}\n")
    f.write(f"- fp16: {args.fp16}\n")
    f.write(f"- bf16: {args.bf16}\n")
    f.write(f"- logging_steps: {args.logging_steps}\n")
    f.write(f"- optim: {args.optim}\n")
    f.write(f"- weight_decay: {args.weight_decay}\n")
    f.write(f"- lr_scheduler_type: {args.lr_scheduler_type}\n")
    f.write(f"- seed: {args.seed}\n")
    f.write(f"- save_strategy: {args.save_strategy}\n")
    f.write(f"- save_total_limit: {args.save_total_limit}\n\n")
    
    f.write("## Output Directories\n\n")
    f.write(f"- Output Base: {output_dir}\n")
    f.write(f"- Checkpoints: {checkpoint_dir}\n")
    f.write(f"- Final Model: {model_output_dir}\n")

print(f"✓ Training configuration saved to: {config_file}")

# Summary
print(f"\n{'='*70}")
print("TRAINING SUMMARY")
print(f"{'='*70}")
print(f"Run Name: {run_name}")
print(f"Output Directory: {output_dir}")
print("  ├── checkpoints/  (Training checkpoints)")
print("  ├── model/        (Final trained model)")
print("  └── training_config.md  (Configuration reference)")
print(f"{'='*70}")


Saving final model to output/run_Qwen2.5-Coder-0.5B-Instruct_20260208_013101/model...
✓ Model saved successfully
  Location: output/run_Qwen2.5-Coder-0.5B-Instruct_20260208_013101/model
  Files: model weights, tokenizer config, special tokens map

Extracting and saving training configuration...
✓ Training configuration saved to: output/run_Qwen2.5-Coder-0.5B-Instruct_20260208_013101/training_config.md

TRAINING SUMMARY
Run Name: run_Qwen2.5-Coder-0.5B-Instruct_20260208_013101
Output Directory: output/run_Qwen2.5-Coder-0.5B-Instruct_20260208_013101
  ├── checkpoints/  (Training checkpoints)
  ├── model/        (Final trained model)
  └── training_config.md  (Configuration reference)
