In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTTrainer, SFTConfig, DataCollatorForCompletionOnlyLM
from datasets import load_dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training

# Imported here (not in the import block) so this setup block is self-contained.
from transformers import BitsAndBytesConfig

# --- Run configuration ---
# Checkpoint that is loaded for both the model and the tokenizer.
model_id = 'Qwen/Qwen2.5-3B'
# Forwarded to LoraConfig(modules_to_save=...) further down.
modules_to_save=["embed_tokens", "input_layernorm", "post_attention_layernorm", "norm"]
# LoRA hyper-parameters, forwarded to LoraConfig further down.
lora_alpha = 64
lora_dropout = 0.1
lora_r = 64

# Load the base model with 8-bit quantization. The quantization request is
# expressed through `quantization_config`; passing `load_in_8bit=True` straight
# to `from_pretrained` is the deprecated spelling of the same thing.
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16,
    device_map='auto',
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

# Tokenizer for the same checkpoint; EOS doubles as the padding token and
# padding is appended on the right.
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir='cache_dir')
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# LoRA adapter configuration; rank / alpha / dropout and modules_to_save come
# from the constants defined earlier in this cell.
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    modules_to_save=modules_to_save,
)

# Prepare the quantized model for training, then wrap *that* result with the
# adapter. Previously the prepared model was assigned to `model` and then
# ignored in favour of `base_model`, which only worked because the helper
# returns the object it was given.
model = prepare_model_for_kbit_training(base_model)
model = PeftModel(model, peft_config, adapter_name='lora_sft')
model.print_trainable_parameters()

# Training split of the s1K-1.1 dataset.
dataset = load_dataset("simplescaling/s1K-1.1", split="train")

def format(x):
    """Attach the question/answer training prompt to a dataset row.

    Reads ``x['question']`` and ``x['gemini_attempt']``, stores the combined
    prompt under ``x['text']`` (the row is modified in place) and returns the
    same row object.
    """
    question = x['question']
    answer = x['gemini_attempt']
    x['text'] = f"### Question: {question}\n ### Answer: {answer}"
    return x

# Add the `text` prompt column to every row.
ds = dataset.map(format)
# Tokenize without padding. With `batched=True`, `padding='longest'` pads every
# row up to the longest row of its map batch, so short examples carry pad
# tokens through training. The collator below already pads each training batch
# to its own longest sequence, so padding here is unnecessary.
ds_tokenized = ds.map(lambda x : tokenizer(x['text'], truncation=True), batched=True)
# Keep only the tokenizer outputs; drop the raw dataset columns and `text`.
ds_tokenized = ds_tokenized.remove_columns(['solution', 'question', 'cot_type', 'source_type', 'metadata', 'gemini_thinking_trajectory', 'gemini_attempt', 'deepseek_thinking_trajectory', 'deepseek_attempt', 'gemini_grade', 'gemini_grade_reason', 'deepseek_grade', 'deepseek_grade_reason', 'text'])

# Marker strings matching the prompt layout produced by `format`.
response_template = " ### Answer:"
instruction_template = "### Question:"

collator = DataCollatorForCompletionOnlyLM(response_template=response_template, instruction_template=instruction_template, tokenizer=tokenizer)


ModuleNotFoundError: No module named 'trl'

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTTrainer, SFTConfig, DataCollatorForCompletionOnlyLM
from datasets import load_dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training

# Imported here (not in the import block) so this setup block is self-contained.
from transformers import BitsAndBytesConfig

# --- Run configuration ---
# Checkpoint that is loaded for both the model and the tokenizer.
model_id = 'Qwen/Qwen2.5-3B'
# Forwarded to LoraConfig(modules_to_save=...) further down.
modules_to_save=["embed_tokens", "input_layernorm", "post_attention_layernorm", "norm"]
# LoRA hyper-parameters, forwarded to LoraConfig further down.
lora_alpha = 64
lora_dropout = 0.1
lora_r = 64

# Load the base model with 8-bit quantization. The quantization request is
# expressed through `quantization_config`; passing `load_in_8bit=True` straight
# to `from_pretrained` is the deprecated spelling of the same thing.
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16,
    device_map='auto',
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

# Tokenizer for the same checkpoint; EOS doubles as the padding token and
# padding is appended on the right.
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir='cache_dir')
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# LoRA adapter configuration; rank / alpha / dropout and modules_to_save come
# from the constants defined earlier in this cell.
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    modules_to_save=modules_to_save,
)

# Prepare the quantized model for training, then wrap *that* result with the
# adapter. Previously the prepared model was assigned to `model` and then
# ignored in favour of `base_model`, which only worked because the helper
# returns the object it was given.
model = prepare_model_for_kbit_training(base_model)
model = PeftModel(model, peft_config, adapter_name='lora_sft')
model.print_trainable_parameters()

# Training split of the s1K-1.1 dataset.
dataset = load_dataset("simplescaling/s1K-1.1", split="train")

def format(x):
    """Attach the question/answer training prompt to a dataset row.

    Reads ``x['question']`` and ``x['gemini_attempt']``, stores the combined
    prompt under ``x['text']`` (the row is modified in place) and returns the
    same row object.
    """
    question = x['question']
    answer = x['gemini_attempt']
    x['text'] = f"### Question: {question}\n ### Answer: {answer}"
    return x

# Add the `text` prompt column to every row.
ds = dataset.map(format)
# Tokenize without padding. With `batched=True`, `padding='longest'` pads every
# row up to the longest row of its map batch, so short examples carry pad
# tokens through training. The collator below already pads each training batch
# to its own longest sequence, so padding here is unnecessary.
ds_tokenized = ds.map(lambda x : tokenizer(x['text'], truncation=True), batched=True)
# Keep only the tokenizer outputs; drop the raw dataset columns and `text`.
ds_tokenized = ds_tokenized.remove_columns(['solution', 'question', 'cot_type', 'source_type', 'metadata', 'gemini_thinking_trajectory', 'gemini_attempt', 'deepseek_thinking_trajectory', 'deepseek_attempt', 'gemini_grade', 'gemini_grade_reason', 'deepseek_grade', 'deepseek_grade_reason', 'text'])

# Marker strings matching the prompt layout produced by `format`.
response_template = " ### Answer:"
instruction_template = "### Question:"

collator = DataCollatorForCompletionOnlyLM(response_template=response_template, instruction_template=instruction_template, tokenizer=tokenizer)


In [None]:
# Training hyper-parameters. Effective batch size per device is
# per_device_train_batch_size * gradient_accumulation_steps = 1 * 8.
training_args = SFTConfig(
        output_dir="./s1_lora_finetuned",
        num_train_epochs=3,
        per_device_train_batch_size=1,
        max_length=1024,
        gradient_accumulation_steps=8,
        gradient_checkpointing=True,
        save_steps=40,
        logging_steps=5,
        learning_rate=2e-4, #0.0002
        weight_decay=0.01,
        max_grad_norm = 0.3,
        bf16=True,
        # gradient_checkpointing_kwargs = {"use_reentrant": False}, #specific to FSDP/DDP
        # The dataset is already tokenized and trimmed to the model inputs.
        remove_unused_columns=False,
        # Use the string "none" to disable experiment trackers; `None` is
        # treated as "use the default", which on transformers releases that
        # default to "all" enables every installed integration.
        report_to="none",
        group_by_length=True,
        lr_scheduler_type='cosine',
        optim="adamw_torch",
    )
# `model` is already a PeftModel (wrapped in the setup cell), so no
# `peft_config` is passed here: handing the trainer both a wrapped model and a
# config is redundant and is handled differently across TRL versions.
trainer = SFTTrainer(
        model,
        data_collator=collator,
        args=training_args,
        train_dataset=ds_tokenized,
        processing_class=tokenizer
    )

trainer.train()
# Persist the trained model via its own save_pretrained.
trainer.model.save_pretrained('math_longcot_finetuned_qwen_base')

In [None]:
!pip3 install trl