In [1]:
!pip install -q transformers
!pip install -U datasets
!pip install -q torch



In [2]:
import torch
import transformers
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name()}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, TaskType
import gc

In [4]:
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [5]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

In [6]:
model = AutoModelForCausalLM.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    torch_dtype = torch.float16,
    device_map="auto",
    trust_remote_code=True,)

In [7]:
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training

In [8]:
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

In [9]:
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 36,929,536 || all params: 1,814,017,536 || trainable%: 2.0358


In [10]:
print(f"Model loaded: {model.num_parameters() / 1e6:.1f}M parameters")

Model loaded: 1814.0M parameters


In [11]:
from datasets import load_dataset

In [12]:
dataset = load_dataset("microsoft/orca-math-word-problems-200k", split="train[:3000]")

In [13]:
print(f"Dataset size: {len(dataset)}")
print("Sample:", dataset[0])

Dataset size: 3000
Sample: {'question': 'Jungkook is the 5th place. Find the number of people who crossed the finish line faster than Jungkook.', 'answer': 'If Jungkook is in 5th place, then 4 people crossed the finish line faster than him.'}


In [14]:
def format_math_qa(example):
    question = example["question"]
    answer = example["answer"]

    prompt = f"<|user|>\n{question}\n<|assistant|>\n{answer}<|end|>"

    return {"text": prompt}

In [15]:
formatted_dataset = dataset.map(format_math_qa, remove_columns=dataset.column_names)

In [16]:
train_val_split = formatted_dataset.train_test_split(seed=42,test_size=0.2)

In [17]:
train_dataset =  train_val_split['train']
eval_dataset =  train_val_split['test']

In [18]:
def tokenize_function(examples):
    result = tokenizer(
        examples["text"],
        truncation=True,
        padding=True,
        max_length=512,
        return_tensors="pt",
        return_overflowing_tokens=False,
    )
    result["labels"] = result["input_ids"].clone()
    return result

In [19]:
train_dataset = train_val_split["train"].map(
    tokenize_function,
    batched=True,
    remove_columns=train_val_split["train"].column_names,
    desc="Tokenizing training data"
)

eval_dataset = train_val_split["test"].map(
    tokenize_function,
    batched=True,
    remove_columns=train_val_split["test"].column_names,
    desc="Tokenizing evaluation data"
)

Tokenizing evaluation data:   0%|          | 0/600 [00:00<?, ? examples/s]

In [20]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

In [21]:
training_args = TrainingArguments(
    output_dir="./deepseek-r1-finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,
    dataloader_pin_memory=False,
    dataloader_num_workers=0,
    warmup_steps=100,
    learning_rate=2e-4,
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    logging_steps=25,
    eval_strategy="steps",
    eval_steps=200,
    save_strategy="steps",
    save_steps=400,
    save_total_limit=2,
    load_best_model_at_end=True,
    remove_unused_columns=False,
    report_to=None,
    seed=42,
)

In [22]:
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

In [23]:
trainer = Trainer(model=model,
                  args=training_args,
                  train_dataset=train_dataset,
                  eval_dataset=eval_dataset,
                  data_collator=data_collator,
                  tokenizer=tokenizer)

  trainer = Trainer(model=model,
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [24]:
import os
import gc

gc.collect()
torch.cuda.empty_cache()
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = 'expandable_segments:True'

In [None]:
try:
    train_result = trainer.train()

    print("\n🎉 Training completed!")
    print(f"📊 Final training loss: {train_result.metrics['train_loss']:.4f}")
    print(f"🕒 Training time: {train_result.metrics['train_runtime']:.2f} seconds")
    print(f"⚡ Samples/second: {train_result.metrics['train_samples_per_second']:.2f}")

except Exception as e:
    print(f"❌ Training failed: {e}")
    raise

[34m[1mwandb[0m: Currently logged in as: [33mnazm-albatros[0m ([33mnazm-albatros-techtetech[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
trainer.save_model("./deepseek-r1-math-adapter")
tokenizer.save_pretrained("./deepseek-r1-math-adapter")

In [None]:
def generate_math_response(prompt, max_length=300):
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_length=max_length,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response[len(tokenizer.decode(inputs[0], skip_special_tokens=True)):]
    return response.strip()

In [None]:
test_problems = [
    "<|user|>\nWhat is 25 + 37?\n<|assistant|>\n",
    "<|user|>\nIf a pizza costs $12 and I want to buy 3 pizzas, how much will I pay?\n<|assistant|>\n",
    "<|user|>\nSolve for x: 2x + 5 = 15\n<|assistant|>\n"
]

In [None]:
for i, problem in enumerate(test_problems, 1):
    user_question = problem.split("<|user|>")[1].split("<|assistant|>")[0].strip()
    print(f"\n🧮 Test {i}: {user_question}")
    print("-" * 40)

    try:
        response = generate_math_response(problem)
        print(f"🤖 Model: {response}")
    except Exception as e:
        print(f"❌ Error: {e}")