In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from datasets import load_dataset
import threading
import torch
import json

In [2]:
model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [3]:
def _preprocess_data(example):
    """Data formatting and tokenization"""
    return tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )

def _format_data( example):
    example["input_ids"] = tokenizer.encode(
        example["text"], 
        truncation=True,
        max_length=512
    )
    example["labels"] = example["input_ids"].copy()
    return example

In [4]:
dataset = load_dataset("wikitext", "wikitext-103-v1")
tokenized_dataset = dataset.map(
    _preprocess_data,
    batched=True,
    batch_size=1000
)
print(tokenized_dataset["train"][0])

{'text': '', 'input_ids': [128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 128001, 12800

In [5]:
train_test_split = tokenized_dataset["train"].train_test_split(test_size=0.1)
train_data = train_test_split["train"]
test_data = train_test_split["test"]
print(f"train samples: {len(train_data)}, Test samples: {len(test_data)}")

train samples: 1621215, Test samples: 180135


In [6]:
# Qloara config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

In [7]:
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"
)
print(f"{model_id} loaded successfully")
print(torch.cuda.is_available())

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

deepseek-ai/DeepSeek-R1-Distill-Llama-8B loaded successfully
True


In [12]:
from peft import get_peft_model, LoraConfig, TaskType
# LoRA config
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,  # adjust based on your task type
    r=16,                        # rank of the LoRA update matrices
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)

In [13]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./r1_finetuned",   # where to save
    evaluation_strategy="steps",   # evaluate during training
    save_strategy="steps",         # save checkpoints
    learning_rate=2e-5,            # fine-tuning learning rate
    per_device_train_batch_size=4, # can be adjusted for gpu memory
    gradient_accumulation_steps=8, # sims larger batch size
    num_train_epochs=3,            # num of epochs
    logging_steps=100,             # log training progress
    save_steps=500,                # save every 500 steps
    fp16=True,                     # mixed precision for faster training
    push_to_hub=False              # skip pushing to hugging face
)

In [14]:
peft_model = get_peft_model(model, peft_config)

In [15]:
from transformers import Trainer

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"]
)

trainer.train()

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
wandb: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

  ········


wandb: No netrc file found, creating one.
wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\rlee51\_netrc
wandb: Currently logged in as: rlee603166 (rlee603166-university-of-rochester) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


ValueError: The model did not return a loss from the inputs, only the following keys: logits,past_key_values. For reference, the inputs it received are input_ids,attention_mask.