# Supervised Fine-Tuning

PLEASE NOTE: The contents of this notebook were adapted from the documentation of unsloth. The reference notebook can be found [here](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen3_(14B)-Reasoning-Conversational.ipynb#scrollTo=eevc7tY-SO9V).

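This notebook contains the supervised fine-tuning (SFT) procedure: a LoRA fine-tune of Qwen2.5-3B-Instruct with Unsloth, followed by a quantitative evaluation on a held-out split and a short qualitative inference check.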

## Imports

In [None]:
# Install the training stack. The first and last commands pass --no-deps, so pip
# installs only the listed packages and leaves their dependencies untouched;
# xformers, trl and transformers are pinned to exact versions.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
!pip install transformers==4.51.3
!pip install --no-deps unsloth

In [None]:
import os
from google.colab import userdata, drive

import torch
import wandb
from datasets import load_dataset, Dataset
from unsloth import FastLanguageModel, is_bfloat16_supported
from huggingface_hub import login
from trl import SFTTrainer, SFTConfig

In [None]:
# Read the Weights & Biases key from the Colab `userdata` secrets (stored as
# 'WB_TOKEN') and export it via the environment before logging in.
os.environ['WANDB_API_KEY'] = userdata.get('WB_TOKEN')
wandb.login()

# Same pattern for the Hugging Face token (stored as 'HF_WRITE'); it is passed
# explicitly to huggingface_hub.login.
os.environ['HF_TOKEN'] = userdata.get('HF_WRITE')
login(token = os.environ['HF_TOKEN'])

## Training

### Load Model

In [None]:
# Checkpoint to fine-tune. `model_string` is kept separate because it is reused
# further down when the run name is assembled.
model_string = 'Qwen2.5-3B-Instruct'
MODEL_NAME = 'Qwen/' + model_string

# Load model and tokenizer through Unsloth, with 4-bit loading and full
# fine-tuning both switched off.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=2048,
    load_in_4bit=False,
    full_finetuning=False,
)

In [None]:
# Names of the projection modules that receive adapter weights.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded model with a PEFT adapter; gradient checkpointing is disabled.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=lora_target_modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.01,
    bias='none',
    use_gradient_checkpointing=False,
    random_state=42,
)

### Load Data

In [None]:
# Mount Google Drive so the parquet file below is reachable from the runtime.
drive.mount('/content/drive')

# Read the parquet file as a single 'train' split and keep only that split.
raw_dataset = load_dataset(
    'parquet',
    data_files={'train': '/content/drive/MyDrive/practical_course2/data/agent_finetune.parquet'},
)['train']

In [None]:
def generate_conversation(samples):
    """Convert a batch of prompt/completion pairs into chat-style conversations.

    Args:
        samples: Mapping with parallel sequences under the keys 'prompt' and
            'completion' (the shape `Dataset.map(..., batched=True)` passes in).

    Returns:
        A dict with one key, 'text', holding a list of conversations. Each
        conversation is a two-element list: the user message followed by the
        assistant message.
    """
    pairs = zip(samples['prompt'], samples['completion'])
    return {
        'text': [
            [
                {'role': 'user', 'content': user_msg},
                {'role': 'assistant', 'content': assistant_msg},
            ]
            for user_msg, assistant_msg in pairs
        ]
    }

In [None]:
# Step 1: turn each prompt/completion row into a list of chat messages.
chat_messages = raw_dataset.map(generate_conversation, batched=True)['text']

# Step 2: render the messages with the tokenizer's chat template; tokenize=False
# asks for the rendered text rather than token ids.
# NOTE(review): `enable_thinking` looks like a switch carried over from the Qwen3
# reference notebook — confirm it has any effect with this model's template.
conversations = tokenizer.apply_chat_template(
    chat_messages,
    tokenize=False,
    enable_thinking=False,
)

# Step 3: wrap the rendered strings in a Dataset with a single 'text' column.
dataset = Dataset.from_list([{"text": rendered} for rendered in conversations])

In [None]:
# Two-stage split with a fixed seed: first hold out 20 % of the data, then cut
# that hold-out in half to obtain the evaluation and test sets.
train_test = dataset.train_test_split(test_size=0.2, seed=42)
test_eval = train_test['test'].train_test_split(test_size=0.5, seed=42)

train_dataset, eval_dataset, test_dataset = (
    train_test['train'],
    test_eval['train'],
    test_eval['test'],
)

### Train Model

In [None]:
# Batch sizing and checkpoint cadence.
TRAIN_BATCH_SIZE    = 16
ACCUMULATION_STEPS  = 8
SAVE_STEPS          = 50

PROJECT_NAME = 'pr2-finetune'

# Optimiser settings; both are encoded in the run name so runs can be told apart.
LR = 2e-4
WS = 30
RUN_NAME = f'{model_string}_lr{LR}_ws{WS}_cosine'
sft_config = SFTConfig(
    dataset_text_field = 'text',

    # Logging
    logging_strategy    = 'steps',
    logging_steps       = SAVE_STEPS,
    report_to           = 'wandb',
    # Use the same name that is passed to wandb.init so the trainer config and
    # the W&B run agree (previously PROJECT_NAME + MODEL_NAME, which left
    # RUN_NAME unused here).
    run_name            = RUN_NAME,

    # Batch and dataloading
    per_device_train_batch_size = TRAIN_BATCH_SIZE,
    per_device_eval_batch_size  = 2 * TRAIN_BATCH_SIZE,
    gradient_accumulation_steps = ACCUMULATION_STEPS,
    dataloader_num_workers      = 4,

    # Train hyperparameters
    learning_rate   = LR,
    lr_scheduler_type = 'cosine',
    weight_decay    = 0.01,
    warmup_steps    = WS,
    # Pick exactly one mixed-precision mode depending on hardware support.
    fp16            = not is_bfloat16_supported(),
    bf16            = is_bfloat16_supported(),

    # Training epochs and model saving
    num_train_epochs    = 3,
    eval_strategy       = 'steps',
    # Make the evaluation cadence explicit and equal to the save cadence:
    # load_best_model_at_end needs checkpoints that line up with evaluations.
    # Before, eval_steps silently fell back to logging_steps (same value).
    eval_steps          = SAVE_STEPS,
    output_dir          = './out/',
    save_strategy       = 'steps',
    save_steps          = SAVE_STEPS,
    save_total_limit    = 2,
    load_best_model_at_end = True,

    # Misc.
    seed        = 42,
    data_seed   = 42,
)

In [None]:
# Open the W&B run for this training session under the project/run names set above.
wandb.init(project=PROJECT_NAME, name=RUN_NAME)

In [None]:
# Assemble the trainer from the adapter-wrapped model, its tokenizer, the
# train/eval splits and the configuration defined above.
trainer = SFTTrainer(
    args=sft_config,
    model=model,
    processing_class=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

In [None]:
# Run training and keep the returned statistics for later inspection.
trainer_stats = trainer.train()

# Close the W&B run before uploading anything.
wandb.finish()

# Upload the model first, then the tokenizer, to the same private Hub repository.
save_name = f'nicomu99/finetune-{RUN_NAME}'
for artifact in (model, tokenizer):
    artifact.push_to_hub(save_name, private=True)

## Testing

### Load Model

In [None]:
# Load the checkpoint to evaluate. Note that the Hub repository `adapter_name`
# (not a plain base-model id) is what gets passed as `model_name`.
# NOTE(review): this repo name differs from the `save_name` pushed at the end of
# the training section — confirm it is the checkpoint you intend to test.
model_string = 'Qwen2.5-3B'
adapter_name = f'nicomu99/{model_string}-persona-SFT'
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name      = adapter_name,
    max_seq_length  = 2048,
    load_in_4bit    = False,
    full_finetuning = False,
)

### Load & Prepare Data

In [None]:
# Mount Google Drive (needed when this section is run on its own).
drive.mount('/content/drive')

# Re-read the same parquet file used for training and keep its 'train' split.
raw_dataset = load_dataset(
    'parquet',
    data_files={'train': '/content/drive/MyDrive/practical_course2/data/agent_finetune.parquet'},
)['train']

In [None]:
# Create splits with the same test sizes and seed as the training section, this
# time on the raw (untemplated) rows. Only the training part and the final test
# part are kept; the evaluation half of the hold-out is not used here.
train_test  = raw_dataset.train_test_split(test_size=0.2, seed=42)
test_eval   = train_test['test'].train_test_split(test_size=0.5, seed=42)

train_dataset = train_test['train']
raw_test_dataset  = test_eval['test']

In [None]:
def generate_conversation(samples):
    """Build user/assistant message pairs from a batch of rows.

    Same contract as the function of the same name in the training section;
    it is defined again so the testing section can run without that cell.

    Args:
        samples: Mapping with parallel sequences under 'prompt' and 'completion'.

    Returns:
        {'text': [...]} where each element is a list holding the user message
        and the assistant message for one row.
    """
    conversations = []
    for user_text, assistant_text in zip(samples['prompt'], samples['completion']):
        user_turn = {'role': 'user', 'content': user_text}
        assistant_turn = {'role': 'assistant', 'content': assistant_text}
        conversations.append([user_turn, assistant_turn])
    return {'text': conversations}

In [None]:
# Convert the training rows to chat messages, then render them to plain text
# with the tokenizer's chat template.
train_messages = train_dataset.map(generate_conversation, batched=True)['text']
conversations_train = tokenizer.apply_chat_template(train_messages, tokenize=False)

# Replace the raw training split with a single-column ('text') dataset.
train_dataset = Dataset.from_list([{'text': rendered} for rendered in conversations_train])

In [None]:
# Render the held-out test rows to plain text with the tokenizer's chat template.
conversations_test = tokenizer.apply_chat_template(
    raw_test_dataset.map(generate_conversation, batched = True)['text'],
    tokenize = False,
)

# Build the evaluation set from the *test* conversations. This was previously
# built from `conversations_train`, so the reported evaluation metrics were
# computed on training data rather than on the held-out split.
test_dataset = Dataset.from_list([{'text': entry} for entry in conversations_test])

### Evaluation Loop

In [None]:
# Log the evaluation under its own W&B project, with the run named after the
# checkpoint being tested.
PROJECT_NAME = 'pr2-finetune-testing'
run_name = adapter_name
wandb.init(project=PROJECT_NAME, name=run_name)

In [None]:
# Configuration for the evaluation-only trainer. TRAIN_BATCH_SIZE is only used
# here to derive the evaluation batch size.
TRAIN_BATCH_SIZE = 16
sft_config = SFTConfig(
    dataset_text_field = 'text',

    # Logging
    logging_strategy    = 'steps',
    logging_steps       = 10,
    report_to           = 'wandb',
    run_name            = PROJECT_NAME + run_name,

    # Batch and dataloading
    per_device_eval_batch_size  = 2 * TRAIN_BATCH_SIZE,
    dataloader_num_workers      = 4,

    # Mixed precision: exactly one of fp16/bf16 is enabled, depending on
    # whether bfloat16 is supported.
    fp16            = not is_bfloat16_supported(),
    bf16            = is_bfloat16_supported(),

    # Misc.
    seed        = 42,
    data_seed   = 42,
)

In [None]:
# Trainer used only for evaluation: the test split is supplied as the eval set.
trainer = SFTTrainer(
    args=sft_config,
    model=model,
    processing_class=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

In [None]:
# Evaluate on the trainer's eval dataset and keep the returned metrics in `stats`.
stats = trainer.evaluate()

# Close the W&B run opened for this evaluation.
wandb.finish()

## Qualitative Inference Testing

NOTE: This section was adapted from [this section](https://docs.unsloth.ai/get-started/fine-tuning-guide#evaluation) of unsloth's documentation.

In [None]:
# Hub repository of the fine-tuned model used for the qualitative check.
finished_model = 'nicomu99/finetune-Qwen2.5-3B-Instruct_lr0.0002_ws30_cosine'

# Load it under separate names (q_model / q_tokenizer) so the model and
# tokenizer from the previous sections are not overwritten.
q_model, q_tokenizer = FastLanguageModel.from_pretrained(
    model_name=finished_model,
    max_seq_length=2048,
    load_in_4bit=False,
    full_finetuning=False,
)

In [None]:
# Prepare the loaded model for generation via Unsloth's inference helper.
FastLanguageModel.for_inference(q_model)

In [None]:
def generate_input_prompt(sample):
    """Wrap a sample's prompt as a single-turn user conversation.

    Args:
        sample: Mapping that has a 'prompt' entry.

    Returns:
        A one-element list containing the user message built from that prompt.
    """
    user_turn = {'role': 'user', 'content': sample['prompt']}
    return [user_turn]

In [None]:
# Take one row of the raw dataset as the qualitative example.
# NOTE(review): index 1000 is taken from the full raw dataset, so the row may
# belong to the training split — confirm that is acceptable for this check.
sample_messages = generate_input_prompt(raw_dataset[1000])

# Render with the chat template, add the generation prompt, and request
# PyTorch tensors.
input_ids = q_tokenizer.apply_chat_template(
    sample_messages,
    add_generation_prompt=True,
    return_tensors='pt',
)

# Move the prompt tensor to the GPU for generation.
input_ids = input_ids.to('cuda')

In [None]:
# Generate at most 256 new tokens for the prepared prompt, then decode the
# resulting ids back to text; the decoded strings are this cell's output.
output = q_model.generate(input_ids, max_new_tokens = 256)
q_tokenizer.batch_decode(output)