## Stage 1: Supervised Fine-tuning

#### 1. Install all packages

In [1]:
!pip install -q transformers trl peft bitsandbytes datasets
!pip install -q rouge_score bert_score
!pip install -q evaluate nltk

#### 2. Import all modules

In [2]:
import torch
import os
from datasets import load_dataset, DatasetDict
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    EarlyStoppingCallback
)

from trl import SFTTrainer, SFTConfig
from tqdm import tqdm
import evaluate
import numpy as np

In [3]:
import warnings
warnings.filterwarnings("ignore", message=".*use_reentrant.*")

In [4]:
class Config:
    base_model_id = "Qwen/Qwen2.5-3B"
    sft_model_id = "./qwen-2.5-3b-sft-truthfulqa/sft"
    dpo_model_id = "./qwen-2.5-3b-dpo-truthfulqa/dpo"

    dataset_id = "truthfulqa/truthful_qa"

config = Config()

#### 3. Prepare dataset

In [5]:
def format_sample(sample):
    question = sample.get("question", "").strip()
    answer = sample.get("best_answer", "").strip()

    formatted_text = f"Question: {question}\nAnswer: {answer}"
    
    return {"text": formatted_text}


In [6]:
full_dataset = load_dataset(config.dataset_id, "generation")["validation"]

In [7]:
tokenizer = AutoTokenizer.from_pretrained(config.base_model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

In [8]:
def tokenize_dataset(element):
    sample = format_sample(element)
    outputs = tokenizer(
        sample["text"],
        truncation=True,
        padding=False,
        max_length=512,
        return_overflowing_tokens=False,
        return_length=False,
    )
    return {
        "input_ids": outputs["input_ids"],
        "attention_mask": outputs["attention_mask"]
    }

#### 4. Preprocess dataset

In [9]:
tokenized_dataset = full_dataset.map(
    tokenize_dataset,
    batched=False,
    remove_columns=full_dataset.column_names
)
split_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
temp_split = split_dataset["test"].train_test_split(test_size=0.5, seed=42)


dataset_split = {
    "train": split_dataset["train"],
    "validation": temp_split["train"],
    "test": temp_split["test"]
}

#### 5. Initialize QLoRA

In [10]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

#### 6. Download SLM

In [11]:
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

#### 7. Initialize hyperparameters

In [12]:
sft_training_args = SFTConfig(
    per_device_train_batch_size=32,
    gradient_accumulation_steps=2,
    warmup_steps=100,
    num_train_epochs=100,
    learning_rate=3e-5,
    fp16=True,
    bf16=False,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    seed=42,
    output_dir=config.sft_model_id,
    report_to="tensorboard",
    logging_steps=1,
    eval_strategy="epoch",
    save_total_limit=1,
    save_strategy="best",
    metric_for_best_model="eval_loss",
    load_best_model_at_end=True,
    max_seq_length=2048,
    dataset_num_proc=4,
    packing=False,
)

#### 8. Intialize data collator

In [13]:
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

#### 9. Prepare evaluation metrics

In [14]:
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")
exact_match_metric = evaluate.load("exact_match")


def compute_metrics(eval_preds):
    predictions, labels = eval_preds

    # Handle tuple for predictions
    predictions = predictions[0] if isinstance(predictions, tuple) else predictions

    # Convert logits to predicted token IDs if needed
    if predictions.ndim == 3:
        predictions = predictions.argmax(-1)

    # Convert tensors to lists
    if hasattr(predictions, "tolist"):
        predictions = predictions.tolist()
    if hasattr(labels, "tolist"):
        labels = labels.tolist()

    # Replace -100 in labels with tokenizer.pad_token_id for decoding
    labels = [
        [token if token != -100 else tokenizer.pad_token_id for token in label_seq]
        for label_seq in labels
    ]

    # Decode predictions and labels
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Strip extra whitespace
    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [label.strip() for label in decoded_labels]

    bleu_result = bleu.compute(predictions=decoded_preds, references=decoded_labels)
    rouge_result = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    bertscore_result = bertscore.compute(predictions=decoded_preds, references=decoded_labels, lang="en")

    return {
        "bleu": bleu_result["bleu"],
        "rouge1": rouge_result["rouge1"],
        "rougeL": rouge_result["rougeL"],
        "bertscore_f1": np.mean(bertscore_result["f1"]),
    }


#### 10. Fine tune the SLM

In [15]:
sft_trainer = SFTTrainer(
    model=model,
    args=sft_training_args,
    train_dataset=dataset_split["train"],
    eval_dataset=dataset_split["test"],
    peft_config=lora_config,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [16]:
train_history = sft_trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss,Bleu,Rouge1,Rougel,Bertscore F1
1,2.4438,2.44846,0.176618,0.484373,0.445929,0.863016
2,2.4064,2.431044,0.171053,0.483627,0.44392,0.864467
3,2.1687,2.400669,0.171429,0.487869,0.448784,0.866754
4,2.4726,2.352984,0.174265,0.491813,0.453535,0.867944
5,2.3145,2.284949,0.205219,0.509459,0.46896,0.867765
6,2.303,2.200385,0.218563,0.526093,0.48267,0.871496
7,2.2582,2.119777,0.241026,0.560808,0.509079,0.877684
8,2.1535,2.035911,0.254487,0.567482,0.515033,0.879413
9,1.7679,1.942786,0.256733,0.567723,0.517811,0.880178
10,1.8339,1.860202,0.261583,0.573882,0.518836,0.880767


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
train_history

TrainOutput(global_step=360, training_loss=1.8511671821276348, metrics={'train_runtime': 377.314, 'train_samples_per_second': 194.798, 'train_steps_per_second': 3.18, 'total_flos': 1.831223777181696e+16, 'train_loss': 1.8511671821276348})

In [18]:
eval_history= sft_trainer.evaluate()

In [19]:
eval_history

{'eval_loss': 1.5551425218582153,
 'eval_bleu': 0.29501719344848476,
 'eval_rouge1': 0.6079930559773326,
 'eval_rougeL': 0.5767949236020923,
 'eval_bertscore_f1': 0.8843864958460738,
 'eval_runtime': 1.5284,
 'eval_samples_per_second': 26.825,
 'eval_steps_per_second': 3.926}

In [24]:
%pwd

'/teamspace/studios/this_studio'

In [26]:
import zipfile
import os

def zip_all_files(output_filename='stage-1.zip', directory='qwen-2.5-3b-sft-truthfulqa'):
    with zipfile.ZipFile(output_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for foldername, subfolders, filenames in os.walk(directory):
            for filename in filenames:
                file_path = os.path.join(foldername, filename)
                # Skip hidden files and system files if desired
                if not filename.startswith('.') and '__pycache__' not in file_path:
                    zipf.write(file_path, os.path.relpath(file_path, directory))

zip_all_files()
