In [1]:
# Install necessary libraries
!pip install torch transformers datasets trl

Collecting trl
  Downloading trl-0.19.1-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cu

In [3]:
# Import libraries
import torch
import pandas as pd
from datasets import load_dataset
from transformers import TrainingArguments, AutoTokenizer, AutoModelForCausalLM
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM, SFTConfig

In [5]:
def load_model_and_tokenizer(model_name, use_gpu):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    if use_gpu:
        model = model.to('cuda')
    return model, tokenizer

def generate_responses(model, tokenizer, input_texts):
    inputs = tokenizer(input_texts, return_tensors='pt', padding=True, truncation=True)
    if next(model.parameters()).is_cuda:
        inputs = {key: value.to('cuda') for key, value in inputs.items()}
    outputs = model.generate(**inputs)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

def test_model_with_questions(model, tokenizer, questions, title="Model Output"):
    print(f"=== {title} ===")
    responses = generate_responses(model, tokenizer, questions)
    for question, response in zip(questions, responses):
        print(f"Q: {question}\nA: {response}\n")

def display_dataset(dataset):
    df = pd.DataFrame(dataset)
    print(df.head())  # Display the first few rows of the dataset

In [6]:
# Load base model & test on simple questions
USE_GPU = torch.cuda.is_available()

questions = [
    "Give me an 1-sentence introduction of LLM.",
    "Calculate 1+1-1",
    "What's the difference between thread and process?"
]

model, tokenizer = load_model_and_tokenizer("Qwen/Qwen3-0.6B-Base", USE_GPU)

test_model_with_questions(model, tokenizer, questions,
                          title="Base Model (Before SFT) Output")

del model, tokenizer

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.19G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


=== Base Model (Before SFT) Output ===
Q: Give me an 1-sentence introduction of LLM.
A: Give me an 1-sentence introduction of LLM. A large language model (LLM) is a type of artificial intelligence that can understand and generate human-like text based on its training data.

Q: Calculate 1+1-1
A: Calculate 1+1-1 the answer is 1. Can you explain why this is the case?

The expression \(1 + 1 - 1\) can be evaluated step by step. First, add 1 and 1, which equals 2. Then, subtract 1 from 2, resulting in 1. Therefore, the answer is 1. This is because the order of operations (PEMDAS/BODMAS) dictates that you perform addition before subtraction. In this case, adding 1 and 1 gives 2, and then subtracting 1 from 2 results in 1.

Q: What's the difference between thread and process?
A: What's the difference between thread and process? the difference between thread and process?
Thread and process are two fundamental concepts in operating systems, each serving distinct purposes and operating in diffe

In [7]:
# Doing SFT on a small model
model_name = "HuggingFaceTB/SmolLM2-135M"
model, tokenizer = load_model_and_tokenizer(model_name, USE_GPU)

train_dataset = load_dataset("banghua/DL-SFT-Dataset")["train"]
if not USE_GPU:
    train_dataset = train_dataset.select(range(100))

display_dataset(train_dataset)

# SFTTrainer config
sft_config = SFTConfig(
    learning_rate=8e-5,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    gradient_checkpointing=False,
    logging_steps=2,
    bf16=False,
    fp16=False,
)

sft_trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=train_dataset,
    processing_class=tokenizer,
)
sft_trainer.train()

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/831 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/704 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/347 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/1.21M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2961 [00:00<?, ? examples/s]

                                            messages
0  [{'content': '- The left child should have a v...
1  [{'content': 'To pass three levels must be the...
2  [{'content': 'Can you translate the text mater...
3  [{'content': 'Complete feed for exotic fishes ...
4  [{'content': 'Write a funny limerick about a p...


Tokenizing train dataset:   0%|          | 0/2961 [00:00<?, ? examples/s]

ValueError: Cannot use chat template functions because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating

In [None]:
# Testing training results on small model and small dataset
if not USE_GPU:  # move model to CPU when GPU isn’t requested
    sft_trainer.model.to("cpu")
test_model_with_questions(sft_trainer.model, tokenizer, questions,
                          title="Base Model (After SFT) Output")