In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126

In [None]:
import os
# Run the `nvidia-smi` command-line tool through the shell to report GPU status.
# os.system returns the command's exit status, which is this cell's result value.
os.system("nvidia-smi")

In [None]:
import torch
import pandas as pd
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Check whether PyTorch can see a CUDA device (the cells below load models with use_gpu=True).
torch.cuda.is_available() 

In [None]:
def load_model_and_tokenizer(model_name, use_gpu=True):
    """Load a causal language model and its tokenizer.

    Args:
        model_name: Checkpoint identifier passed unchanged to both
            ``AutoTokenizer.from_pretrained`` and
            ``AutoModelForCausalLM.from_pretrained``.
        use_gpu: When True, move the model to CUDA if a CUDA device is
            available; otherwise the model is left where it was loaded.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    if use_gpu:
        if torch.cuda.is_available():
            model = model.to('cuda')
        else:
            # Fall back instead of raising so the notebook still runs on
            # machines without a usable CUDA device.
            print("Warning: CUDA is not available; keeping the model on CPU.")

    return model, tokenizer

In [None]:
def generate_response(model, tokenizer, user_message, system_prompt=None, max_new_tokens=100):
    """Generate one greedy-decoded reply to a user message.

    Args:
        model: Model exposing ``device``, ``training``, ``eval()``, ``train()``
            and ``generate(...)``.
        tokenizer: Tokenizer exposing ``apply_chat_template``, ``__call__``,
            ``decode`` and ``eos_token_id``.
        user_message: Content of the user turn.
        system_prompt: Optional content of a system turn placed before the
            user turn; skipped when falsy.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded continuation only (prompt tokens removed, special tokens
        skipped), with surrounding whitespace stripped.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": user_message})

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # A model that has just been trained is still in training mode. Generate in
    # eval mode so train-only behaviour (e.g. dropout) cannot affect the output,
    # then put the model back the way the caller left it.
    was_training = getattr(model, "training", False)
    model.eval()
    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )
    finally:
        if was_training:
            model.train()

    # generate() returns prompt + continuation; keep only the continuation.
    input_len = inputs["input_ids"].shape[1]
    generated_ids = outputs[0][input_len:]

    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

In [None]:
def test_model_with_questions(model, tokenizer, questions, system_prompt=None, title="Model Output"):
    """Print a titled banner, then each question with the model's reply.

    Questions are numbered from 1 and answered one at a time via
    ``generate_response``; ``system_prompt`` is forwarded to every call.
    Nothing is returned.
    """
    header = f"\n=== {title} ==="
    print(header)

    for number, prompt_text in enumerate(questions, start=1):
        answer = generate_response(model, tokenizer, prompt_text, system_prompt)
        print(
            f"Question {number}: {prompt_text}",
            f"Response: {answer}\n",
            sep="\n",
        )

In [None]:
# Prompts passed to test_model_with_questions in the evaluation cells, so every
# model is compared on the same inputs.
questions = [
    "Give me an 1-sentence introduction of LLM.",
    "Calculate 2 + 2 - 2.",
    "What's the difference between thread and process?"
]

In [None]:
# Baseline: answer the questions with the base checkpoint, before any fine-tuning.
model, tokenizer = load_model_and_tokenizer("Qwen/Qwen3-0.6B-Base", use_gpu=True)

test_model_with_questions(model, tokenizer, questions, title="Qwen3-0.6B-Base (Before SFT) OUTPUT")

# Drop these references so the model can be released before the next one is loaded.
del model, tokenizer

In [None]:
# Same questions with the "banghua/Qwen3-0.6B-SFT" checkpoint
# (presumably an already fine-tuned version of the base model — confirm).
model, tokenizer = load_model_and_tokenizer("banghua/Qwen3-0.6B-SFT", use_gpu=True)

test_model_with_questions(model, tokenizer, questions, 
                          title="Base Model (After SFT) Output")

# Drop these references so the model can be released before training loads a new one.
del model, tokenizer

## SFT

In [None]:
!pip install git+https://github.com/huggingface/trl.git

In [None]:
import trl
# Show which TRL version is actually imported, since the install is not pinned.
print(trl.__version__)

In [None]:
from trl import SFTTrainer, SFTConfig

In [None]:
# Checkpoint that will be fine-tuned.
model_name = "Qwen/Qwen3-0.6B-Base"

# use_gpu is left at its default (True).
model, tokenizer = load_model_and_tokenizer(model_name)

In [None]:
# Load the dataset and keep only its "train" split.
train_dataset = load_dataset("banghua/DL-SFT-Dataset")["train"]

In [None]:
def display_dataset(dataset, num_examples=3):
    """Show the first few conversations of a chat dataset as a two-column table.

    Args:
        dataset: Sized, integer-indexable collection whose rows have a
            ``messages`` list of ``{"role": ..., "content": ...}`` dicts.
        num_examples: Maximum number of leading rows to show (default 3).
            Fewer rows are shown when the dataset is smaller.

    For each row, the first ``user`` and the first ``assistant`` message are
    shown; a role that does not occur in the row is rendered as an empty
    string. The table is rendered with ``display`` and nothing is returned.
    """
    columns = ['User Prompt', 'Assistant Response']

    # Never index past the end of a short (or empty) dataset.
    row_count = max(0, min(num_examples, len(dataset)))

    rows = []
    for i in range(row_count):
        messages = dataset[i]['messages']
        user_msg = next(
            (m['content'] for m in messages if m['role'] == 'user'), "")
        assistant_msg = next(
            (m['content'] for m in messages if m['role'] == 'assistant'), "")
        rows.append({
            'User Prompt': user_msg,
            'Assistant Response': assistant_msg
        })

    df = pd.DataFrame(rows, columns=columns)

    # Disable column truncation only while rendering this table, so the
    # global pandas display option is left as the caller had it.
    with pd.option_context('display.max_colwidth', None):
        display(df)

In [None]:
# Preview the first rows of the training data as a prompt/response table.
display_dataset(train_dataset)

In [None]:
# SFTTrainer config
# The batch-size setting must be the *train* one: no eval dataset is passed to
# the trainer, so an eval batch size would have no effect and training would
# silently run with the library's default per-device batch size.
sft_config = SFTConfig(
    learning_rate=8e-5,  # Learning rate for training
    num_train_epochs=1,  # Number of passes over the training dataset
    per_device_train_batch_size=1,  # Batch size for each device (e.g., GPU) during training
    gradient_accumulation_steps=8,  # Batches whose gradients are accumulated before each optimizer update
    logging_steps=2,  # Number of steps between logging events
)

In [None]:
# SFTTrainer
# Build the trainer from the loaded model, the config above and the training
# split; the tokenizer is supplied as the trainer's processing class.
sft_trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=train_dataset,
    processing_class=tokenizer
)

# Run fine-tuning. `model` is the same object that was passed to the trainer,
# and the cells below keep using it.
sft_trainer.train()

In [None]:
# Ask the same questions again, this time using the model object that was
# passed to the trainer in this session.
test_model_with_questions(model, tokenizer, questions, 
                          title="Base Model (After SFT) Output")

## Upload to HF

In [None]:
# Authenticate with the Hugging Face Hub. The CLI login asks for a token
# interactively, and `!` shell escapes in a notebook generally cannot read
# interactive input, so use the notebook login helper instead.
from huggingface_hub import notebook_login

notebook_login()

In [None]:
# Write the model and the tokenizer to the same local directory.
path_to_save_dir = "Qwen3-0.6B-SFT"
model.save_pretrained(path_to_save_dir)
tokenizer.save_pretrained(path_to_save_dir)

In [None]:
from huggingface_hub import HfApi, HfFolder, Repository
from transformers import AutoTokenizer, AutoModelForCausalLM

# Replace with your model repo name
repo_name = "tralora/Qwen3-0.6B-SFT"
# NOTE(review): save_path is not used by the upload calls below — confirm
# whether it is still needed.
save_path = "Qwen3-0.6B-SFT"

# Upload model
# Both calls push from the in-memory objects to the same Hub repository.
model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)
