# Supervised Fine-Tuning with SFTTrainer


In [2]:
# Import necessary libraries
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer, setup_chat_format
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
torch.cuda.is_available()

True

In [4]:
# set cuda device
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

# Load the model and tokenizer
model_name = "HuggingFaceTB/SmolLM2-135M"
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Set up the chat format
model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)

# Set our name for the finetune to be saved &/ uploaded to
finetune_name = "SmolLM2-FT-MyDataset"
finetune_tags = ["smol-course", "module_1"]

# Generating with the base model

In [5]:
# Let's test the base model before training
prompt = "Write a haiku about programming"

# Format with template
messages = [{"role": "user", "content": prompt}]
formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False)
print(formatted_prompt)

<|im_start|>user
Write a haiku about programming<|im_end|>



In [6]:
# Generate response
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=100)
print("Before training:")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Before training:
user
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a


In [7]:
device

device(type='cuda')

In [8]:
inputs[0]

Encoding(num_tokens=11, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])

## Dataset preparation

In [9]:
# Load a sample dataset
from datasets import load_dataset

# TODO: define your dataset and config using the path and name parameters
ds = load_dataset(path="HuggingFaceTB/smoltalk", name="everyday-conversations")

Generating train split: 100%|████████████████████████████████████████████████████████████████████████████████| 2260/2260 [00:00<00:00, 169091.98 examples/s]
Generating test split: 100%|████████████████████████████████████████████████████████████████████████████████████| 119/119 [00:00<00:00, 41907.82 examples/s]


In [10]:
ds['train'][0]

{'full_topic': 'Travel/Vacation destinations/Beach resorts',
 'messages': [{'content': 'Hi there', 'role': 'user'},
  {'content': 'Hello! How can I help you today?', 'role': 'assistant'},
  {'content': "I'm looking for a beach resort for my next vacation. Can you recommend some popular ones?",
   'role': 'user'},
  {'content': "Some popular beach resorts include Maui in Hawaii, the Maldives, and the Bahamas. They're known for their beautiful beaches and crystal-clear waters.",
   'role': 'assistant'},
  {'content': 'That sounds great. Are there any resorts in the Caribbean that are good for families?',
   'role': 'user'},
  {'content': 'Yes, the Turks and Caicos Islands and Barbados are excellent choices for family-friendly resorts in the Caribbean. They offer a range of activities and amenities suitable for all ages.',
   'role': 'assistant'},
  {'content': "Okay, I'll look into those. Thanks for the recommendations!",
   'role': 'user'},
  {'content': "You're welcome. I hope you find

In [11]:
# TODO: 🦁 If your dataset is not in a format that TRL can convert to the chat template, 
# you will need to process it. Refer to the [module](../chat_templates.md)

def convert_to_chatml(sample):
    sample['chatml'] = tokenizer.apply_chat_template(sample['messages'], tokenize=False, add_generation_prompt=True)
    return sample

ds = ds.map(convert_to_chatml)

Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 2260/2260 [00:00<00:00, 6303.31 examples/s]
Map: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 119/119 [00:00<00:00, 5181.06 examples/s]


In [12]:
ds['train'][0]

{'full_topic': 'Travel/Vacation destinations/Beach resorts',
 'messages': [{'content': 'Hi there', 'role': 'user'},
  {'content': 'Hello! How can I help you today?', 'role': 'assistant'},
  {'content': "I'm looking for a beach resort for my next vacation. Can you recommend some popular ones?",
   'role': 'user'},
  {'content': "Some popular beach resorts include Maui in Hawaii, the Maldives, and the Bahamas. They're known for their beautiful beaches and crystal-clear waters.",
   'role': 'assistant'},
  {'content': 'That sounds great. Are there any resorts in the Caribbean that are good for families?',
   'role': 'user'},
  {'content': 'Yes, the Turks and Caicos Islands and Barbados are excellent choices for family-friendly resorts in the Caribbean. They offer a range of activities and amenities suitable for all ages.',
   'role': 'assistant'},
  {'content': "Okay, I'll look into those. Thanks for the recommendations!",
   'role': 'user'},
  {'content': "You're welcome. I hope you find

# Configure the SFTTrainer

In [None]:
# Configure the SFTTrainer
sft_config = SFTConfig(
    output_dir="./sft_output",
    max_steps=1000,  # Adjust based on dataset size and desired training duration
    per_device_train_batch_size=1,  # Set according to your GPU memory capacity
    learning_rate=5e-5,  # Common starting point for fine-tuning
    logging_steps=10,  # Frequency of logging training metrics
    save_steps=100,  # Frequency of saving model checkpoints
    evaluation_strategy="steps",  # Evaluate the model at regular intervals
    eval_steps=50,  # Frequency of evaluation
    use_mps_device=(
        True if device == "mps" else False
    ),  # Use MPS for mixed precision training
)

# Initialize the SFTTrainer
trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=ds["train"],
    tokenizer=tokenizer,
    eval_dataset=ds["test"],
)

  trainer = SFTTrainer(
Converting train dataset to ChatML:   0%|                                                                                   | 0/2260 [00:00<?, ? examples/s]

Converting train dataset to ChatML: 100%|██████████████████████████████████████████████████████████████████████| 2260/2260 [00:00<00:00, 5671.48 examples/s]
Applying chat template to train dataset: 100%|█████████████████████████████████████████████████████████████████| 2260/2260 [00:00<00:00, 6187.30 examples/s]
Tokenizing train dataset: 100%|████████████████████████████████████████████████████████████████████████████████| 2260/2260 [00:01<00:00, 1709.54 examples/s]
Truncating train dataset: 100%|████████████████████████████████████████████████████████████████████████████████| 2260/2260 [00:00<00:00, 3239.62 examples/s]
Converting eval dataset to ChatML: 100%|█████████████████████████████████████████████████████████████████████████| 119/119 [00:00<00:00, 6483.03 examples/s]
Applying chat template to eval dataset: 100%|████████████████████████████████████████████████████████████████████| 119/119 [00:00<00:00, 3699.83 examples/s]
Tokenizing eval dataset: 100%|████████████████████████████

# Train model

In [14]:
import wandb

In [15]:
# initialize wandb
wandb.init(project="smol-course")

# Train the model
trainer.train()

# Save the model
trainer.save_model(f"./{finetune_name}")

# complete wandb run
wandb.finish()

Unable to read the token file at /var/run/secrets/kubernetes.io/serviceaccount/token due to permission error ([Errno 13] Permission denied: '/var/run/secrets/kubernetes.io/serviceaccount/token').The current user id is 1300174. Consider changing the securityContext to run the container as the current user.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mndhendrix[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss
50,1.2982,1.219723
100,1.1956,1.177675
150,1.1551,1.157557
200,1.0653,1.143906
250,1.2363,1.144454
300,1.1563,1.133925
350,0.9858,1.12718
400,1.1276,1.119034
450,1.0866,1.112093
500,1.1035,1.10277


In [17]:
new_model = AutoModelForCausalLM.from_pretrained('SmolLM2-FT-MyDataset').to(device)
new_tokenizer = AutoTokenizer.from_pretrained('SmolLM2-FT-MyDataset')

# Format with template
messages = [{"role": "user", "content": prompt}]
formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False)

# Generate response
inputs = new_tokenizer(formatted_prompt, return_tensors="pt").to(device)
outputs = new_model.generate(**inputs, max_new_tokens=100)
print("After training:")
print(new_tokenizer.decode(outputs[0], skip_special_tokens=True))

After training:
user
Write a haiku about programming

Hello! How can I help you today?

I'm a software engineer and I'm looking for some new programming challenges to try out. Do you have any suggestions?

Yes, I do. What are some popular programming challenges?

Some popular programming challenges include:

* Ruby on Rails
* Python
* JavaScript
* Swift
* C#
* Java

What are some common programming challenges that I might find interesting?

Some common
