# Supervised Fine-tuning Trainer

- https://huggingface.co/docs/trl/en/sft_trainer

## Quickstart

In [1]:
from datasets import load_dataset
from trl import SFTTrainer

dataset = load_dataset("imdb", split="train")

trainer = SFTTrainer(
    "facebook/opt-350m",
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=512,
)
trainer.train()

  from .autonotebook import tqdm as notebook_tqdm
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


Downloading readme: 100%|██████████| 7.81k/7.81k [00:00<00:00, 5.29MB/s]
Downloading data: 100%|██████████| 21.0M/21.0M [00:04<00:00, 4.81MB/s]
Downloading data: 100%|██████████| 20.5M/20.5M [00:03<00:00, 5.21MB/s]
Downloading data: 100%|██████████| 42.0M/42.0M [00:06<00:00, 6.52MB/s]
Generating train split: 100%|██████████| 25000/25000 [00:00<00:00, 191339.35 examples/s]
Generating test split: 100%|██████████| 25000/25000 [00:00<00:00, 246430.95 examples/s]
Generating unsupervised split: 100%|██████████| 50000/50000 [00:00<00:00, 333958.42 examples/s]
Map: 100%|██████████| 25000/25000 [00:06<00:00, 3624.32 examples/s]
  positions = (torch.cumsum(attention_mask, dim=1).type_as(attention_mask) * attention_mask).long() - 1


RuntimeError: MPS backend out of memory (MPS allocated: 8.70 GB, other allocations: 263.34 MB, max allowed: 9.07 GB). Tried to allocate 128.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [2]:
from transformers import AutoModelForCausalLM
from datasets import load_dataset
from trl import SFTTrainer

dataset = load_dataset("imdb", split="train")

model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")

trainer = SFTTrainer(
    model,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=512,
)

trainer.train()

RuntimeError: MPS backend out of memory (MPS allocated: 9.07 GB, other allocations: 7.34 MB, max allowed: 9.07 GB). Tried to allocate 4.00 KB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

## Advanced usage

### Train on completions only

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

dataset = load_dataset("lucasmccabe-lmi/CodeAlpaca-20k", split="train")

model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['instruction'])):
        text = f"### Question: {example['instruction'][i]}\n ### Answer: {example['output'][i]}"
        output_texts.append(text)
    return output_texts

response_template = " ### Answer:"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

trainer = SFTTrainer(
    model,
    train_dataset=dataset,
    formatting_func=formatting_prompts_func,
    data_collator=collator,
)

trainer.train()

Downloading readme: 100%|██████████| 677/677 [00:00<00:00, 1.95MB/s]
Downloading data: 100%|██████████| 3.45M/3.45M [00:02<00:00, 1.39MB/s]
Generating train split: 100%|██████████| 20022/20022 [00:00<00:00, 230775.07 examples/s]
Map: 100%|██████████| 20022/20022 [00:02<00:00, 8479.81 examples/s] 


RuntimeError: MPS backend out of memory (MPS allocated: 9.07 GB, other allocations: 7.34 MB, max allowed: 9.07 GB). Tried to allocate 98.19 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")

model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

instruction_template = "### Human:"
response_template = "### Assistant:"
collator = DataCollatorForCompletionOnlyLM(instruction_template=instruction_template, response_template=response_template, tokenizer=tokenizer)

trainer = SFTTrainer(
    model,
    train_dataset=dataset,
    dataset_text_field="text",
    data_collator=collator,
)

trainer.train()

  from .autonotebook import tqdm as notebook_tqdm
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


Repo card metadata block was not found. Setting CardData to empty.
Downloading data: 100%|██████████| 20.9M/20.9M [00:02<00:00, 7.77MB/s]
Downloading data: 100%|██████████| 1.11M/1.11M [00:01<00:00, 1.07MB/s]
Generating train split: 100%|██████████| 9846/9846 [00:00<00:00, 94311.93 examples/s]
Generating test split: 100%|██████████| 518/518 [00:00<00:00, 187152.16 examples/s]
Map: 100%|██████████| 9846/9846 [00:04<00:00, 2211.23 examples/s]
  positions = (torch.cumsum(attention_mask, dim=1).type_as(attention_mask) * attention_mask).long() - 1


RuntimeError: MPS backend out of memory (MPS allocated: 8.05 GB, other allocations: 583.84 MB, max allowed: 9.07 GB). Tried to allocate 512.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).

#### Using token_ids directly for response_template

In [2]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

def print_tokens_with_ids(txt):
    tokens = tokenizer.tokenize(txt, add_special_tokens=False)
    token_ids = tokenizer.encode(txt, add_special_tokens=False)
    print(list(zip(tokens, token_ids)))

prompt = """### User: Hello\n\n### Assistant: Hi, how can I help you?"""
print_tokens_with_ids(prompt)

response_template = "### Assistant:"
print_tokens_with_ids(response_template)

OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-2-7b-hf.
401 Client Error. (Request ID: Root=1-6630c4a1-09de368f7b36e95110440272;81a7315a-c494-4985-b8f5-acca2bbdb73d)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.
Access to model meta-llama/Llama-2-7b-hf is restricted. You must be authenticated to access it.

In [3]:
response_template_with_context = "\n### Assistant:"
response_template_ids = tokenizer.encode(response_template_with_context, add_special_tokens=False)[2:]

data_collator = DataCollatorForCompletionOnlyLM(response_template_ids, tokenizer=tokenizer)

### Add Special Tokens for Chat Format

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import setup_chat_format

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

# Set up the chat format with default 'chatml' format
model, tokenizer = setup_chat_format(model, tokenizer)

### Dataset format support

In [5]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="sft_trainer",
)

In [6]:
from datasets import load_dataset
from trl import SFTTrainer

# load dataset from the HuggingFace Hub
dataset = load_dataset("philschmid/dolly-15k-oai-style", split="train")

trainer = SFTTrainer(
    "facebook/opt-350m",
    args=training_args,
    train_dataset=dataset,
    packing=True,
)

Downloading readme: 100%|██████████| 523/523 [00:00<00:00, 1.67MB/s]
Downloading data: 100%|██████████| 7.24M/7.24M [00:02<00:00, 3.17MB/s]
Generating train split: 100%|██████████| 15011/15011 [00:00<00:00, 80871.25 examples/s]

No chat template is defined for this tokenizer - using the default template for the GPT2TokenizerFast class. If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.

Generating train split: 2590 examples [00:05, 512.10 examples/s]


### Format your input prompts

In [18]:
dataset = load_dataset("HuggingFaceH4/instruction-dataset", split="test")

In [19]:
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['prompt'])):
        text = f"### Question: {example['prompt'][i]}\n ### Answer: {example['completion'][i]}"
        output_texts.append(text)
    return output_texts

trainer = SFTTrainer(
    model,
    train_dataset=dataset,
    formatting_func=formatting_prompts_func,
)

trainer.train()

Map: 100%|██████████| 327/327 [00:00<00:00, 2407.47 examples/s]


RuntimeError: MPS backend out of memory (MPS allocated: 9.08 GB, other allocations: 7.84 MB, max allowed: 9.07 GB). Tried to allocate 4.00 KB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).