<!-- TABS -->
# Finetune LLM

In [None]:
# <testing: >
# !pip install trl datasets transformers bitsandbytes peft

In [None]:
# <testing: >
import os

# Point SuperDuperDB at a local MongoDB database named "llm" and keep
# artifacts on the local filesystem. These variables are set before the
# `superduperdb` import in the following cell.
os.environ["SUPERDUPERDB_DATA_BACKEND"] = "mongodb://localhost:27017/llm"
# The directory was previously misspelled "outoput"; it now sits under the
# same "output" folder the trainers in this notebook write to.
os.environ["SUPERDUPERDB_ARTIFACT_STORE"] = "filesystem://./output/artifact_store"

In [None]:
# <testing: >
from itertools import islice

from datasets import load_dataset
from superduperdb import superduper
from superduperdb.backends.mongodb import Collection
from superduperdb.base.document import Document

# Connect to the backend configured in the environment, falling back to an
# in-memory mongomock database when the variable is not set.
db = superduper(os.environ.get("SUPERDUPERDB_DATA_BACKEND", "mongomock://test"))
# NOTE(review): `drop(True)` presumably forces the drop without a prompt --
# confirm against the Datalayer API before pointing this at real data.
db.drop(True)

model_name = "facebook/opt-350m"
dataset_name = "timdettmers/openassistant-guanaco"

# How many examples of each split are inserted into the database. Only this
# many documents are built, rather than converting the whole dataset and
# discarding most of it.
NUM_TRAIN_EXAMPLES = 100
NUM_EVAL_EXAMPLES = 10

dataset = load_dataset(dataset_name)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]

# Each document carries the raw text plus a `_fold` tag marking which split
# ("train" or "valid") it belongs to.
train_documents = [
    Document({"text": example["text"], "_fold": "train"})
    for example in islice(train_dataset, NUM_TRAIN_EXAMPLES)
]
eval_documents = [
    Document({"text": example["text"], "_fold": "valid"})
    for example in islice(eval_dataset, NUM_EVAL_EXAMPLES)
]

# Both folds go into the same "datas" collection.
db.execute(Collection("datas").insert_many(train_documents))
db.execute(Collection("datas").insert_many(eval_documents))

## Finetuning

In [None]:
# <tab: Local>
from superduperdb.ext.transformers import LLM, LLMTrainer

# Trainer configuration for a run on the local machine.
trainer = LLMTrainer(
    identifier="llm-finetune-trainer",
    # Output location and checkpoint retention
    output_dir="output/finetune",
    overwrite_output_dir=True,
    save_steps=50,
    save_total_limit=3,
    # Training length, logging and evaluation cadence
    num_train_epochs=3,
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=50,
    # Batch sizing and sequence length
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    max_seq_length=512,
    # LoRA adapter with bits=4
    use_lora=True,
    bits=4,
)

# Query selecting the rows of the "datas" collection used for training.
training_query = Collection("datas").find()

llm = LLM(
    identifier="llm",
    model_name_or_path=model_name,
    trainer=trainer,
    train_X="text",  # field of each document that is used for training
    train_select=training_query,
)

# Run the fine-tuning job against the database.
llm.fit_in_db(db=db)

In [None]:
# <tab: On Ray>
from superduperdb.ext.transformers import LLM, LLMTrainer

# Same trainer configuration as a local run, plus the address of the Ray
# cluster passed as `ray_address`.
trainer = LLMTrainer(
    identifier="llm-finetune-trainer",
    # Ray cluster to use
    ray_address="ray://localhost:10001",
    # Output location and checkpoint retention
    output_dir="output/finetune",
    overwrite_output_dir=True,
    save_steps=50,
    save_total_limit=3,
    # Training length, logging and evaluation cadence
    num_train_epochs=3,
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=50,
    # Batch sizing and sequence length
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    max_seq_length=512,
    # LoRA adapter with bits=4
    use_lora=True,
    bits=4,
)

# Query selecting the rows of the "datas" collection used for training.
training_query = Collection("datas").find()

llm = LLM(
    identifier="llm",
    model_name_or_path=model_name,
    trainer=trainer,
    train_X="text",  # field of each document that is used for training
    train_select=training_query,
)

# Run the fine-tuning job against the database.
llm.fit_in_db(db=db)

In [None]:
# <tab: Deepspeed>
# !pip install deepspeed
from superduperdb.ext.transformers import LLM, LLMTrainer

# DeepSpeed configuration: the batch-size related values are left as "auto"
# and ZeRO optimization is set to stage 2.
deepspeed = {
    "zero_optimization": {"stage": 2},
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
}

# Same trainer configuration as a local run, plus the DeepSpeed config.
trainer = LLMTrainer(
    identifier="llm-finetune-trainer",
    # DeepSpeed configuration defined above
    deepspeed=deepspeed,
    # Output location and checkpoint retention
    output_dir="output/finetune",
    overwrite_output_dir=True,
    save_steps=50,
    save_total_limit=3,
    # Training length, logging and evaluation cadence
    num_train_epochs=3,
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=50,
    # Batch sizing and sequence length
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    max_seq_length=512,
    # LoRA adapter with bits=4
    use_lora=True,
    bits=4,
)

# Query selecting the rows of the "datas" collection used for training.
training_query = Collection("datas").find()

llm = LLM(
    identifier="llm",
    model_name_or_path=model_name,
    trainer=trainer,
    train_X="text",  # field of each document that is used for training
    train_select=training_query,
)

# Run the fine-tuning job against the database.
llm.fit_in_db(db=db)

In [None]:
# <tab: Multi-GPUS>
from superduperdb.ext.transformers import LLM, LLMTrainer

# Same trainer configuration as the other tabs, plus `num_gpus` to request
# more than one GPU.
trainer = LLMTrainer(
    identifier="llm-finetune-trainer",
    output_dir="output/finetune",
    overwrite_output_dir=True,
    num_train_epochs=3,
    save_total_limit=3,
    logging_steps=10,
    evaluation_strategy="steps",
    save_steps=50,
    eval_steps=50,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    max_seq_length=512,
    use_lora=True,
    bits=4,
    num_gpus=2,  # set num_gpus
)

# This tab previously stopped after building the trainer, so choosing it
# never trained anything. Build the model and start the fit exactly as the
# Local, Ray and Deepspeed tabs do.
llm = LLM(
    identifier="llm",
    model_name_or_path=model_name,
    trainer=trainer,
    train_X="text",  # Which field of data is used for training
    train_select=Collection("datas").find(),  # Which table data to use for training
)

llm.fit_in_db(db=db)

## Load the trained model
There are two methods to load a trained model:

- **Load the model directly**: This loads the model with the best metrics (if the transformers best-model saving strategy is enabled); otherwise it loads the last saved version of the model.
- **Use a specified checkpoint**: This method downloads the specified checkpoint, initializes the base model, and then merges the checkpoint into the base model. This approach supports custom operations during initialization, such as resetting flash attention or quantizing the model.

In [None]:
# <tab: Load Trained Model Directly>
# Load the component stored under type "model" with identifier "llm" -- the
# same identifier the LLM was given in the fine-tuning cells.
llm = db.load("model", "llm")

In [None]:
# <tab: Use a specified checkpoint>
from superduperdb.ext.transformers import LLM, LLMTrainer

# Look up the checkpoint stored under the trainer's experiment id.
checkpoint = db.load("checkpoint", trainer.experiment_id)

# Re-create the base model and attach the checkpoint as its adapter; extra
# keyword arguments for model initialization are passed via `model_kwargs`.
llm = LLM(
    identifier="llm",
    model_name_or_path=model_name,
    adapter_id=checkpoint,
    model_kwargs={"load_in_4bit": True},
)

In [None]:
# <testing: >
# Single-turn chat prompt used to try out the loaded model.
question = "What is the capital of Germany? Explain why thats the case and if it was different in the past?"
messages = [{"role": "user", "content": question}]

# do_sample=False is passed to turn sampling off; at most 200 new tokens.
answer = llm.predict_one(messages, max_new_tokens=200, do_sample=False)
print(answer)