<!-- TABS -->
# Build A Trainable LLM

In [None]:
# <testing: >
# Optional: uncomment the line below to install the listed third-party packages
# (presumably the dependencies of the fine-tuning workflow in this notebook — confirm).
# !pip install trl datasets transformers bitsandbytes peft

In [None]:
# <testing: >
import os

# Configure the data backend and the artifact store through environment variables.
# They are set before `pinnacle` is imported in the following cell — presumably the
# library reads them when it is imported or connected (TODO confirm).
os.environ["pinnacle_DATA_BACKEND"] = 'mongodb://localhost:27017/llm'
# Directory typo fixed ("outoput" -> "output") so artifacts are stored next to the
# trainer's "output/finetune" directory instead of in a stray misspelled folder.
os.environ["pinnacle_ARTIFACT_STORE"] = "filesystem://./output/artifact_store"

In [None]:
# <testing: >
from itertools import islice

from datasets import load_dataset
from pinnacle import pinnacle
from pinnacle.backends.mongodb import Collection
from pinnacle.base.document import Document

# How many examples of each split are copied into the database for this demo.
NUM_TRAIN_EXAMPLES = 100
NUM_EVAL_EXAMPLES = 10

# Connect to the backend named by the environment variable set in the previous
# cell, falling back to the "mongomock://test" URI when it is unset.
db = pinnacle(os.environ.get("pinnacle_DATA_BACKEND", "mongomock://test"))
# NOTE(review): this looks like it force-drops existing data in the target
# database so the notebook starts from a clean state — confirm before pointing
# it at a database that holds anything you want to keep.
db.drop(True)

model_name = "facebook/opt-350m"
dataset_name = "timdettmers/openassistant-guanaco"

dataset = load_dataset(dataset_name)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]

# Wrap only the rows that are actually inserted. `islice` stops after the
# requested number of rows (or earlier if the split is shorter), so the rest
# of the dataset is never converted into Document objects.
train_documents = [
    Document({"text": example["text"], "_fold": "train"})
    for example in islice(train_dataset, NUM_TRAIN_EXAMPLES)
]
eval_documents = [
    Document({"text": example["text"], "_fold": "valid"})
    for example in islice(eval_dataset, NUM_EVAL_EXAMPLES)
]

# Both folds go into the same "datas" collection; the "_fold" field tells them apart.
db.execute(Collection("datas").insert_many(train_documents))
db.execute(Collection("datas").insert_many(eval_documents))

**Create an LLM Trainer for training**

The parameters of `LLMTrainer` largely mirror those of `transformers.TrainingArguments`, with a few additional parameters that make training easier to set up.

In [None]:
from pinnacle.ext.transformers import LLM, LLMTrainer

# NOTE: `key`, `select`, `transform` and `training_kwargs` are not assigned in any
# cell shown in this notebook; they must already exist in the kernel before this
# cell is run, otherwise it raises a NameError.
trainer = LLMTrainer(
    identifier="llm-finetune-trainer",
    # Values supplied by names defined outside this cell.
    key=key,
    select=select,
    transform=transform,
    training_kwargs=training_kwargs,
    # Output location and checkpoint saving.
    output_dir="output/finetune",
    overwrite_output_dir=True,
    save_total_limit=3,
    save_steps=100,
    # Training length, batching and sequence length.
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    max_seq_length=512,
    # Logging and evaluation cadence.
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=100,
)

In [None]:
# <tab: Lora>
# Turn on the trainer's `use_lora` flag
# (presumably enables LoRA adapter training — confirm against LLMTrainer).
trainer.use_lora = True

In [None]:
# <tab: QLora>
# Sets the `use_lora` flag and additionally `bits = 4`
# (presumably 4-bit quantization of the base model — confirm against LLMTrainer).
trainer.use_lora = True
trainer.bits = 4

In [None]:
# <tab: Deepspeed>
!pip install deepspeed
deepspeed = {
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
    "zero_optimization": {
        "stage": 2,
    },
}
trainer.use_lora = True
trainer.bits = 4
trainer.deepspeed = deepspeed

In [None]:
# <tab: Multi-GPUS>
# Sets `use_lora` and `bits = 4`, then `num_gpus = 2`
# (presumably requests two GPUs for the training run — confirm against LLMTrainer).
trainer.use_lora = True
trainer.bits = 4
trainer.num_gpus = 2

Create a trainable LLM and add it to the database; the training task then runs automatically.

In [None]:
# NOTE: `model_kwargs` and `tokenizer_kwargs` are not assigned in any cell shown
# in this notebook; they must already exist in the kernel before this cell runs.
# `model_name` and `trainer` come from the earlier cells.
llm = LLM(
    identifier="llm",
    model_name_or_path=model_name,
    trainer=trainer,
    model_kwargs=model_kwargs,
    tokenizer_kwargs=tokenizer_kwargs,
)

# Add the model to the database; according to the accompanying text, this is
# the step after which the training task runs.
db.apply(llm)

# Load the trained model
There are two methods to load a trained model:

- **Load the model directly**: This will load the model with the best metrics (if the transformers' best model save strategy is set) or the last version of the model.
- **Use a specified checkpoint**: This method downloads the specified checkpoint, initializes the base model, and finally merges the checkpoint with the base model. This approach supports custom operations during initialization, such as resetting flash_attentions or applying model quantization.

In [None]:
# <tab: Load Trained Model Directly>
# Load the component stored under type "model" with identifier "llm"
# (the same identifier the LLM was created with earlier in this notebook).
llm = db.load("model", "llm")

In [None]:
# <tab: Use a specified checkpoint>
from pinnacle.ext.transformers import LLM, LLMTrainer

# Take the last entry returned by `db.show("checkpoint")` as the experiment to load.
# Guard against an empty result so a missing training run produces an actionable
# message rather than a bare "list index out of range".
experiment_ids = db.show("checkpoint")
if len(experiment_ids) == 0:
    raise IndexError(
        "No checkpoints found in the database; run the training step before "
        "loading a checkpoint."
    )
experiment_id = experiment_ids[-1]
version = None # None means the last checkpoint
checkpoint = db.load("checkpoint", experiment_id, version=version)

# Rebuild the LLM from the base model name and attach the loaded checkpoint via
# `adapter_id`; `load_in_4bit=True` is forwarded through `model_kwargs`.
llm = LLM(
    identifier="llm",
    model_name_or_path=model_name,
    adapter_id=checkpoint,
    model_kwargs=dict(load_in_4bit=True)
)

In [None]:
# <testing: >
# Send a single user message to the loaded model and print its reply.
# Sampling is disabled (`do_sample=False`) and generation is capped at 200 new tokens.
question = "What is the capital of Germany? Explain why thats the case and if it was different in the past?"
messages = [{"role": "user", "content": question}]
answer = llm.predict(messages, max_new_tokens=200, do_sample=False)
print(answer)