Fine-tune base Falcon-7B for chat using the Guanaco dataset (OpenAssistant subset)

In [None]:
!pip install -q datasets==2.16.0 bitsandbytes einops peft trl

In [None]:
import torch
import wandb

from datasets import load_dataset
from huggingface_hub import login
from peft import LoraConfig, PeftModel, get_peft_model
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
from kaggle_secrets import UserSecretsClient

# Fetch credentials through the kaggle_secrets client so they are not hardcoded in the notebook.
user_secrets = UserSecretsClient()
# Value stored under the "hugging-face-token" label (presumably a Hugging Face access token).
secret_value_0 = user_secrets.get_secret("hugging-face-token")
# Value stored under the "wandb-key" label (presumably the Weights & Biases API key).
wandb_key = user_secrets.get_secret("wandb-key")

In [None]:
from kaggle_secrets import UserSecretsClient

# Authenticate through the Python API rather than `!wandb login $wandb_key`, so the key is
# never interpolated into a shell command line.
wandb.login(key=wandb_key)

In [None]:
# Hub identifier of the dataset used for fine-tuning.
dataset_name = "timdettmers/openassistant-guanaco"

# Only the "train" split is loaded.
dataset = load_dataset(path=dataset_name, split="train")

In [None]:
def print_trainable_parameters(model):
    """
    Print how many of the model's parameters are trainable.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` flag (e.g. a ``torch.nn.Module``).

    Returns:
        None. Prints one line with the trainable count, the total count and
        the trainable percentage. A model without any parameters is reported
        as ``0.0`` percent instead of raising ``ZeroDivisionError``.
    """
    # NOTE(review): for quantized weights numel() may reflect packed storage
    # rather than the logical parameter count -- confirm before quoting totals.
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # Guard the ratio: a parameter-less model would otherwise divide by zero.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
# Base checkpoint: 16-bit sharded Falcon-7B.
model_id = "ybelkada/falcon-7b-sharded-bf16"

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# SECURITY: trust_remote_code=True allows Python code shipped with the checkpoint
# repository to run locally; only use it with repositories you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True,
)

# LoRA adapter settings. Only the fused attention projection is targeted;
# "dense", "dense_h_to_4h" and "dense_4h_to_h" are further candidates that are
# currently left disabled.
qlora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["query_key_value"],
)

# Wrap the quantized base model with the adapter configuration.
model = get_peft_model(model, qlora_config)

# Reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

print_trainable_parameters(model)

In [None]:
# Hyperparameters and output location for the training run.
output_dir = "/kaggle/working/model/"
optim = "paged_adamw_32bit"
learning_rate = 2e-4
logging_steps = 10
# NOTE(review): save_steps is not passed to TrainingArguments below
# (save_strategy is "epoch"); kept in case another cell reads it.
save_steps = 10

training_arguments = TrainingArguments(
    output_dir=output_dir,
    optim=optim,
    learning_rate=learning_rate,
    fp16=True,
    auto_find_batch_size=True,
    logging_steps=logging_steps,
    save_strategy="epoch",
    # push_to_hub=True,  # left disabled
)

In [None]:
# Maximum sequence length handed to the trainer.
max_seq_length = 512

# Supervised fine-tuning on the dataset's "text" field.
# NOTE(review): `model` was already wrapped by get_peft_model with this same
# config in an earlier cell; confirm that passing peft_config again is intended.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset,
    dataset_text_field="text",
    tokenizer=tokenizer,
    max_seq_length=max_seq_length,
    peft_config=qlora_config,
)

In [None]:
# Cast every sub-module whose qualified name contains "norm" to float32.
# torch's Module.to() converts the module in place and returns it, so the
# result does not need to be reassigned.
for name, module in trainer.model.named_modules():
    if "norm" not in name:
        continue
    module.to(torch.float32)

In [None]:
# Start the fine-tuning run using the trainer built in the previous cells.
trainer.train()

In [None]:
# Save the fine-tuned model locally, then publish it to the Hugging Face Hub.
trainer.save_model("falcon-7b-guanaco-16bit")

# The trainer already holds the fine-tuned PEFT-wrapped model, so use it directly.
# PeftModel.from_pretrained expects the base model as its first argument followed by
# the adapter path; calling it with only a path raises a TypeError.
fine_tuned_model = trainer.model

# NOTE(review): adapters_path is not used by any visible cell; kept in case later cells read it.
adapters_path = 'falcon-guanaco'

# Authenticate the upload with the Hugging Face token read from the Kaggle secrets;
# an empty token string cannot authorize the push.
fine_tuned_model.push_to_hub("jpscardoso/falcon-7b-guanaco-16bit", token=secret_value_0)