In [1]:
!pip install -q datasets transformers==4.31.0 trl==0.4.7 accelerate==0.21.0 peft==0.4.0 scipy wandb bitsandbytes==0.40.2 sentencepiece

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [2]:
import shutil
from getpass import getpass

import wandb
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from trl import SFTTrainer

In [3]:
# Hub identifier of the base checkpoint that will be fine-tuned.
model_name = "openlm-research/open_llama_3b_v2"

# Hub identifier of the instruction dataset used for supervised fine-tuning.
dataset_name = "tatsu-lab/alpaca"


In [4]:
# Hold out 20% of the dataset's `train` split for evaluation.
# The split is seeded so that Restart & Run All reproduces the exact same
# train/eval partition; without a seed the held-out rows change on every run
# and validation losses from different runs cannot be compared.
split_seed = 42
dataset = load_dataset(dataset_name)['train'].train_test_split(
    test_size=0.2,
    seed=split_seed,
)

Downloading readme:   0%|          | 0.00/7.47k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/24.2M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [5]:
# Give the two halves of the split their own names for the trainer.
train_dataset = dataset['train']
eval_dataset = dataset['test']

In [6]:
# LoRA adapter hyperparameters.
lora_r = 64          # rank passed to LoraConfig as `r`
lora_alpha = 16      # value passed as `lora_alpha`
lora_dropout = 0.1   # dropout passed as `lora_dropout`

# Adapter configuration: adapters are attached to the four attention
# projection modules listed in `target_modules`; no bias terms are trained.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

In [7]:
# bitsandbytes quantization settings used when loading the base model.
use_4bit = True                      # forwarded as `load_in_4bit`
bnb_4bit_quant_type = "nf4"          # forwarded as `bnb_4bit_quant_type`
bnb_4bit_compute_dtype = "float16"   # forwarded as `bnb_4bit_compute_dtype`
use_nested_quant = False             # forwarded as `bnb_4bit_use_double_quant`

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_use_double_quant=use_nested_quant,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=bnb_4bit_compute_dtype,
)

In [8]:
# Load the base causal-LM checkpoint with the quantization config defined
# earlier; `device_map="auto"` leaves device placement to the library.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/6.85G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [9]:
# Tokenizer for the base checkpoint (slow implementation requested via
# `use_fast=False`). The end-of-sequence token is reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

# Model-config adjustments applied before training.
# NOTE(review): `use_cache` is presumably switched off because the cache is
# not useful during training — confirm.
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1

Downloading (…)okenizer_config.json:   0%|          | 0.00/593 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/512k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/330 [00:00<?, ?B/s]

You are using the legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565


In [19]:
# --- Output / bookkeeping --------------------------------------------------
output_dir = "./results"
save_steps = 100
logging_steps = 25

# --- Run length --------------------------------------------------------------
# NOTE(review): per the TrainingArguments docs a positive `max_steps` takes
# precedence over `num_train_epochs`, so the -1 below should be inert — confirm.
max_steps = 5000
num_train_epochs = -1

# --- Batching ----------------------------------------------------------------
per_device_train_batch_size = per_device_eval_batch_size = 16
gradient_accumulation_steps = 1
group_by_length = True

# --- Precision / memory --------------------------------------------------------
fp16 = bf16 = False
gradient_checkpointing = True

# --- Optimizer and schedule ----------------------------------------------------
optim = "paged_adamw_32bit"
learning_rate = 3e-4
weight_decay = 0.001
max_grad_norm = 0.3
lr_scheduler_type = "constant"
# NOTE(review): with the plain "constant" scheduler the warmup ratio is likely
# ignored; verify whether "constant_with_warmup" was intended.
warmup_ratio = 0.03

# --- SFT-specific knobs ----------------------------------------------------------
max_seq_length = None
packing = False

In [20]:
# Assemble the Trainer configuration from the hyperparameters declared in the
# previous cell.
arguments = TrainingArguments(
    # Output, logging and checkpointing
    output_dir=output_dir,
    report_to="all",
    logging_steps=logging_steps,
    save_strategy="steps",
    save_steps=save_steps,
    save_safetensors=True,
    # Evaluation cadence
    evaluation_strategy="steps",
    eval_steps=200,  # evaluate every 200 steps
    # Run length
    num_train_epochs=num_train_epochs,
    max_steps=max_steps,
    # Batching
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    group_by_length=group_by_length,
    # Precision / memory
    fp16=fp16,
    bf16=bf16,
    # `gradient_checkpointing` was declared with the other hyperparameters but
    # never forwarded, so toggling it had no effect; pass it through so the
    # setting actually reaches the Trainer.
    gradient_checkpointing=gradient_checkpointing,
    # Optimizer and schedule
    optim=optim,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    max_grad_norm=max_grad_norm,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
)

In [21]:
# Build the supervised fine-tuning trainer: the quantized base model gets LoRA
# adapters from `lora_config` and is trained on the dataset's `text` column.
trainer = SFTTrainer(
    model=base_model,
    args=arguments,
    tokenizer=tokenizer,
    peft_config=lora_config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field='text',
    # These two were declared with the other hyperparameters but never handed
    # to the trainer, so editing them changed nothing. Forward them so the
    # hyperparameter cell is the single source of truth.
    max_seq_length=max_seq_length,
    packing=packing,
)



In [22]:
# Start fine-tuning with the trainer built above. The call is left as a bare
# expression so its return value is shown as the cell's output.
trainer.train()

Step,Training Loss,Validation Loss
200,0.8042,1.040389
400,0.7946,1.017954
600,0.7862,1.00728
800,0.766,1.00556
1000,0.761,0.99865
1200,0.7663,0.992431
1400,0.7646,0.989242
1600,0.7575,0.984832
1800,0.7535,0.984409


KeyboardInterrupt: 

In [None]:
# Remove the leftover `./food_b` directory. `shutil.rmtree` replaces the shell
# `rm -r` so the cell works on any OS and is a no-op when the directory is
# already gone (safe under Restart & Run All).
# NOTE(review): nothing in the visible notebook creates `./food_b` — confirm
# this cleanup is still wanted here.
shutil.rmtree('./food_b', ignore_errors=True)