In [12]:
!pip install bitsandbytes



In [13]:
import bitsandbytes as bnb
print(bnb.__version__)

0.48.2


In [14]:
import torch

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer , BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType

In [15]:
model_name = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'

bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype= torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config= bnb_config,
    device_map= 'auto',
    trust_remote_code = True
)

tokenizer = AutoTokenizer.from_pretrained(model_name,trust_remote_code=True)


In [16]:
lora_config = LoraConfig(
    r = 8,    # rank
    lora_alpha = 16,
    target_modules = ['q_proj','v_proj'],
    lora_dropout = 0.05,
    bias = 'none',
    task_type = TaskType.CAUSAL_LM
)

model = get_peft_model(model,lora_config)

In [17]:
data = load_dataset('openai/gsm8k', 'main',split='train[:200]')

In [18]:
def tokenize(batch):
    texts = [
        f'### Instruction:\n{instruction} \n### Response:\n{out}'
        for instruction,out in zip(batch['question'], batch['answer'])
    ]

    tokens = tokenizer(
        texts,
        padding = 'max_length',
        max_length = 256,
        truncation = True,
        return_tensors ='pt'
    )

    tokens['labels'] = tokens['input_ids'].clone()

    return tokens

In [19]:
tokenized_data = data.map(tokenize,batched=True,remove_columns=data.column_names)

In [20]:
training_args = TrainingArguments(
    output_dir = './tinyllama-lora',
    per_device_train_batch_size = 4 ,
    gradient_accumulation_steps = 4,
    learning_rate = 1e-3,
    num_train_epochs = 50,
    fp16 = True,
    logging_steps = 20,
    save_strategy = 'epoch',
    remove_unused_columns = False,
    label_names = ['labels']
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [21]:
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = tokenized_data,
    processing_class = tokenizer
)

In [22]:
import os
os.environ["WANDB_MODE"] = "offline"

trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.


Step,Training Loss
20,1.9693
40,0.8175
60,0.7163
80,0.6303
100,0.5375
120,0.4693
140,0.3945
160,0.3109
180,0.2594
200,0.1972


TrainOutput(global_step=650, training_loss=0.23139604967374067, metrics={'train_runtime': 1132.2468, 'train_samples_per_second': 8.832, 'train_steps_per_second': 0.574, 'total_flos': 1.590741172224e+16, 'train_loss': 0.23139604967374067, 'epoch': 50.0})