In [None]:
!pip install -q datasets transformers==4.31.0 trl==0.4.7 accelerate==0.21.0 peft==0.4.0 scipy wandb bitsandbytes==0.40.2 sentencepiece

In [None]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)

import wandb
from getpass import getpass
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from datasets import load_dataset

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably required for pulling the model/dataset or pushing
# results later — confirm whether the repos used here actually need auth.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Hub identifiers used throughout the notebook.
model_name = "TinyPixel/Llama-2-7B-bf16-sharded"    # base checkpoint to fine-tune
dataset_name = "odunola/complete_foodie_dataset"    # fine-tuning corpus


In [None]:
# Hold out 20% of the hub dataset's 'train' split for evaluation.
# A fixed seed keeps the shuffle — and therefore which rows land in the
# train vs. test halves — identical across kernel restarts.
split_seed = 42
dataset = load_dataset(dataset_name)['train'].train_test_split(test_size = 0.2, seed = split_seed)

In [None]:
# Give the two halves of the split their own names for the cells below.
train_dataset = dataset['train']
eval_dataset = dataset['test']

In [None]:
# LoRA hyperparameters, kept as separate names so they are easy to tune.
lora_r = 64           # rank of the adapter matrices
lora_alpha = 16       # LoRA alpha (scaling) value
lora_dropout = 0.1    # dropout applied inside the adapter layers

# Adapter configuration for causal language modelling; adapters are attached
# to the q/k/v/o projection modules and no bias terms are trained.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

In [None]:
# bitsandbytes quantization settings used when loading the base model.
use_4bit = True                        # load the weights in 4-bit
bnb_4bit_quant_type = "nf4"            # 4-bit quantization scheme
bnb_4bit_compute_dtype = "float16"     # dtype name passed as the compute dtype
use_nested_quant = False               # double quantization switched off

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_use_double_quant=use_nested_quant,
    bnb_4bit_compute_dtype=bnb_4bit_compute_dtype,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
)

In [None]:
import torch

# Load the base checkpoint with the quantization settings from bnb_config;
# device placement is left to device_map='auto'.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

# Freeze every base weight (only adapters are trained later). One-dimensional
# parameters (e.g. layernorm) are additionally upcast to fp32 for stability.
for param in base_model.parameters():
    param.requires_grad = False
    if param.ndim != 1:
        continue
    param.data = param.data.to(torch.float32)

# Store fewer activations during the forward pass.
base_model.gradient_checkpointing_enable()
base_model.enable_input_require_grads()

In [None]:
# Tokenizer for the base checkpoint (slow implementation requested explicitly);
# the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

# Model config flags for training: tensor-parallel degree of 1 and the
# generation cache switched off.
base_model.config.pretraining_tp = 1
base_model.config.use_cache = False

In [None]:
# Wrap the frozen base model with the LoRA adapters described by lora_config.
from peft import get_peft_model
model = get_peft_model(base_model, lora_config)
# Sanity check: print how many parameters are trainable after wrapping.
model.print_trainable_parameters()

In [None]:
def _tokenize_text(examples):
    """Run the tokenizer over the 'text' column of a batch of examples."""
    return tokenizer(examples['text'])


# Apply the same batched tokenization to both halves of the split.
tokenized_train_dataset = train_dataset.map(_tokenize_text, batched=True)
tokenized_eval_dataset = eval_dataset.map(_tokenize_text, batched=True)

In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

# Training hyperparameters.
# Fixes relative to the previous version of this cell:
#   * a comma was missing after `save_steps`, which made the cell a SyntaxError;
#   * `eval_steps` only takes effect when the evaluation strategy is "steps"
#     (the default is "no"), so it is now set explicitly and the eval dataset
#     is actually used.
training_args = TrainingArguments(
    output_dir = 'outputs',
    per_device_train_batch_size = 4,
    per_device_eval_batch_size = 4,
    gradient_accumulation_steps = 4,   # 4 x 4 = 16 sequences per optimizer step per device
    warmup_steps = 100,
    max_steps = 3000,
    learning_rate = 2e-4,
    fp16 = True,
    logging_steps = 50,
    evaluation_strategy = "steps",     # evaluate every `eval_steps` optimizer steps
    eval_steps = 100,
    save_steps = 100,
    optim = "paged_adamw_32bit",
)

# Causal-LM collator (mlm=False): pads each batch and builds labels from the
# input ids, so no separate label column is needed in the datasets.
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = tokenized_train_dataset,
    eval_dataset = tokenized_eval_dataset,
    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm = False),
)

In [None]:
# Launch fine-tuning with the Trainer built in the previous cell.
trainer.train()