#### Get dataset for finetuning

In [15]:
from datasets import load_dataset
# Load the 'train' split of the "c-s-ale/alpaca-gpt4-data" dataset.
alpaca_gpt4_ds = load_dataset("c-s-ale/alpaca-gpt4-data", split='train')

In [16]:
# Drop samples whose input, output and instruction fields are too long in total
def _fits_length_budget(sample):
    """Return True when the combined length of the sample's three fields is at most 2000."""
    total_length = sum(len(sample[field]) for field in ('input', 'output', 'instruction'))
    return total_length <= 2000


dataset = alpaca_gpt4_ds.filter(_fits_length_budget)

In [17]:
# Keep only the first 1000 samples of the filtered dataset. The count is capped
# at len(dataset) so the selection cannot index past the end when fewer than
# 1000 samples survive the length filter.
dataset = dataset.select(range(min(1000, len(dataset))))

#### Set up the prompt formatting function

In [18]:
def sample_formatting_function(sample):
    """Render one dataset sample as a single instruction-following prompt string.

    The prompt is built from these sections, joined by blank lines:
    a preamble, ``### Instruction:``, ``### Input:`` (only when
    ``sample['input']`` is truthy) and ``### Response:``.

    Args:
        sample: Mapping with the keys ``'instruction'``, ``'input'`` and
            ``'output'``.

    Returns:
        The formatted prompt, ending with the response section.
    """
    base_prompt_w_input = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes that request."
    base_prompt_wo_input = "Below is an instruction that describes a task. Write a response that appropriately completes that request."

    # The instruction must be part of the prompt; without it the response has
    # nothing to condition on.
    instruction_template = f"### Instruction:\n{sample['instruction']}"
    input_template = f"### Input:\n{sample['input']}"
    response_template = f"### Response:\n{sample['output']}"

    if sample['input']:
        final_prompt_list = [base_prompt_w_input, instruction_template, input_template]
    else:
        # No extra context: use the shorter preamble and skip the input section.
        final_prompt_list = [base_prompt_wo_input, instruction_template]

    final_prompt_list.append(response_template)

    return "\n\n".join(final_prompt_list)



#### Set up the model and tokenizer for training

In [19]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Identifier of the base checkpoint to fine-tune
model_id = "meta-llama/Meta-Llama-3-8B"

# Quantization settings: 4-bit loading with the 'nf4' quant type, double
# quantization enabled, and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    load_in_4bit=True,
)

# Load the causal LM with the quantization config, placed on cuda:0 and with
# use_cache switched off.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda:0",
    use_cache=False,
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [20]:
# Tokenizer for the same checkpoint as the model; padding goes on the right
# and the EOS token is reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


#### Set up LoRA configuration

In [21]:
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# LoRA adapter hyperparameters for a causal language modelling task
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Run peft's k-bit training preparation on the loaded (quantized) model.
model = prepare_model_for_kbit_training(model)

#### Set up training arguments and model

In [22]:
import os
from transformers import TrainingArguments
# Set the NCCL_P2P_DISABLE and NCCL_IB_DISABLE flags for this process
os.environ.update({'NCCL_P2P_DISABLE': '1', 'NCCL_IB_DISABLE': '1'})

args = TrainingArguments(
    # Output and checkpointing
    output_dir="llama3_8b_nf4_alpaca_gpt4",
    save_strategy="steps",
    save_steps=100,
    # Run length and batch shape (4 per device x 2 accumulation steps)
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    # Optimizer and learning-rate schedule
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    max_grad_norm=0.3,
    # NOTE(review): warmup_ratio is set next to a plain "constant" schedule;
    # confirm the warmup is actually applied ("constant_with_warmup" may have
    # been intended).
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    # Numeric precision
    bf16=True,
    tf32=True,
    fp16=False,
    # Logging
    logging_steps=1,
    disable_tqdm=True,
)

# Attach the LoRA adapters described by peft_config to the prepared model
model = get_peft_model(model, peft_config)

In [23]:
from trl import SFTTrainer

# Maximum sequence length handed to the trainer
max_seq_len = 2048

# `model` has already been wrapped with get_peft_model(model, peft_config) in
# the previous cell, so peft_config is not passed to the trainer a second time.
# NOTE(review): handing an already-wrapped PEFT model plus a peft_config to the
# trainer is redundant, and how the trainer treats that combination appears to
# depend on the TRL version; confirm against the installed release.
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    # Each sample is rendered to text by the formatting function; packing is
    # enabled with max_seq_len as the sequence length.
    formatting_func=sample_formatting_function,
    packing=True,
    max_seq_length=max_seq_len,
)

Generating train split: 0 examples [00:00, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [None]:
# Start fine-tuning with the trainer configured above.
trainer.train()

In [None]:
# Save the trained model through the trainer. No path is given here, so the
# destination is left to the trainer (presumably args.output_dir; confirm).
trainer.save_model()