# Fine-tuning OLMo-7B

### Fine-tuning OLMo-7B with LoRA using the PEFT library

In [None]:
# !pip install -qU transformers accelerate bitsandbytes peft trl datasets evaluate ai2-olmo

In [None]:
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig

#### Add Bits and Bytes config

In [None]:
# 4-bit quantization settings handed to `from_pretrained` via `quantization_config`.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    # The documented keyword is `bnb_4bit_use_double_quant`. A differently spelled
    # name (e.g. `bnb_double_quant`) is not a recognised option, so nested
    # quantization would stay disabled.
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

#### Setup Model and Tokenizer

In [None]:
# Hub identifier shared by the tokenizer and the model weights.
model_id = "allenai/OLMo-7B"

# Tokenizer: reuse the EOS token as the padding token and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Model: loaded with the quantization config defined earlier and placed on the
# first CUDA device. `trust_remote_code=True` is passed through to the loader.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map='cuda:0',
    quantization_config=bnb_config,
)

#### Get Dataset

In [None]:
from datasets import load_dataset

dataset_name = "WizardLM/WizardLM_evol_instruct_70k"
dataset = load_dataset(dataset_name)

# The trainer later reads both dataset['train'] and dataset['test'].
# This dataset is expected to provide only a 'train' split (TODO confirm on the Hub),
# so carve out a small, seeded hold-out set when no 'test' split is present.
# If a 'test' split does exist, the loaded splits are used unchanged.
if "test" not in dataset:
    dataset = dataset["train"].train_test_split(test_size=0.01, seed=42)

#### Create prompt template

In [None]:
def create_prompt(sample):
    """Render one dataset record as a single instruction/response training string.

    Args:
        sample: Mapping with string values under the 'instruction' and 'output' keys.

    Returns:
        A string made of an "### Instruction:" header, a newline and the
        instruction, a blank line, a "### Response:" header, a newline and the
        output, immediately followed by the "<|endoftext|>" marker.
    """
    end_marker = "<|endoftext|>"

    # Pieces are concatenated with no separator; each piece carries its own newlines.
    pieces = [
        "### Instruction:",
        "\n" + sample['instruction'],
        "\n\n### Response:",
        "\n" + sample["output"],
        end_marker,
    ]
    return "".join(pieces)


#### Setup LoRA configs and model

In [None]:
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

# LoRA hyperparameters: adapter rank, alpha, and dropout.
lora_r, lora_alpha, lora_dropout = 64, 16, 0.1

# Disable the config's `use_cache` flag, then run PEFT's k-bit training
# preparation with gradient checkpointing left off.
model.config.use_cache = False
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

# Adapter configuration; `target_modules` limits LoRA to modules matching 'att_proj'.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules=['att_proj'],
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the prepared model with the LoRA adapters.
model = get_peft_model(model, peft_config)

#### Setup Training Arguments and SFTTrainer

In [None]:
from transformers import TrainingArguments

# Training configuration for a short, step-bounded run.
args = TrainingArguments(
    output_dir="OLMo_sft_experiment",
    max_steps=500,
    per_device_train_batch_size=1,
    # 0.03 is a fraction of the run, not a step count, so it belongs in
    # `warmup_ratio`; `warmup_steps` is meant for a whole number of steps.
    # NOTE: the 'constant' schedule below does not apply warmup; use
    # 'constant_with_warmup' if warmup is actually wanted.
    warmup_ratio=0.03,
    logging_steps=10,
    # The run is bounded by `max_steps`, so checkpoint on a step interval too.
    # An epoch-based save only fires at an epoch boundary, which a 500-step run
    # may never reach.
    save_strategy="steps",
    save_steps=100,
    evaluation_strategy="steps",  # step-based, matching max_steps
    eval_steps=20,
    learning_rate=2e-4,
    bf16=True,
    lr_scheduler_type='constant',
)

In [None]:
from trl import SFTTrainer

# Maximum sequence length passed to the trainer.
max_seq_len = 2048

# Supervised fine-tuning trainer. Each record is turned into text by
# `create_prompt`, and packing is enabled.
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    tokenizer=tokenizer,
    formatting_func=create_prompt,
    packing=True,
    max_seq_length=max_seq_len,
    peft_config=peft_config,
)

#### Time to Train!

Run the cell below to start the fine-tuning run.

In [None]:
# Start fine-tuning with the trainer built in the previous cell. As the last
# expression in the cell, the value returned by `train()` is shown as the output.
trainer.train()