In [None]:
import yaml
import json
import pandas as pd
from peft import LoraConfig, get_peft_model
from transformers import Trainer, TrainingArguments
from datasets import Dataset
from components.instructor import Instructor

In [None]:
# Load the YAML configuration file into a plain dictionary.
# The encoding is stated explicitly so parsing does not depend on the
# platform's default locale encoding.
with open('./config/config.yaml', 'r', encoding='utf-8') as file:
    # safe_load only builds basic Python types; it never instantiates arbitrary objects.
    config = yaml.safe_load(file)
# Echo the parsed configuration as the cell output.
config

In [None]:
# Build the project's Instructor helper from the loaded configuration.
instructor = Instructor(config)
# Run the helper's device-map check with the module classes listed in the config
# (presumably layers that must stay on a single device — confirm in components.instructor).
instructor.check_device_map(no_split_module_classes=config['no_split_module_classes'])
instructor.load_model()
# NOTE(review): a bare expression in the middle of a cell is not displayed;
# only the last expression of a cell is rendered.
instructor.model
# NOTE(review): test_size=0.999 presumably leaves only ~0.1% of the rows for training —
# confirm this is an intentional smoke-test setting before a real run.
instructor.load_dataset(test_size=0.999)
# Preview the first five training rows as the cell output.
instructor.train_dataset[0:5]

In [None]:
# Display the tokenizer exposed by the instructor as the cell output.
instructor.tokenizer

In [None]:
def tokenization(example):
    """Tokenize the "text" entry of `example` with the instructor's tokenizer.

    Uses the tokenizer's default settings (no padding or truncation arguments
    are passed) and returns whatever the tokenizer call produces.
    """
    raw_text = example["text"]
    return instructor.tokenizer(raw_text)

# Tokenize the training split batch-wise; load_from_cache_file=False asks
# `map` to recompute instead of reusing a previously cached result.
dataset = instructor.train_dataset.map(
    tokenization,
    batched=True,
    load_from_cache_file=False,
)
# Show the resulting dataset summary as the cell output.
dataset

In [None]:
def tokenize_function(examples):
    """Tokenize a batch of rows and attach causal-LM labels.

    Args:
        examples: A batch mapping column names to lists of values, as passed
            by ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output for the batch, extended with a ``labels`` entry
        that mirrors ``input_ids`` but uses -100 at padded positions so those
        positions are ignored by the loss.
    """
    # The earlier `tokenization` cell reads the "text" column of this same
    # dataset, so prefer it; fall back to the original 'examples' key so a
    # dataset that really has that column keeps working.
    text_column = "text" if "text" in examples else "examples"
    encoded = instructor.tokenizer(examples[text_column], padding="max_length", truncation=True)

    # A plain Trainer only gets a loss from a causal LM when `labels` are in
    # the batch. Padding is masked via the attention mask rather than the pad
    # token id, so real tokens stay supervised even if pad and EOS share an id.
    attention_masks = encoded.get("attention_mask")
    if attention_masks is None:
        encoded["labels"] = [list(ids) for ids in encoded["input_ids"]]
    else:
        encoded["labels"] = [
            [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
            for ids, mask in zip(encoded["input_ids"], attention_masks)
        ]
    return encoded


# Drop the raw (non-tensor) source columns: the training arguments below set
# remove_unused_columns=False, so anything left in the dataset would be handed
# to the data collator and the model as-is.
train_dataset = instructor.train_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=instructor.train_dataset.column_names,
)


# LoRA adapter settings for causal language modelling.
# NOTE(review): the target module names look like Llama-style attention and
# MLP projections — confirm they match the layers of instructor.model.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "k_proj",
        "gate_proj",
        "v_proj",
        "up_proj",
        "q_proj",
        "o_proj",
        "down_proj",
    ],
)

# Wrap the loaded model with the adapters and report how many parameters will train.
lora_model = get_peft_model(instructor.model, peft_config)
lora_model.print_trainable_parameters()

# Hyperparameters for the fine-tuning run.
training_args = TrainingArguments(
    # Where the run writes its outputs; no checkpoints are saved during training.
    output_dir=config['instruction_output_dir'],
    save_strategy="no",
    # Batching: 4 samples per device x 4 accumulation steps per optimizer update.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    # Optimisation schedule: warmup spans the first 100 of the 200 total steps.
    optim="paged_adamw_32bit",
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    max_steps=200,
    # Precision and logging.
    bf16=True,
    logging_steps=1,
    # Keep every dataset column instead of pruning those the model does not name.
    remove_unused_columns=False,
)

# Build a standard Hugging Face Trainer (this is the plain `Trainer`, not a
# DPO trainer) around the LoRA-wrapped model.
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=train_dataset,
    tokenizer=instructor.tokenizer,
)