In [None]:
import torch
import numpy as np
import pandas as pd
import torch.nn.functional as F
import json

from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.optim import AdamW
from lightning import Fabric

from huggingface_hub import login
from datasets import load_dataset
from transformers import get_scheduler, pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, Trainer, EarlyStoppingCallback
from peft import get_peft_model, LoraConfig, PeftModelForCausalLM, prepare_model_for_kbit_training, PeftConfig, PeftModel
from trl import SFTTrainer, SFTConfig, PPOTrainer, PPOConfig

from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

%config InlineBackend.figure_formats = ['svg']

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
torch.set_float32_matmul_precision("medium")
fabric = Fabric(accelerator="cuda", devices=1, precision="bf16-mixed")
device = fabric.device
fabric.launch()

Using bfloat16 Automatic Mixed Precision (AMP)


In [4]:
with open('../datasets/train_test_data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [5]:
def preprocess_dnd(example):
    return {
        "input": example["input"] if example["input"] else "<startoftext>",
        "output": example["output"]
    }

In [6]:
dnd_data = [preprocess_dnd(example) for index, example in data.items()]

In [7]:
tc = load_dataset("Salesforce/xlam-function-calling-60k")

In [8]:
def preprocess_tool_calling(example):
    return {
        "input": example["query"],
        "output": f"Function: {example['tools']} Arguments: {example['answers']}"
    }

tool_calling_data = [preprocess_tool_calling(example) for example in tc['train']]

In [9]:
model_name = 'meta-llama/Llama-3.1-8B-Instruct'

In [10]:
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [None]:
model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device, quantization_config=quant_config,)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.gradient_checkpointing_enable()

tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [12]:
train_data, temp_data = train_test_split(tool_calling_data + dnd_data, test_size=0.1, random_state=42)
test_data, val_data = train_test_split(list(temp_data), test_size=0.5, random_state=42)

print("Train Data Size:", len(train_data))
print("Test Data Size:", len(test_data))
print("Val Data Size:", len(val_data))

Train Data Size: 154120
Test Data Size: 8562
Val Data Size: 8563


In [13]:
def formatting_prompts_func(examples):
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []

    for input_text, output_text in zip(inputs, outputs):
        input_text = input_text or ""
        output_text = output_text or ""
        texts.append(input_text + output_text + tokenizer.eos_token)

    return { "text": texts }


In [14]:
from datasets import Dataset

train_dataset = Dataset.from_list(train_data)
val_dataset = Dataset.from_list(val_data)
test_dataset = Dataset.from_list(test_data)

In [15]:
train_dataset = train_dataset.map(formatting_prompts_func, batched=True, remove_columns=train_dataset.column_names)
val_dataset = val_dataset.map(formatting_prompts_func, batched=True, remove_columns=val_dataset.column_names)
test_dataset = test_dataset.map(formatting_prompts_func, batched=True, remove_columns=test_dataset.column_names)


Map:   0%|          | 0/154120 [00:00<?, ? examples/s]

Map:   0%|          | 0/8563 [00:00<?, ? examples/s]

Map:   0%|          | 0/8562 [00:00<?, ? examples/s]

In [16]:
train_dataset

Dataset({
    features: ['text'],
    num_rows: 154120
})

In [None]:
training_args = SFTConfig(
    output_dir="./weights",
    num_train_epochs=1,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=100,
    gradient_accumulation_steps=8,
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    optim="paged_adamw_8bit",
    fp16=True,
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    args=training_args,
)

trainer.train()

Converting train dataset to ChatML:   0%|          | 0/154120 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/154120 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/154120 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/154120 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33malog[0m ([33malog-chulalongkorn-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.76 GiB. GPU 0 has a total capacity of 15.58 GiB of which 1.29 GiB is free. Including non-PyTorch memory, this process has 11.61 GiB memory in use. Of the allocated memory 10.99 GiB is allocated by PyTorch, and 347.53 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
trainer.save_model("./weights/SFT")
tokenizer.save_pretrained("./weights/SFT")

In [None]:
peft_config = PeftConfig.from_pretrained("./weights/SFT_yuaylong")
base_model = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,
    device_map="auto",
    quantization_config=quant_config,
)
model = PeftModel.from_pretrained(base_model, "./weights/SFT_yuaylong")

tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token


In [None]:
ppo_config = PPOConfig(
    batch_size=1,
    mini_batch_size=1,
    gradient_accumulation_steps=1,
    learning_rate=1e-5,
)

ppo_trainer = PPOTrainer(
    config=ppo_config,
    model=model,
    tokenizer=tokenizer,
)

In [None]:
def dm_reward_fn(query, response):
    input_text = f'the input is: {query}\nthe answer is: {response}'
    score = reward_model.predict(query + response)


# Training loop
for prompt in prompts:
    response = ppo_trainer.generate(prompt, max_new_tokens=100)[0]
    reward = dm_reward_fn(prompt, response)
    ppo_trainer.step([prompt], [response], [reward])