In [1]:
!nvidia-smi

Tue Aug 27 16:17:25 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02              Driver Version: 530.30.02    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA RTX A6000                On | 00000000:AF:00.0 Off |                  Off |
| 33%   60C    P5               11W / 300W|   3402MiB / 49140MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA RTX A6000                On | 00000000:D8:00.0 Off |  

In [2]:
import os

import torch
import pandas as pd
from llama_index.llms.ollama import Ollama
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig
from trl import SFTTrainer
from datasets import Dataset

from prompts import GRADE_QA_PROMPT,QUESTION_FORMAT


os.environ["HF_TOKEN"] = "hf_..."

def remove_duplicate_answer(answer: str) -> str:
    selections = answer.split(", ")
    selections = sorted(list(set(selections)))
    return ", ".join(selections)

In [3]:
dataset = pd.read_csv("../data/qa_dataset/dataiku_multiple_choice_qa.csv")

questions = [QUESTION_FORMAT.format(**row.to_dict()) for _, row in dataset.iterrows()]
prompts = [GRADE_QA_PROMPT.format(query_str=question) for question in questions]

assert dataset.shape[0] == len(prompts)

In [4]:
dataset["text"] = "<s>[INST]" + pd.Series(prompts) + "[/INST]" + dataset["answer"] + "</s>"

In [5]:
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    quantization_config=quantization_config,
    torch_dtype=torch.float16,
    device_map='auto'
)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [7]:
peft_params = LoraConfig(
    lora_alpha=128,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

In [8]:
training_params = TrainingArguments(
    output_dir="../output",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=10,
    logging_steps=10,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant"
)

In [9]:
train_dataset = Dataset.from_pandas(dataset[["text"]])

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    peft_config=peft_params,
    max_seq_length=256,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/1978 [00:00<?, ? examples/s]

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [10]:
import gc
gc.collect()

torch.cuda.empty_cache()

151

In [11]:
trainer.train()

Step,Training Loss
1,2.8409
2,2.4583
3,2.1738
4,1.8472
5,1.5227
6,1.1847
7,0.8262
8,0.5131
9,0.2615
10,0.1121
