In [1]:
!pip install peft
!pip install datasets
!pip install bitsandbytes
!pip install accelerate
!pip install trl

Collecting peft
  Downloading peft-0.11.1-py3-none-any.whl (251 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/251.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m245.8/251.6 kB[0m [31m7.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate>=0.21.0 (from peft)
  Downloading accelerate-0.32.1-py3-none-any.whl (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.1/314.1 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.0->peft)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13.0->peft)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cu

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Path of the training data (JSON Lines: the loading cell parses one JSON object per line).
# Alternative location on a mounted Google Drive, kept for Colab runs:
# file_path = '/content/drive/MyDrive/DACON/INHA-DACON.jsonl'
file_path = "./data/INHA-DACON.jsonl"

In [4]:
import os
import json
from transformers import AutoModelForCausalLM,AutoTokenizer
from transformers import TrainingArguments
from transformers import Trainer
from peft import get_peft_model,LoraConfig,TaskType
from datasets import load_dataset,Dataset,concatenate_datasets
import torch
from torch.nn.utils.rnn import pad_sequence


In [None]:
# Load the required libraries
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from trl import SFTConfig, SFTTrainer
import torch

# Set torch_dtype (used as the 4-bit compute dtype in the BitsAndBytesConfig cell)
torch_dtype = torch.float16  # torch.float16 is used as an example; change as needed

In [None]:
# bitsandbytes quantization settings; passed to `from_pretrained` through its
# `quantization_config` argument when the base model is loaded.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                   # load the weights in 4-bit
    bnb_4bit_use_double_quant=True,      # enable double quantization
    bnb_4bit_quant_type="nf4",           # 4-bit quantization type
    bnb_4bit_compute_dtype=torch_dtype,  # compute dtype shared with the rest of the notebook
)

In [18]:
# Model load
model_id = "MLP-KTLim/llama-3-Korean-Bllossom-8B"

# NOTE (original author's request): please remove quantization.
# The recorded run of this cell failed with a ValueError because `device_map='auto'`
# dispatched some modules to CPU/disk; make sure the GPU has enough free memory
# (e.g. restart the kernel so no earlier copy of the model is still resident).
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # Use the notebook-wide dtype so it always matches `bnb_4bit_compute_dtype`
    # in `quant_config` instead of a second hard-coded torch.float16.
    torch_dtype=torch_dtype,
    quantization_config=quant_config,
    device_map='auto',
)

# Model config overrides used for this fine-tuning run.
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1

# Tokenizer: reuse the EOS token as the padding token and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [6]:
# Add Lora Adapter
# ref. https://arxiv.org/abs/2106.09685
from peft import prepare_model_for_kbit_training

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=4,
    lora_alpha=8,
    lora_dropout=0.05,
    # Adapters are attached to every attention and MLP projection listed here.
    target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'],
    #modules_to_save=['embed_tokens','lm_head']
)

# The base model is loaded with a 4-bit `quantization_config`, so run PEFT's
# k-bit preparation step before wrapping it. Per the PEFT documentation this
# readies a quantized model for training (including gradient checkpointing by
# default), which also lowers activation memory — the recorded training run
# ended in a CUDA OutOfMemoryError.
base_model = prepare_model_for_kbit_training(base_model)

model = get_peft_model(base_model, lora_config)

In [7]:
import json
from datasets import Dataset

# Load the data: one JSON object per line (JSON Lines).
data_path = file_path
data = []
with open(data_path, 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        # Skip blank lines (e.g. stray empty lines between or after records);
        # `json.loads('')` would otherwise raise and abort the whole load.
        if not line:
            continue
        data.append(json.loads(line))

# Convert each record to a prompt/completion pair.
# Each entry must provide the keys 'context', 'instruction' and 'response'.
formatted_data = [
    {
        'prompt': f"주어진 Context: {entry['context']} 를 토대로 Question: {entry['instruction']}에 대해 간결하게 답장해줘",
        'completion': entry['response']
    }
    for entry in data
]

# Convert the records to a Hugging Face Dataset
dataset = Dataset.from_list(formatted_data)

In [8]:
# Use the full prompt/completion dataset for training (no hold-out split is made here),
# then display its summary as the cell output.
train_dataset = dataset
train_dataset

Dataset({
    features: ['prompt', 'completion'],
    num_rows: 33716
})

In [9]:
# System message prepended to every training conversation.
PROMPT = (
    '너는 주어진 Context를 토대로 Question에 답하는 챗봇이야. Question에 대한 답변만 가급적 한 단어로 최대한 간결하게 답변하도록 해.'
)

In [10]:
# Dataset Preprocessing
def formatting_func(examples):
    """Tokenize a batch of prompt/completion pairs for causal-LM fine-tuning.

    Args:
        examples: batched mapping with parallel lists under the keys
            'prompt' and 'completion' (as produced by `Dataset.map(batched=True)`).

    Returns:
        dict with two parallel lists of token-id lists:
            'input_ids': chat-templated prompt + response + '<|eot_id|>'
            'labels':    same length as 'input_ids', with every prompt position
                         set to -100 so only the response and the closing
                         '<|eot_id|>' carry label values.

    Note:
        PROMPT is sent with the 'system' role (it is the system message), so
        each conversation is one system turn followed by one user turn rather
        than two consecutive user turns. Inference must build its messages the
        same way.
    """
    # The end-of-turn id is the same for every example; look it up once.
    eot_id = tokenizer.convert_tokens_to_ids('<|eot_id|>')

    input_ids = []
    labels = []

    for instruction, response in zip(examples['prompt'], examples['completion']):
        messages = [{'role': 'system', 'content': f"{PROMPT}"},
                    {'role': 'user', 'content': f"{instruction}"}]

        # Prompt side: chat template including the generation prompt.
        prompt_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True)
        # Answer side: raw response tokens (no special tokens) closed by <|eot_id|>.
        response_ids = tokenizer(response, return_attention_mask=False, add_special_tokens=False)['input_ids']
        answer_ids = response_ids + [eot_id]

        input_ids.append(prompt_ids + answer_ids)
        # Prompt positions are set to -100; answer positions keep their token ids.
        labels.append([-100] * len(prompt_ids) + answer_ids)

    return {'input_ids': input_ids, 'labels': labels}

In [11]:
# Shuffle with a fixed seed so the sample order is reproducible across runs.
train_dataset = train_dataset.shuffle(seed=42)
# train_dataset=train_dataset.select(range(50))

# Tokenize every example; the original text columns are dropped so that only
# 'input_ids' and 'labels' remain.
# NOTE: this cell overwrites `train_dataset`, so it cannot be re-run without
# first re-running the cell that defines `train_dataset` from `dataset`.
train_dataset = train_dataset.map(
    formatting_func,
    num_proc=4,
    batched=True,
    remove_columns=train_dataset.column_names,
)

  self.pid = os.fork()


Map (num_proc=4):   0%|          | 0/33716 [00:00<?, ? examples/s]

In [12]:
# No train/test split is performed: the whole tokenized dataset is handed to the Trainer.
split_dataset = train_dataset

In [13]:
class CustomDataCollator(object):
    """Pad a list of tokenized examples into one batch of tensors.

    Each example is a mapping with 'input_ids' and 'labels' (lists of ints of
    equal length). The collator returns a dict with:
        'input_ids':      padded with `padding_value`
        'labels':         padded with IGNORE_INDEX (-100)
        'attention_mask': bool tensor, True on real tokens and False on padding

    Args:
        tokenizer: stored on the instance; not used during collation.
        prompt: stored on the instance; not used during collation.
        padding_value: token id used to pad 'input_ids'.
        batch_first: if True tensors are (batch, seq), otherwise (seq, batch).
    """

    # Label value for padded positions. It matches the -100 used to mask prompt
    # tokens during preprocessing and the default `ignore_index` of PyTorch's
    # cross-entropy loss, so padded positions do not contribute to the loss.
    IGNORE_INDEX = -100

    def __init__(self,tokenizer,prompt,padding_value,batch_first):
        self.tokenizer = tokenizer
        self.prompt = prompt
        self.padding_value=padding_value
        self.batch_first=batch_first

    def __call__(self, examples):
        # examples: [{'input_ids': [...], 'labels': [...]}, ...]
        input_ids = [torch.tensor(ex['input_ids'], dtype=torch.long) for ex in examples]
        labels = [torch.tensor(ex['labels'], dtype=torch.long) for ex in examples]

        padded_input_ids = pad_sequence(input_ids, padding_value=self.padding_value, batch_first=self.batch_first)
        # Pad labels with IGNORE_INDEX rather than the pad token id; otherwise
        # every padded position would be treated as a real target.
        padded_labels = pad_sequence(labels, padding_value=self.IGNORE_INDEX, batch_first=self.batch_first)

        # Build the mask from the true sequence lengths instead of comparing
        # against the pad id: the pad id can also occur as a genuine token
        # (the pad token is set to the EOS token), and those must stay visible.
        lengths = torch.tensor([seq.size(0) for seq in input_ids], dtype=torch.long)
        max_len = padded_input_ids.size(1 if self.batch_first else 0)
        positions = torch.arange(max_len)
        attention_mask = positions.unsqueeze(0) < lengths.unsqueeze(1)  # (batch, seq)
        if not self.batch_first:
            attention_mask = attention_mask.t()  # (seq, batch)

        return {'input_ids': padded_input_ids, 'labels': padded_labels,'attention_mask':attention_mask}


In [14]:
# Collator used by the Trainer: pads each batch with the tokenizer's pad token id, batch dimension first.
data_collator = CustomDataCollator(
    tokenizer=tokenizer,
    prompt=PROMPT,
    padding_value=tokenizer.pad_token_id,
    batch_first=True,
)

In [15]:
# Please change output_dir to suit your environment.
training_args = TrainingArguments(
    # --- where checkpoints go and how often ---
    output_dir='./results',
    save_strategy='epoch',
    save_total_limit=1,
    save_only_model=True,
    # --- schedule and batch size ---
    num_train_epochs=10,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    # --- precision and optimizer ---
    fp16=True,
    optim='adamw_bnb_8bit',
    # --- logging ---
    logging_strategy='steps',
    logging_steps=30,
    # --- column that carries the labels ---
    label_names=['labels'],
    # --- evaluation is disabled in this run ---
    #per_device_eval_batch_size=1,
    #evaluation_strategy='epoch',
    #load_best_model_at_end=True,
)

In [19]:
# Assemble the Trainer from the LoRA-wrapped model, the tokenized dataset,
# the padding collator and the training arguments defined above.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=split_dataset,
    # eval_dataset=split_dataset['test'],
)

# Run fine-tuning; the return value is shown as the cell output.
trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 224.00 MiB. GPU 

In [None]:
# Save the model and the tokenizer side by side in the same directory.
fine_tuned_dir = './result/fine_tuned_model'
model.save_pretrained(fine_tuned_dir)
tokenizer.save_pretrained(fine_tuned_dir)