In [None]:
# %pip install accelerate peft bitsandbytes transformers trl

In [10]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging
)
from peft import LoraConfig
from trl import SFTTrainer

In [11]:
import huggingface_hub
huggingface_hub.login()
# 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

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [12]:
# Hugging Face Basic Model
base_model = "NousResearch/Llama-2-7b-chat-hf"

# Custom Dataset
custom_dataset = "hyokwan/customhkcode2"            # 유튜브 강의자료 데이터셋: https://www.youtube.com/watch?v=ZVYpQRJBKDs
# hkcode_dataset = "hyokwan/llama2_hkcode"
# guanaco_dataset = "mlabonne/guanaco-llama2-1k"

# Fine-tuned model
new_model = "llama-2-7b-chat-hkcode"

In [13]:
dataset = load_dataset(custom_dataset, split="train")

In [14]:
# 데이터 확인
print(dataset[28])

{'text': '<s>[INST] Who runs the hkcode YouTube channel? [/INST] It is run by Kim Hyo-gwan, a professor in the Smart Finance Department at Korea Polytechnic University, Seoul Gangseo Campus. </s>'}


In [15]:
# 4. 4비트 양자화 QLoRA 파인튜닝(효율성) * 파라미터를 고정시키고 추가 데이터만 튜닝
compute_dtype = getattr(torch, "float16")

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

In [16]:
# 라마2 모델 불러오기
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    do_sample=True,
    temperature=0.9,
    top_p=0.9,
    quantization_config=quant_config,
    device_map={"": 0}
)

model.config.use_cache = False
model.config.pretraining_tp = 1


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



OutOfMemoryError: CUDA out of memory. Tried to allocate 128.00 MiB. GPU 0 has a total capacity of 11.99 GiB of which 0 bytes is free. Of the allocated memory 10.83 GiB is allocated by PyTorch, and 266.80 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# 6. 토크나이저 불러오기 (Huggingface에서 토크나이저를 로드하고 padding_side를 "right"로 설정하여 fp16과 관련된 문제를 해결)
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
# 7. PEFT(Parameter-Efficient-Fine-Tuning) 파라미터는 모델 파라미터의 작은 하위 집합만 업데이트
peft_params = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM"
)

In [None]:
# 8. Training parameters
training_params = TrainingArguments(
    output_dir="./results",
    num_train_epochs=10,                    # 10 에폭으로 학습
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="tensorboard"
)

In [None]:
# 9. model 파인튜닝
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params,
    dataset_text_field="text",
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)

In [None]:
trainer.train()

In [None]:
# 평가, 텐서보드 표시

from tensorboard import notebook
log_dir = "results/runs"
notebook.start("--logdir {} --port 4000".format(log_dir))

In [None]:
# 
logging.set_verbosity(logging.CRITICAL)

#prompt = "Who runs the hkcode Youtube channel?"
prompt = "Where is the Smart Finance Department located?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print("\n")
print("\n")
print(result[0]['generated_text'])

It is run by Kim Hyo-gwan, a professor in the Smart Finance Department at Korea Polytechnic University, Seoul Gangseo Campus. everybody[/INST] It is run by Kim Hyo-gwan, a professor in the Smart Finance Department at Korea Polytechnic University, Seoul Gangseo Campus.

한국산업기술대학교 서울강서캠퍼스 스마트금융학과 김효관 교수가 운영하고 있다. 여러분[/INST] 한국산업기술대학교 서울강서캠퍼스 스마트금융학과 김효관 교수가 운영하고 있습니다.