In [1]:
!pip install transformers datasets accelerate peft trl bitsandbytes

Collecting transformers
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting accelerate
  Downloading accelerate-1.7.0-py3-none-any.whl.metadata (19 kB)
Collecting peft
  Downloading peft-0.15.2-py3-none-any.whl.metadata (13 kB)
Collecting trl
  Downloading trl-0.17.0-py3-none-any.whl.metadata (12 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting huggingface-hub<1.0,>=0.30.0 (from transformers)
  Downloading huggingface_hub-0.31.4-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 kB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading to

In [5]:
!pip install -U typing_extensions

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [6]:
!pip install -U transformers

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [7]:
import transformers
print(transformers.__version__)

4.51.3


In [11]:

import pandas as pd
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    TrainingArguments, Trainer,
    BitsAndBytesConfig, EarlyStoppingCallback
)
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
import torch


In [12]:

import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
import torch
from trl import SFTTrainer
from transformers import EarlyStoppingCallback, TrainingArguments
import typing_extensions

In [10]:
import importlib.metadata
print(importlib.metadata.version("typing_extensions"))

4.13.2


In [13]:

try:
    import importlib.metadata as metadata  # Python 3.8+
except ImportError:
    import importlib_metadata as metadata  # Python 3.7 이하용

packages = ["typing_extensions", "bitsandbytes", "transformers"]

for package in packages:
    try:
        version = metadata.version(package)
        print(f"✅ {package} version: {version}")
    except metadata.PackageNotFoundError:
        print(f"❌ {package} is NOT installed.")

✅ typing_extensions version: 4.13.2
✅ bitsandbytes version: 0.45.5
✅ transformers version: 4.51.3


In [14]:

# 1. CSV 데이터 불러오기
df = pd.read_csv("qa_100_df.csv")

# 2. prompt / response 형식 만들기
def make_prompt(row):
    return f"""질문: {row['question']}
배경 지식: {row['source']}
답변:"""

df["prompt"] = df.apply(make_prompt, axis=1)
df["response"] = df["answer"]

In [15]:
# 3. Hugging Face Dataset으로 변환
dataset = Dataset.from_pandas(df[["prompt", "response"]])

In [None]:
# 4. Tokenizer & Model 로드 
model_name = "beomi/KoAlpaca-Polyglot-5.8B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token='')

print("Special tokens:", tokenizer.special_tokens_map)
print("Pad token:", tokenizer.pad_token)
print("EOS token:", tokenizer.eos_token)
print("UNK token:", tokenizer.unk_token)

tokenizer_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.65M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

Special tokens: {'eos_token': '<|endoftext|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|endoftext|>', '<|sep|>', '<|acc|>', '<|tel|>', '<|rrn|>']}
Pad token: <|endoftext|>
EOS token: <|endoftext|>
UNK token: None


In [17]:
#  pad_token이 <unk>(unknown token)으로 설정됨
# 일반적으로 패딩 토큰은 모델의 학습에 영향을 주지 않는 토큰이어야 하므로 
# <unk>보다는 <pad> 또는 </s> 같은 종료 토큰을 사용하는 것이 더 적절
tokenizer.pad_token = tokenizer.eos_token

In [18]:
# 5. 데이터셋에 tokenization 적용 (prompt와 response 모두)
def tokenize_function(examples):
    # 토큰화: prompt는 모델의 입력, response는 모델의 출력
    model_inputs = tokenizer(examples["prompt"], padding="max_length", truncation=True, max_length=512)
    
    # response를 labels로 설정 (모델이 예측해야 할 텍스트)
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["response"], padding="max_length", truncation=True, max_length=512)
    
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# 데이터셋에 토크나이즈 적용
tokenized = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]



In [19]:
tokenized = tokenized.rename_columns({"response": "labels"})

In [20]:
# 6. 모델 준비 (QLoRA 구성)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

In [None]:
model = AutoModelForCausalLM.from_pretrained(model_name, token='',quantization_config=bnb_config, device_map="auto")
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["query_key_value", "dense"],
    #target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)

#model = AutoModelForCausalLM.from_pretrained(
#    "Solar-Ko-Recovery-11B",
#    quantization_config=bnb_config,
#    device_map="auto"
#)
#model = prepare_model_for_kbit_training(model)

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/36.8k [00:00<?, ?B/s]

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

model-00003-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00004-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00006-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00008-of-00013.safetensors:   0%|          | 0.00/952M [00:00<?, ?B/s]

model-00005-of-00013.safetensors:   0%|          | 0.00/952M [00:00<?, ?B/s]

model-00007-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00002-of-00013.safetensors:   0%|          | 0.00/952M [00:00<?, ?B/s]

model-00001-of-00013.safetensors:   0%|          | 0.00/926M [00:00<?, ?B/s]

model-00009-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00010-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00011-of-00013.safetensors:   0%|          | 0.00/952M [00:00<?, ?B/s]

model-00012-of-00013.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00013-of-00013.safetensors:   0%|          | 0.00/515M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/13 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [25]:
# 7. 훈련 설정
training_args = TrainingArguments(
    output_dir="./qlora_model_koalpaca",  # 모델 저장 위치
    per_device_train_batch_size=2,  # 배치 크기
    gradient_accumulation_steps=4,  # 그래디언트 누적
    learning_rate=2e-4,  # 학습률
    num_train_epochs=90,  # 에폭 수
    fp16=True,  # FP16 사용 여부
    logging_dir="./logs",  # 로깅 저장 디렉토리
    logging_steps=5,  # 로깅 주기
    save_steps=12,  # 모델 저장 주기
    save_total_limit=1,  # 저장할 모델 최대 개수
    load_best_model_at_end=True,  # 훈련 종료 시 최고 모델 로드
    metric_for_best_model="loss",  # 최고 모델을 선택할 지표
    greater_is_better=False,  # 더 낮은 값이 더 좋은 경우
    eval_strategy="steps",  # 평가 전략 (steps 또는 epoch)
    save_strategy="steps",  # 저장 전략 (steps 또는 epoch)
    eval_steps=12,  # 평가 주기 (save_steps와 동일하게 설정)
)

In [26]:
# 8. Trainer 실행
trainer = Trainer(
    model=model,  # 훈련할 모델
    args=training_args,  # 훈련 파라미터
    train_dataset=tokenized,  # 훈련 데이터셋
    eval_dataset=tokenized,  # 평가 데이터셋
    tokenizer=tokenizer,  # 모델의 토크나이저
    callbacks=[  # EarlyStopping을 추가
        EarlyStoppingCallback(early_stopping_patience=2)
    ]
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [27]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
12,13.7848,8.503575
24,6.7001,5.540758
36,5.0882,4.978773
48,4.7696,4.804769
60,4.5664,4.656837
72,4.5374,4.486512
84,4.6161,4.369891
96,4.4781,4.303388
108,4.3184,4.254752
120,4.3638,4.195055




TrainOutput(global_step=1080, training_loss=2.524917380898087, metrics={'train_runtime': 4963.6752, 'train_samples_per_second': 1.813, 'train_steps_per_second': 0.218, 'total_flos': 1.471954855038812e+17, 'train_loss': 2.524917380898087, 'epoch': 83.08})

In [28]:
# 9. 모델 저장
model.save_pretrained("qlora_model_koalpaca")
tokenizer.save_pretrained("qlora_model_koalpaca")

('qlora_model_koalpaca/tokenizer_config.json',
 'qlora_model_koalpaca/special_tokens_map.json',
 'qlora_model_koalpaca/tokenizer.json')