In [1]:
!pip install transformers datasets accelerate peft trl bitsandbytes

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [2]:
!pip install -U typing_extensions


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [15]:
!pip install -U transformers


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [16]:
import transformers
print(transformers.__version__)


4.51.3


In [4]:
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    TrainingArguments, Trainer,
    BitsAndBytesConfig, EarlyStoppingCallback
)
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
import torch

In [5]:
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
import torch
from trl import SFTTrainer
from transformers import EarlyStoppingCallback, TrainingArguments
import typing_extensions

In [6]:
import importlib.metadata
print(importlib.metadata.version("typing_extensions"))


4.13.2


In [7]:
try:
    import importlib.metadata as metadata  # Python 3.8+
except ImportError:
    import importlib_metadata as metadata  # Python 3.7 이하용

packages = ["typing_extensions", "bitsandbytes", "transformers"]

for package in packages:
    try:
        version = metadata.version(package)
        print(f"✅ {package} version: {version}")
    except metadata.PackageNotFoundError:
        print(f"❌ {package} is NOT installed.")


✅ typing_extensions version: 4.13.2
✅ bitsandbytes version: 0.45.5
✅ transformers version: 4.51.3


In [8]:
# 1. CSV 데이터 불러오기
df = pd.read_csv("qa_100_df.csv")

# 2. prompt / response 형식 만들기
def make_prompt(row):
    return f"""질문: {row['question']}
배경 지식: {row['source']}
답변:"""

df["prompt"] = df.apply(make_prompt, axis=1)
df["response"] = df["answer"]

In [9]:
# 3. Hugging Face Dataset으로 변환
dataset = Dataset.from_pandas(df[["prompt", "response"]])

In [10]:
# 4. Tokenizer & Model 로드 (예: mistral)
model_name = "Bllossom/llama-3.2-Korean-Bllossom-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [11]:
# 5. Dataset 토큰화
def tokenize(example):
    return tokenizer(
        example["prompt"],
        text_target=example["response"],
        truncation=True,
        padding="max_length",
        max_length=1024,
    )

tokenized = dataset.map(tokenize)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [12]:
# 6. 모델 준비 (QLoRA 구성)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

In [13]:
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, device_map="auto")
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
# 7. 훈련 설정
training_args = TrainingArguments(
    output_dir="./Bllossom/llama-3.2-Korean-Bllossom-3B",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    num_train_epochs=100,
    fp16=True,
    logging_steps=5,
    evaluation_strategy="steps",
    eval_steps=12,
    save_strategy="steps",
    save_steps=12,
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    greater_is_better=False,
)

TypeError: TrainingArguments.__init__() got an unexpected keyword argument 'evaluation_strategy'

In [None]:
# 8. Trainer 실행
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    eval_dataset=tokenized,
    tokenizer=tokenizer,
    callbacks=[
        EarlyStoppingCallback(early_stopping_patience=2)
    ],
    peft_config=lora_config
)

In [None]:
trainer.train()

In [None]:
# 9. 모델 저장
model.save_pretrained("qlora_model_mistral")
tokenizer.save_pretrained("qlora_model_mistral")
