In [None]:
!pip install transformers datasets accelerate peft trl bitsandbytes

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting peft
  Using cached peft-0.15.2-py3-none-any.whl.metadata (13 kB)
Collecting accelerate
  Using cached accelerate-1.6.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Using cached bitsandbytes-0.45.5-py3-none-win_amd64.whl.metadata (5.1 kB)
Collecting trl
  Using cached trl-0.17.0-py3-none-any.whl.metadata (12 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Using cached pyarrow-20.0.0-cp312-cp312-win_amd64.whl.metadata (3.4 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Using cached dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Using cached xxhash-3.5.0-cp312-cp312-win_amd64.whl.metadata (13 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Using cached multiprocess-0.70.16-py312-none-any.whl.metadata (7.2 kB)
Collecting fsspec>=2023.5.0 (from huggingface-hub<1.0,>=0.30.0->transformers)
  Using cached fsspec-2025.3.0-py3-none-any.whl.me

In [None]:
import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig, EarlyStoppingCallback
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
import torch
from trl import SFTTrainer
from transformers import EarlyStoppingCallback

In [None]:
# 1. Load the QA data from CSV
qa_csv_path = "qa_100_df.csv"
df = pd.read_csv(qa_csv_path)

# 2. Build the prompt / response format
def make_prompt(row):
    """Render one QA record as the prompt text given to the model.

    Args:
        row: Mapping-like record (for example a DataFrame row) that provides
            the 'question' and 'source' fields.

    Returns:
        A three-line string: the question line, the background-knowledge
        line, and a trailing answer cue that the model is expected to complete.
    """
    prompt_text = f"""질문: {row['question']}
배경 지식: {row['source']}
답변:"""
    return prompt_text

# Attach the model input ("prompt") and the training target ("response") columns.
df = df.assign(
    prompt=lambda frame: frame.apply(make_prompt, axis=1),
    response=lambda frame: frame["answer"],
)

In [None]:
# 3. Convert to a Hugging Face Dataset (only the two columns used for training)
qa_frame = df[["prompt", "response"]]
dataset = Dataset.from_pandas(qa_frame)

In [None]:
# 4. Load the tokenizer (example: Mistral)
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
# Mistral is implemented inside transformers itself, so there is no reason to opt in to
# executing Python code shipped in the Hub repository (trust_remote_code=True).
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token for padding so batches can be padded.
# NOTE(review): presumably this tokenizer ships without a pad token — confirm.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# 5. Tokenize the dataset
def tokenize(example, max_length=1024, tok=None):
    """Tokenize one prompt/response pair as a single causal-LM training sequence.

    A causal language model is trained to predict the next token of the very
    sequence it is fed, so the answer has to be part of ``input_ids``. Passing
    the answer through ``text_target`` (the encoder-decoder pattern) keeps it
    out of the inputs and yields labels that do not line up with them.

    No padding and no ``labels`` column are produced here: sequences keep their
    natural length and batching/label construction is left to the trainer's
    data collator.

    Args:
        example: A single (non-batched) record with string fields 'prompt'
            and 'response'.
        max_length: Maximum number of tokens in the returned sequence,
            including the end-of-sequence token.
        tok: Tokenizer to use. Defaults to the notebook-level ``tokenizer``.

    Returns:
        A dict with 'input_ids' and 'attention_mask' lists of equal length,
        where the last token is always the end-of-sequence token.
    """
    tok = tokenizer if tok is None else tok
    # The prompt ends with the answer cue, so the response follows after a space.
    full_text = f"{example['prompt']} {example['response']}"
    # Reserve one position so the EOS marker survives truncation; without EOS the
    # model never sees where an answer ends.
    encoded = tok(full_text, truncation=True, max_length=max_length - 1)
    return {
        "input_ids": list(encoded["input_ids"]) + [tok.eos_token_id],
        "attention_mask": list(encoded["attention_mask"]) + [1],
    }

# Apply `tokenize` to every record of the dataset; the result keeps the original
# columns and adds the fields returned by `tokenize`.
tokenized = dataset.map(tokenize)

In [None]:
# 6. Prepare the model (QLoRA configuration)
# Weights are loaded in 4-bit NF4 with double quantization enabled; the compute
# dtype for the quantized layers is float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

In [None]:
# LoRA adapter settings: rank-8 adapters on the q_proj and v_proj modules only,
# with no bias terms trained.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Load the base model with the quantization config defined earlier, letting
# device placement be chosen automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)
# Prepare the quantized model for training, then attach the LoRA adapters.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

In [None]:
# 7. Training configuration
training_args = TrainingArguments(
    output_dir="./qlora_model_mistral",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch of 2 * 4 = 8 sequences per device
    learning_rate=2e-4,
    num_train_epochs=100,  # upper bound; early stopping is expected to end training sooner
    fp16=True,
    logging_steps=5,
    # `evaluation_strategy` was renamed to `eval_strategy`, and the old name is rejected
    # by current transformers releases.
    eval_strategy="steps",
    eval_steps=12,
    # Checkpoints are saved on the same schedule as evaluation, which
    # load_best_model_at_end requires in order to restore the best step.
    save_strategy="steps",
    save_steps=12,
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    greater_is_better=False,  # lower loss is better
)

In [None]:
# 8. Build the trainer
# Hold out a fixed 10% slice for evaluation. Evaluating on the training set itself
# makes early stopping and best-checkpoint selection follow the training loss, which
# keeps improving while the model overfits.
split = tokenized.train_test_split(test_size=0.1, seed=42)

trainer = SFTTrainer(
    # `model` is already wrapped with the LoRA adapters by get_peft_model, so no
    # peft_config is passed here (passing it again would be redundant).
    model=model,
    args=training_args,
    train_dataset=split["train"],
    eval_dataset=split["test"],
    # trl 0.17 (the installed release) takes the tokenizer as `processing_class`;
    # the former `tokenizer=` keyword is no longer accepted.
    processing_class=tokenizer,
    # Stop once the evaluation loss has failed to improve for 2 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
)

In [None]:
# Run fine-tuning; the returned training summary is shown as the cell output.
trainer.train()

In [None]:
# 9. Save the model and tokenizer side by side in one directory
save_dir = "qlora_model_mistral"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)
