In [1]:
# PyTorch (CUDA 12.1)
!pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0

# Hugging Face 핵심
!pip install transformers==4.43.3 datasets==2.21.0 accelerate==0.31.0 evaluate==0.4.2

# LoRA (peft) & RLHF 도구
!pip install peft==0.11.1 trl==0.9.6

# 전처리/지표
!pip install scikit-learn==1.5.0 scipy==1.13.1 pandas==2.2.2 pyarrow==16.1.0 sentencepiece==0.2.0

# 로깅 & 모니터링
!pip install wandb==0.17.6 tensorboard==2.17.0


Collecting torch==2.3.0
  Downloading torch-2.3.0-cp312-cp312-manylinux1_x86_64.whl.metadata (26 kB)
Collecting torchvision==0.18.0
  Downloading torchvision-0.18.0-cp312-cp312-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting torchaudio==2.3.0
  Downloading torchaudio-2.3.0-cp312-cp312-manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.3.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.3.0)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.3.0)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.3.0)
  Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (

Collecting scikit-learn==1.5.0
  Downloading scikit_learn-1.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting scipy==1.13.1
  Downloading scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow==16.1.0
  Downloading pyarrow-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.0 kB)
Collecting sentencepiece==0.2.0
  Downloading sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Downloading scikit_learn-1.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m140.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [1]:
from huggingface_hub import notebook_login


notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

### 파인튜닝

In [5]:
# 1. 라이브러리
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
import pandas as pd
from datasets import Dataset
from peft import LoraConfig, get_peft_model

# 2. 모델 및 토크나이저 불러오기
model_id = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
print(f"'{model_id}' 모델 로드를 시작합니다...")

tokenizer = AutoTokenizer.from_pretrained(model_id)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,   # ✅ A100 → bf16
    device_map="auto",
    trust_remote_code=True
)

# LoRA 설정
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # attention q,v에 LoRA 적용
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
print("LoRA 적용 완료 ✅")

# 3. CSV 데이터셋 불러오기
csv_path = "/content/drive/MyDrive/25-2_ capstone/ksl_translation_dataset_exaone.csv"
df = pd.read_csv(csv_path)

dataset = Dataset.from_pandas(df)
dataset = dataset.train_test_split(test_size=0.1)

# 4. 데이터 전처리
def preprocess_function(examples):
    texts = [
        f"Instruction: 한국어 문장을 한국수어 글로스로 번역하시오.\nInput: {src}\nOutput: {tgt}"
        for src, tgt in zip(examples["src"], examples["tgt"])
    ]
    tokenized = tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=256
    )
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_dataset = dataset.map(preprocess_function, batched=True)

# 5. Collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# 6. 학습 인자
training_args = TrainingArguments(
    output_dir="./exaone-ksl",
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=50,
    learning_rate=2e-4,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    weight_decay=0.01,
    bf16=True,            # ✅ A100 → bf16
    report_to="none"
)

# 7. Trainer 연결
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator
)

# 8. 학습 시작
trainer.train()

# 9. 모델 저장 (LoRA adapter만 저장됨)
save_path = "/content/drive/MyDrive/25-2_ capstone/exaone-ksl-lora"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

print(f"✅ LoRA 파인튜닝 완료! 어댑터가 여기 저장됨: {save_path}")




'LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct' 모델 로드를 시작합니다...


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

LoRA 적용 완료 ✅


Map:   0%|          | 0/930 [00:00<?, ? examples/s]

Map:   0%|          | 0/104 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
0,1.0345,0.901442
1,0.7519,0.857572
2,0.7155,0.847656


✅ LoRA 파인튜닝 완료! 어댑터가 여기 저장됨: /content/drive/MyDrive/25-2_ capstone/exaone-ksl-lora


In [7]:
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model_id = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
adapter_path = "/content/drive/MyDrive/25-2_ capstone/exaone-ksl-lora"
final_save_path = "/content/drive/MyDrive/25-2_ capstone/exaone-ksl-lora-merged"

# 원본 모델 로드
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)

# LoRA 어댑터 병합
merged_model = PeftModel.from_pretrained(base_model, adapter_path)
merged_model = merged_model.merge_and_unload()   # 🔑 병합 + 어댑터 제거

# 최종 모델 저장 (이제 하나의 완전한 모델이 됨)
merged_model.save_pretrained(final_save_path, safe_serialization=True)
tokenizer.save_pretrained(final_save_path)

print(f"✅ 최종 병합 모델 저장 완료: {final_save_path}")


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

✅ 최종 병합 모델 저장 완료: /content/drive/MyDrive/25-2_ capstone/exaone-ksl-lora-merged


In [8]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "/content/drive/MyDrive/25-2_ capstone/exaone-ksl-lora-merged",
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(
    "/content/drive/MyDrive/25-2_ capstone/exaone-ksl-lora-merged"
)

text = "Instruction: 한국어 문장을 한국수어 글로스로 번역하시오.\nInput: 안녕하세요\nOutput:"
inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


The repository for /content/drive/MyDrive/25-2_ capstone/exaone-ksl-lora-merged contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co//content/drive/MyDrive/25-2_ capstone/exaone-ksl-lora-merged.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y
The repository for /content/drive/MyDrive/25-2_ capstone/exaone-ksl-lora-merged contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co//content/drive/MyDrive/25-2_ capstone/exaone-ksl-lora-merged.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Instruction: 한국어 문장을 한국수어 글로스로 번역하시오.
Input: 안녕하세요
Output:
Output: 안녕하다1 맞다1 안녕하다1 맞다1 인사1 하다1# 지시1# 안녕하다1 맞다1 안녕하다1 맞다1 인사1 하다1# 지시1# 안녕하다1 맞다
