In [None]:
!pip install -U accelerate huggingface-hub==0.23.0 transformers==4.40.1 datasets==2.19.0 peft==0.10.0 bitsandbytes==0.43.1 trl==0.8.6

In [None]:
!pip install pytorch-lightning==1.9.4

In [2]:
import torch
import os
import torch
from datasets import load_dataset

from transformers import (
    PreTrainedTokenizerFast,
    GPT2LMHeadModel,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from trl import SFTTrainer

import huggingface_hub

In [None]:
# Never commit an access token inside the notebook: read it from the
# HF_TOKEN environment variable, or ask for it interactively (input hidden).
# `token` stays a module-level name because later cells reuse it.
token = os.environ.get("HF_TOKEN")
if not token:
    from getpass import getpass

    token = getpass("Hugging Face access token: ")

huggingface_hub.login(token=token)

# 데이터 및 모델 불러오기

In [None]:
# Base checkpoint on the Hugging Face Hub: a Korean GPT-2 model.
base_model = "skt/kogpt2_base_v2"
# Placeholder for the Hub dataset repository id (or local path) to fine-tune on.
data = "huggingface_dataset_path"
# Only the "train" split is loaded.
dataset = load_dataset(data, split="train")

In [None]:
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

In [None]:
# Tokenizer of the base checkpoint, with its special tokens declared explicitly.
tokenizer = PreTrainedTokenizerFast.from_pretrained(
    base_model,
    bos_token='</s>',
    eos_token='</s>',
    unk_token='<unk>',
    pad_token='<pad>',
    mask_token='<mask>',
)

# GPT-2 language-model head loaded from the same checkpoint.
model = GPT2LMHeadModel.from_pretrained(base_model)

# Callback

In [44]:
from pytorch_lightning.callbacks import EarlyStopping
from transformers import TrainerCallback

In [45]:
class EarlyStoppingCallback(TrainerCallback):
    """Stop training once ``eval_loss`` stops improving.

    After every evaluation the current ``eval_loss`` is compared with the best
    value seen so far. An evaluation counts as an improvement only when the
    loss drops by more than ``early_stopping_threshold``. After
    ``early_stopping_patience`` consecutive evaluations without improvement,
    ``control.should_training_stop`` is set to ``True``.

    Args:
        early_stopping_patience: Number of consecutive non-improving
            evaluations tolerated before training is stopped.
        early_stopping_threshold: Minimum decrease in ``eval_loss`` that
            counts as an improvement.
    """

    def __init__(self, early_stopping_patience=5, early_stopping_threshold=0.01):
        self.early_stopping_patience = early_stopping_patience
        self.early_stopping_threshold = early_stopping_threshold
        self.best_loss = None  # lowest eval_loss observed so far
        self.early_stopping_counter = 0  # consecutive evaluations without improvement

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        """Update the patience counter from the latest evaluation results.

        Args:
            args, state, control: Objects supplied by the Trainer; only
                ``control`` is modified here.
            metrics: Evaluation results. The Trainer passes them under this
                keyword for the ``on_evaluate`` event.
            **kwargs: Remaining event keywords. A ``logs`` dict is still
                honoured as a fallback when ``metrics`` is missing or empty.
        """
        # The evaluation event delivers results as `metrics`, not `logs`;
        # reading only `logs` meant eval_loss was never found.
        results = metrics or kwargs.get("logs") or {}
        current_loss = results.get("eval_loss")

        if current_loss is None:
            return

        improved = (
            self.best_loss is None
            or current_loss < self.best_loss - self.early_stopping_threshold
        )
        if improved:
            self.best_loss = current_loss
            self.early_stopping_counter = 0
            return

        self.early_stopping_counter += 1
        if self.early_stopping_counter >= self.early_stopping_patience:
            control.should_training_stop = True

In [46]:
# Callback instance handed to the trainer: patience of 5 evaluations,
# minimum eval_loss improvement of 0.01.
early_stopping_callback = EarlyStoppingCallback(early_stopping_patience=5, early_stopping_threshold=0.01)

# 학습

In [None]:
# Hold out part of the data for validation. Without an eval dataset and an
# evaluation schedule the Trainer never emits evaluation events, so the
# early-stopping callback registered below would never be triggered.
# The 10% split size and the seed are defaults chosen here; adjust as needed.
dataset_splits = dataset.train_test_split(test_size=0.1, seed=42)

training_params = TrainingArguments(
        output_dir="./results",
        num_train_epochs=18,
        per_device_train_batch_size=16,
        gradient_accumulation_steps=1,
        optim="paged_adamw_32bit",
        save_steps=1000,
        logging_steps=200,
        # Evaluate at the same cadence as logging so the callback gets eval_loss.
        evaluation_strategy="steps",
        eval_steps=200,
        learning_rate=2e-4,
        weight_decay=0.001,
        fp16=False,
        bf16=False,
        max_grad_norm=0.3,
        max_steps=-1,  # -1: run for num_train_epochs instead of a fixed step count
        warmup_ratio=0.03,
        group_by_length=True,
        # NOTE(review): the plain "constant" schedule has no warm-up phase, so
        # warmup_ratio above is not applied; switch to "constant_with_warmup"
        # if warm-up is intended.
        lr_scheduler_type="constant",
        report_to="tensorboard"
    )

trainer = SFTTrainer(
        model=model,
        train_dataset=dataset_splits["train"],
        eval_dataset=dataset_splits["test"],
        dataset_text_field="text",
        max_seq_length=None,
        tokenizer=tokenizer,
        args=training_params,
        packing=False,
        callbacks=[early_stopping_callback]
    )

In [None]:
# Run fine-tuning with the trainer built in the previous cell.
trainer.train()
# Optional: uncomment to restrict library logging to CRITICAL messages.
#logging.set_verbosity(logging.CRITICAL)

# 학습 후 출력 확인

In [None]:
prompt = "환불이 가능한지 물어보려고 연락했어요."

# Training leaves the model in train mode; switch to eval mode so dropout is
# disabled while generating.
model.eval()

# 1) Generation through the pipeline helper, using an instruction-style prompt.
# The pipeline is pinned to the device the model already lives on so that its
# inputs and the weights end up on the same device.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100,
    device=model.device,
)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

# 2) Generation through model.generate with the raw prompt.
input_text = prompt
# The encoded ids are created on the CPU; move them to the model's device,
# otherwise generate() fails with a device mismatch after GPU training.
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)

output = model.generate(input_ids, max_length=100, num_return_sequences=1, temperature=1.0, pad_token_id=tokenizer.eos_token_id)

decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
print(decoded_output)

# 모델 저장

In [50]:
# Placeholder for the Hugging Face Hub repository id where the fine-tuned
# model and tokenizer will be stored.
model_save_path = "huggingface_model_save_path"

In [None]:
# Upload the fine-tuned model and its tokenizer directly to the Hugging Face Hub.
# `token` replaces the deprecated `use_auth_token` keyword.
model.push_to_hub(
    model_save_path,
    use_temp_dir=True,
    token=token,
)
tokenizer.push_to_hub(
    model_save_path,
    use_temp_dir=True,
    token=token,
)