In [None]:
from huggingface_hub import login
from decouple import config

# Look up the Hugging Face access token through python-decouple's `config`
# so the secret is not written into the notebook itself.
# NOTE(review): a later cell rebinds the name `config` to a model config
# object, so re-running this lookup without re-running the import above fails.
huggingface_token = config('HUGGINGFACE_TOKEN')
# Authenticate this session against the Hub; add_to_git_credential=True also
# asks huggingface_hub to hand the token to the git credential helper.
login(token=huggingface_token, add_to_git_credential=True)

In [2]:
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
from transformers import Trainer, TrainingArguments, BitsAndBytesConfig
from transformers import AutoProcessor, AutoModelForImageTextToText
import torch
from torch.nn.utils.rnn import pad_sequence
import datasets
from peft import LoraConfig, get_peft_model

In [3]:
# Hub repository id of the base checkpoint used by every later cell
# (model loading and tokenizer loading). The commented-out ids are
# alternatives kept for reference.
# model_id = 'Bllossom/llama-3.2-Korean-Bllossom-3B'
# model_id = 'Bllossom/llama-3.2-Korean-Bllossom-AICA-5B'
model_id = "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct"

In [4]:
# import torch
# from transformers import AutoModelForCausalLM, AutoTokenizer

# model_name = "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct-AWQ"

# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     torch_dtype=torch.float16,
#     trust_remote_code=True,
#     device_map="auto"
# )
# tokenizer = AutoTokenizer.from_pretrained(model_name)

# # Choose your prompt
# # prompt = "Explain how wonderful you are"  # English example
# prompt = "스스로를 자랑해 봐"       # Korean example

# messages = [
#     {"role": "system", 
#      "content": "You are EXAONE model from LG AI Research, a helpful assistant."},
#     {"role": "user", "content": prompt}
# ]
# input_ids = tokenizer.apply_chat_template(
#     messages,
#     tokenize=True,
#     add_generation_prompt=True,
#     return_tensors="pt"
# )

# output = model.generate(
#     input_ids.to("cuda"),
#     eos_token_id=tokenizer.eos_token_id,
#     max_new_tokens=128,
#     do_sample=False,
# )
# print(tokenizer.decode(output[0]))


## 파인 튜닝 전 모델 테스트

## 4비트 양자화 설정(QLoRA)

In [5]:
# Bllossom-3B
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    attn_implementation = "flash_attention_2"
    torch_dtype = torch.bfloat16
    print("Flash Attention 2를 사용합니다 (bfloat16).")
else:
    attn_implementation = "eager"
    torch_dtype = torch.float16
    print("Eager Attention을 사용합니다 (float16).")

# QLoRA config
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=False,
)

config = AutoConfig.from_pretrained(model_id)
config.attention_implementation = attn_implementation 

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=quant_config,
    config=config
)

Flash Attention 2를 사용합니다 (bfloat16).


The repository for LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N]  y
The repository for LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N]  y


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Display the module tree to check which layers were loaded as Linear4bit and
# which attention class the model ended up with.
model

ExaoneForCausalLM(
  (transformer): ExaoneModel(
    (wte): Embedding(102400, 2560, padding_idx=0)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-29): 30 x ExaoneBlock(
        (ln_1): ExaoneRMSNorm()
        (attn): ExaoneAttention(
          (attention): ExaoneSdpaAttention(
            (rotary): ExaoneRotaryEmbedding()
            (k_proj): Linear4bit(in_features=2560, out_features=640, bias=False)
            (v_proj): Linear4bit(in_features=2560, out_features=640, bias=False)
            (q_proj): Linear4bit(in_features=2560, out_features=2560, bias=False)
            (out_proj): Linear4bit(in_features=2560, out_features=2560, bias=False)
          )
        )
        (ln_2): ExaoneRMSNorm()
        (mlp): ExaoneGatedMLP(
          (c_fc_0): Linear4bit(in_features=2560, out_features=7168, bias=False)
          (c_fc_1): Linear4bit(in_features=2560, out_features=7168, bias=False)
          (c_proj): Linear4bit(in_features=7168, out_features=2560, bias=False

## Dataset Load

In [7]:
import jsonlines
from datasets import Dataset

# Path to the JSON Lines file with the instruction/response pairs.
jsonl_path = '/mnt/ssd/1/sanguk/dataset/iljoo_expanded_responses_dataset.jsonl'

# Layout of one training example: the instruction, a newline, the response.
# Defined once, outside the loop, because it never changes.
template = "{instruction}\n{response}"

# Read every record and render it to a single training string. Records that
# cannot be rendered (e.g. a missing key) are reported and skipped so one bad
# line does not abort the whole load.
texts = []
skipped = 0
with jsonlines.open(jsonl_path) as f:
    for lineno, line in enumerate(f.iter(), start=1):
        try:
            texts.append(template.format(**line))
        except Exception as e:
            skipped += 1
            print(f"Error at line {lineno}: {e}")

if skipped:
    print(f"Skipped {skipped} record(s) that could not be formatted")

print('데이터셋 생성 완료')

# Wrap the rendered strings in a Hugging Face Dataset with one `text` column.
# The intermediate list keeps its own name so `indataset` is only ever a Dataset.
indataset = Dataset.from_dict({'text': texts})

# Show the resulting schema and row count.
print(indataset)


데이터셋 생성 완료
Dataset({
    features: ['text'],
    num_rows: 6115
})


In [9]:
# Load the tokenizer published with the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id) 
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # which side of the sequence receives the padding tokens

def preprocess_dataset(example, max_length=1024, padding="max_length"):
    """Tokenize the ``text`` field and attach labels for causal-LM training.

    Intended for ``Dataset.map``; ``example["text"]`` is passed straight to
    the module-level ``tokenizer``.

    Args:
        example: Mapping with a ``"text"`` entry to tokenize.
        max_length: Truncation limit in tokens, and the padded length when
            ``padding="max_length"``. Defaults to the previously hard-coded 1024.
        padding: Padding strategy forwarded to the tokenizer. Defaults to the
            previously hard-coded ``"max_length"``, which pads every example
            to ``max_length`` tokens.

    Returns:
        The tokenizer output with an extra ``"labels"`` entry that is a copy
        of ``"input_ids"`` (padding positions included).
    """
    tokenized = tokenizer(
        example["text"],
        truncation=True,
        max_length=max_length,
        padding=padding,
    )
    # NOTE(review): the language-modeling collator used later appears to build
    # its own labels from input_ids; confirm whether this column is still needed.
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

In [10]:
from datasets import DatasetDict

# Hold out 20% of the examples for validation. train_test_split returns the
# keys "train" and "test"; the fixed seed keeps the split reproducible.
train_valid_split = indataset.train_test_split(test_size=0.2, seed=42)

# Re-expose the held-out part under the key "validation".
# Bound to `dataset_splits` rather than `datasets`, so the `datasets` module
# imported at the top of the notebook is not overwritten.
dataset_splits = DatasetDict({
    "train": train_valid_split["train"],
    "validation": train_valid_split["test"],
})

# Tokenize both splits with identical settings in one call; the raw `text`
# column is dropped because only the tokenized columns are used from here on.
tokenized_splits = dataset_splits.map(
    preprocess_dataset,
    num_proc=2,
    batched=True,
    remove_columns=["text"],
)
train_dataset = tokenized_splits["train"]
valid_dataset = tokenized_splits["validation"]

print(train_dataset)
print("Train dataset length:", len(train_dataset))
print(valid_dataset)
print("Valid dataset length:", len(valid_dataset))

Map (num_proc=2):   0%|          | 0/4892 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/1223 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 4892
})
Train dataset length: 4892
Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 1223
})
Valid dataset length: 1223


In [11]:
# Sanity check: show the first ten token ids of the first training example.
print(train_dataset[0]["input_ids"][:10])

[96064, 27458, 41728, 76007, 61016, 696, 3157, 1130, 657, 869]


In [12]:
# Sanity check: decode the first 100 token ids back to text to verify the
# instruction/response layout and see where padding begins.
tokenizer.decode(train_dataset[0]["input_ids"][:100])

'TeamsAI에서 coco 포맷을 지원하는 게 뭐가 중요한가요?\nTeamsAI가 coco 포맷을 지원하면 데이터 이동이 더 쉬워져요. 그래서 다양한 AI 시스템과 통합할 때 편리하고요.[PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD][PAD]'

## Data Collator

- LLM(대형 언어 모델) 파인튜닝 시 **Data Collator**는 배치(batch) 단위로 데이터를 정리하여 모델에 전달하는 역할을 합니다.
- 일반적으로 DataLoader와 함께 사용되며, 토큰 길이를 맞추거나, 필요한 마스크 값을 추가하는 등의 작업을 수행합니다.

In [13]:
from transformers import DataCollatorForLanguageModeling

# Collator that assembles individual tokenized examples into training batches.
# Per the Transformers documentation, with mlm=False it prepares causal-LM
# batches whose labels are the input ids with padding positions set to -100.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,  # must be False for a causal LM (no masked-token objective)
)

## Configuration TrainingArguments

In [14]:
from transformers import EarlyStoppingCallback

# Root directory under which fine-tuned models are stored.
hub_path = "/mnt/ssd/1/hub"
# Directory for this run; used below as TrainingArguments.output_dir, so the
# checkpoints are written here.
save_model_path = os.path.join(hub_path, "models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1")

class EarlyStoppingWithCombinedLossCallback(EarlyStoppingCallback):
    """Early stopping on a weighted blend of training and evaluation loss.

    At every evaluation the callback publishes

        eval_combined_loss = weight_train * last_train_loss + weight_eval * eval_loss

    into the metrics dict, so it can be selected through
    ``metric_for_best_model="eval_combined_loss"``. It also records the best
    value and the matching checkpoint path on the trainer state.

    Args:
        weight_train: Weight of the most recently logged training loss.
        weight_eval: Weight of the evaluation loss.
        early_stopping_patience: Number of evaluations without improvement
            tolerated before training is stopped (forwarded to
            ``EarlyStoppingCallback``).
    """

    def __init__(self, weight_train=0.4, weight_eval=0.6, early_stopping_patience=20):
        super().__init__(early_stopping_patience=early_stopping_patience)
        self.weight_train = weight_train
        self.weight_eval = weight_eval
        # Latest training loss seen in on_log; None until the first training log.
        self.last_train_loss = None

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Remember the most recent training loss reported by the trainer."""
        if logs is not None and "loss" in logs:
            self.last_train_loss = logs["loss"]

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        """Compute the combined loss, update patience, and track the best checkpoint.

        Returns whatever the parent ``on_evaluate`` returns.
        """
        if metrics is None:
            # Nothing to score and nowhere to publish the metric.
            return None

        # A missing eval_loss is treated as the worst possible value.
        eval_loss = metrics.get("eval_loss")
        if eval_loss is None:
            eval_loss = float("inf")

        # Until a training loss has been logged, fall back to the eval loss alone.
        if self.last_train_loss is None:
            combined_loss = eval_loss
        else:
            combined_loss = self.weight_train * self.last_train_loss + self.weight_eval * eval_loss

        # Publish the metric so the trainer and the parent callback can see it.
        metrics["eval_combined_loss"] = combined_loss

        # Checkpoint directory name for the current global step.
        current_step = state.global_step
        current_checkpoint = os.path.join(args.output_dir, f"checkpoint-{current_step}")

        # Decide on improvement against the PREVIOUS best value.
        is_new_best = state.best_metric is None or combined_loss < state.best_metric

        # Run the parent's patience bookkeeping before touching state.best_metric.
        # The parent only resets its counter when the new value is strictly
        # better than state.best_metric. If the state were overwritten first,
        # the value would be compared with itself and the counter would grow
        # on every evaluation, improvement or not.
        result = super().on_evaluate(args, state, control, metrics, **kwargs)

        if is_new_best:
            state.best_metric = combined_loss
            state.best_model_checkpoint = current_checkpoint
            print(f"New best checkpoint: {current_checkpoint} with Combined Loss: {combined_loss:.4f}")
        else:
            print(f"Combined Loss at step {current_step}: {combined_loss:.4f}")

        return result
        
# Training configuration. Evaluation, checkpointing and logging share the
# same 50-step cadence, which load_best_model_at_end needs so that every
# evaluated step also has a checkpoint on disk.
training_args = TrainingArguments(
    output_dir=save_model_path,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,   # effective batch of 8 * 4 = 32 per device
    remove_unused_columns=False,
    report_to="none",
    optim="adamw_bnb_8bit",
    bf16=True,
    num_train_epochs=100,
    logging_strategy='steps',
    logging_steps=50,
    label_names=['labels'],
    # Metric published by EarlyStoppingWithCombinedLossCallback; lower is better.
    metric_for_best_model="eval_combined_loss",
    greater_is_better=False,
    load_best_model_at_end=True,
    save_strategy="steps",
    save_steps=50,
    eval_strategy="steps",
    eval_steps=50,
    save_on_each_node=True,
    save_total_limit=1,
    torch_compile=True,
    # NOTE(review): preprocess_dataset pads every example to max_length, so
    # all samples have the same length and there is nothing to group.
    group_by_length=True,
    lr_scheduler_type="cosine",
    weight_decay=0.01
)

# `use_cache` is a model config option, not a TrainingArguments field:
# assigning it on `training_args` has no effect on the model. Disable the
# key/value cache on the model itself, since it is not needed for training.
model.config.use_cache = False

## Configuration Trainable Parameters

In [15]:
from peft import PeftModel

# LoRA adapter settings: adapters are attached to the attention
# query/key/value projections only.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=128,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj", "k_proj"],
)

# Wrap the quantized base model with the adapters.
lora_model = get_peft_model(model, peft_config)

# Train the adapter weights only: a parameter is trainable exactly when its
# name contains "lora_"; every other parameter is frozen.
for param_name, weight in lora_model.named_parameters():
    weight.requires_grad = "lora_" in param_name

# Report trainable vs. total parameter counts.
lora_model.print_trainable_parameters()

trainable params: 22,118,400 || all params: 2,427,445,760 || trainable%: 0.9112


## Trainer

In [None]:
import warnings

# Silence known, non-actionable warnings so the training log stays readable.
# Filters are registered in the same order as before.
warnings.filterwarnings("ignore", category=UserWarning, module="torch._dynamo")
for noisy_message in (
    "Setting `save_embedding_layers` to `True`",
    "Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.",
    "Could not find a config file in",
):
    warnings.filterwarnings("ignore", message=noisy_message)

import torch._dynamo

# Raise torch.compile's recompilation cache limit.
torch._dynamo.config.cache_size_limit = 1000

# Early stopping on the blended train/eval loss defined earlier.
combined_loss_callback = EarlyStoppingWithCombinedLossCallback(
    weight_train=0.4,
    weight_eval=0.6,
    early_stopping_patience=1000,
)

trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    data_collator=data_collator,
    processing_class=tokenizer,
    callbacks=[combined_loss_callback],
)

print("Fine-tuning 시작...")
trainer.train()
print("Fine-tuning 완료!")

Fine-tuning 시작...


Step,Training Loss,Validation Loss,Runtime,Samples Per Second,Steps Per Second,Combined Loss
50,2.2075,1.939771,44.2543,27.636,3.457,2.046863
100,1.8592,1.784239,43.9885,27.803,3.478,1.814223
150,1.7291,1.698546,43.9642,27.818,3.48,1.710768
200,1.64,1.640615,44.2257,27.654,3.46,1.640369
250,1.5829,1.59208,43.9878,27.803,3.478,1.588408
300,1.565,1.548133,44.126,27.716,3.467,1.55488
350,1.4766,1.517886,44.0266,27.779,3.475,1.501372
400,1.4588,1.488895,43.9209,27.845,3.484,1.476857
450,1.4072,1.469602,44.0362,27.773,3.474,1.444641
500,1.3594,1.44929,43.8716,27.877,3.487,1.413334


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-50 with Combined Loss: 2.0469


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-100 with Combined Loss: 1.8142


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-150 with Combined Loss: 1.7108


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-200 with Combined Loss: 1.6404


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-250 with Combined Loss: 1.5884


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-300 with Combined Loss: 1.5549


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-350 with Combined Loss: 1.5014


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-400 with Combined Loss: 1.4769


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-450 with Combined Loss: 1.4446


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-500 with Combined Loss: 1.4133


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-550 with Combined Loss: 1.3895


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-600 with Combined Loss: 1.3726


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-650 with Combined Loss: 1.3474


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-700 with Combined Loss: 1.3209


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-750 with Combined Loss: 1.3093


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-800 with Combined Loss: 1.2857


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-850 with Combined Loss: 1.2670


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-900 with Combined Loss: 1.2563


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-950 with Combined Loss: 1.2433


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1000 with Combined Loss: 1.2291


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 1050: 1.2357


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1100 with Combined Loss: 1.2177


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1150 with Combined Loss: 1.2090


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1200 with Combined Loss: 1.1975


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 1250: 1.1991


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1300 with Combined Loss: 1.1886


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1350 with Combined Loss: 1.1767


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 1400: 1.1788


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1450 with Combined Loss: 1.1661


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1500 with Combined Loss: 1.1650


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 1550: 1.1797


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1600 with Combined Loss: 1.1572


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1650 with Combined Loss: 1.1553


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 1700: 1.1639


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1750 with Combined Loss: 1.1447


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 1800: 1.1537


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 1850: 1.1609


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


New best checkpoint: /mnt/ssd/1/hub/models-iljoodeephub-LGAI-EXAONE-2.4B_bf16_lr64_qlr4_test1/checkpoint-1900 with Combined Loss: 1.1401


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 1950: 1.1403


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2000: 1.1699


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2050: 1.1509


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2100: 1.1496


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2150: 1.1897


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2200: 1.1625


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2250: 1.1578


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2300: 1.1951


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2350: 1.1768


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2400: 1.1705


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2450: 1.1598


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2500: 1.1787


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2550: 1.1737


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2600: 1.1714


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2650: 1.1822


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


Combined Loss at step 2700: 1.1871
