# Library Install and Load

In [1]:
!pip install datasets transformers torch peft accelerate scikit-learn

from google.colab import drive
drive.mount('/content/drive')

from datasets import load_dataset, Dataset, DatasetDict
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from peft import get_peft_model, LoraConfig, TaskType
import pandas as pd
import numpy as np
import torch
import time
import gc
import os
import json
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

import wandb
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "disabled"
try:
    wandb.finish()
except:
    pass

print(f"GPU: {torch.cuda.is_available()}")

project_folder = '/content/drive/MyDrive/KMU/NLP'
os.makedirs(project_folder, exist_ok=True)
print(f"Project Folder: {project_folder}")

Collecting numpy>=1.17 (from datasets)
  Downloading numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Downloading numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m64.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.3.0
    Uninstalling numpy-2.3.0:
      Successfully uninstalled numpy-2.3.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.0 which is incompatible.
google-colab 1.0.0 requires requests==2.32.3, but you have reque

  from scipy.sparse import csr_matrix, issparse


GPU: True
Project Folder: /content/drive/MyDrive/KMU/NLP


In [2]:
!pip install --upgrade --force-reinstall datasets transformers huggingface_hub fsspec

Collecting datasets
  Using cached datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting transformers
  Downloading transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting huggingface_hub
  Downloading huggingface_hub-0.33.0-py3-none-any.whl.metadata (14 kB)
Collecting fsspec
  Downloading fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)
Collecting filelock (from datasets)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting numpy>=1.17 (from datasets)
  Downloading numpy-2.3.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-20.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Using cached dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting pandas (from datasets)
  Downloading pandas-2.3.0-cp

# Download Dataset and Preprocessing

In [2]:
print("Start Download Dataset")
print("=" * 50)

### Downlaod SST2
sst2_dataset = load_dataset("glue", "sst2")

print("Complete loading SST2")
print(f"Train: {len(sst2_dataset['train'])}")
print(f"Valid: {len(sst2_dataset['validation'])}")

print("\nSST2 Sample:")
for i in range(2):
    sample = sst2_dataset['train'][i]
    label_text = "positive" if sample['label'] == 1 else "negative"
    print(f"Text: {sample['sentence']}")
    print(f"Label: {sample['label']} ({label_text})")
    print()


### Downlaod AG News
ag_news_dataset = load_dataset("ag_news")

print("Complete loading AG News")
print(f"Train: {len(ag_news_dataset['train'])}")
print(f"Test: {len(ag_news_dataset['test'])}")

train_data = ag_news_dataset['train']
df = pd.DataFrame({'text': train_data['text'], 'label': train_data['label']})

train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['text'], df['label'], test_size=0.1, random_state=42, stratify=df['label']
)

# Reconstructing the AG News dataset
ag_news_split = DatasetDict({
    'train': Dataset.from_dict({'text': train_texts.tolist(), 'label': train_labels.tolist()}),
    'validation': Dataset.from_dict({'text': val_texts.tolist(), 'label': val_labels.tolist()}),
    'test': ag_news_dataset['test']
})

print(f"After Split - Train: {len(ag_news_split['train'])}, Valid: {len(ag_news_split['validation'])}")

ag_news_labels = ['World', 'Sports', 'Business', 'Sci/Tech']
print("\nAG News Sample:")
for i in range(2):
    sample = ag_news_split['train'][i]
    print(f"Text: {sample['text'][:80]}...")
    print(f"Label: {sample['label']} ({ag_news_labels[sample['label']]})")
    print()

# Download KLUE-NLI
klue_nli_dataset = load_dataset("klue", "nli")

print("Complete loading KLUE-NLI")
print(f"Origin Train: {len(klue_nli_dataset['train'])}")
print(f"Origin Valid: {len(klue_nli_dataset['validation'])}")

np.random.seed(42)

train_indices = np.random.choice(len(klue_nli_dataset['train']), 5000, replace=False)
klue_train_sample = klue_nli_dataset['train'].select(train_indices)

val_indices = np.random.choice(len(klue_nli_dataset['validation']), 1000, replace=False)
klue_val_sample = klue_nli_dataset['validation'].select(val_indices)

klue_nli_split = DatasetDict({
    'train': klue_train_sample,
    'validation': klue_val_sample
})

print(f"After Split - Train: {len(klue_nli_split['train'])}, Valid: {len(klue_nli_split['validation'])}")

klue_nli_labels = ['entailment', 'contradiction', 'neutral']
print("\nKLUE-NLI Sample:")
for i in range(2):
    sample = klue_nli_split['train'][i]
    print(f"Premise: {sample['premise']}")
    print(f"Hypothesis: {sample['hypothesis']}")
    print(f"Label: {sample['label']} ({klue_nli_labels[sample['label']]})")
    print()

Start Download Dataset


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

Complete loading SST2
Train: 67349
Valid: 872

SST2 Sample:
Text: hide new secretions from the parental units 
Label: 0 (negative)

Text: contains no wit , only labored gags 
Label: 0 (negative)



README.md:   0%|          | 0.00/8.07k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

Complete loading AG News
Train: 120000
Test: 7600
After Split - Train: 108000, Valid: 12000

AG News Sample:
Text: 10 seconds that change everything ATHENS - Ten seconds. Barely time enough to ti...
Label: 1 (Sports)

Text: Charline Labonte rises to challenge Charline Labonte has served notice she wants...
Label: 1 (Sports)



README.md:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/1.83M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/224k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/24998 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3000 [00:00<?, ? examples/s]

Complete loading KLUE-NLI
Origin Train: 24998
Origin Valid: 3000
After Split - Train: 5000, Valid: 1000

KLUE-NLI Sample:
Premise: 또한 대전시에 있는 대학, 연구기관, 공공기관 등 인적 역량이 뛰어난 기관들과의 협력체계를 잘 구축해 사업계획의 실현가능성에서 높은 점수를 받았다.
Hypothesis: 인적 역량이 뛰어난 기관들과의 협력체계를 잘 구축하면 사업계획에 높은 점수를 받을 수 있다.
Label: 0 (entailment)

Premise: 프리터인 가네코 뎃페이는 아침 출근 시간으로 북적대는 지하철을 타고 취직 면접을 보러 가는 중, 여중생에게 치한이라고 오해받는다.
Hypothesis: 여중생은 가네코 뎃페이가 치한이라고 생각했다.
Label: 0 (entailment)



# Tokenizer

In [3]:
# Load DistilBERT Tokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_sst2(examples):
    return tokenizer(examples['sentence'], truncation=True, padding=True, max_length=128)

def tokenize_ag_news(examples):
    return tokenizer(examples['text'], truncation=True, padding=True, max_length=256)

def tokenize_klue_nli(examples):
    return tokenizer(examples['premise'], examples['hypothesis'], truncation=True, padding=True, max_length=256)

# SST2 Tokenizing
sst2_tokenized = sst2_dataset.map(tokenize_sst2, batched=True, remove_columns=['sentence', 'idx'])
print("Complete Tokenizing SST2")

# AG News Tokenizing
ag_news_tokenized = ag_news_split.map(tokenize_ag_news, batched=True, remove_columns=['text'])
print("Complete Tokenizing AG News")

# KLUE-NLI Tokenizing
klue_nli_tokenized = klue_nli_split.map(
    tokenize_klue_nli,
    batched=True,
    remove_columns=['premise', 'hypothesis', 'source']
)
print("Complete Tokenizing KLUE-NLI")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

Complete Tokenizing SST2


Map:   0%|          | 0/108000 [00:00<?, ? examples/s]

Map:   0%|          | 0/12000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

Complete Tokenizing AG News


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Complete Tokenizing KLUE-NLI


In [4]:
def get_gpu_memory():
    if torch.cuda.is_available():
        return torch.cuda.memory_allocated() / 1024**3
    return 0

def get_model_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total_params, trainable_params

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    accuracy = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average='weighted')

    return {'accuracy': accuracy, 'f1': f1}

def clear_memory():
    torch.cuda.empty_cache()
    gc.collect()
    print(f"Complete cleanup Memory - GPU Memory: {get_gpu_memory():.2f} GB")

# Full-Fine Tuning DistillBERT

In [5]:
def train_full_model(model_name, train_data, eval_data, num_labels, num_epochs=3):
    """Full Fine-tuning 모델 학습"""
    print(f"\n{model_name} - Full Fine-tuning 시작!")
    print("-" * 50)

    # generate model
    model = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",
        num_labels=num_labels
    )

    # parameter infomation
    total_params, trainable_params = get_model_parameters(model)
    print(f"Model Info:")
    print(f"   Total Parameter: {total_params:,}")
    print(f"   Train Parameter: {trainable_params:,}")
    print(f"   Train Ratio: {trainable_params/total_params*100:.1f}%")

    # train setting
    output_dir = f"/content/drive/MyDrive/KMU/NLP/training_outputs/{model_name}_full"
    os.makedirs(output_dir, exist_ok=True)

    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=num_epochs,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_steps=50,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        greater_is_better=True,
        report_to=[],
        run_name=None,
        log_level='error',
        disable_tqdm=False,
        dataloader_pin_memory=False,
    )

    # generate trainer
    data_collator = DataCollatorWithPadding(tokenizer)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_data,
        eval_dataset=eval_data,
        compute_metrics=compute_metrics,
        data_collator=data_collator,
    )

    # training
    start_time = time.time()
    start_memory = get_gpu_memory()

    train_result = trainer.train()
    eval_result = trainer.evaluate()

    end_time = time.time()
    max_memory = get_gpu_memory()

    # save results
    results = {
        'model_name': f"{model_name}_Full",
        'training_time': end_time - start_time,
        'max_memory_usage': max_memory,
        'accuracy': eval_result['eval_accuracy'],
        'f1_score': eval_result['eval_f1'],
        'total_params': total_params,
        'trainable_params': trainable_params
    }

    print(f"{model_name} Completer Full Fine-tuning!")
    print(f"   Train time: {results['training_time']:.1f}sec")
    print(f"   Max Memory: {results['max_memory_usage']:.2f} GB")
    print(f"   Accuracy: {results['accuracy']:.4f}")
    print(f"   F1-Score: {results['f1_score']:.4f}")

    return model, results

In [None]:
# SST2 Full Fine-tuning
clear_memory()
sst2_full_model, sst2_full_results = train_full_model(
    "SST2", sst2_tokenized['train'], sst2_tokenized['validation'], 2
)

Complete cleanup Memory - GPU Memory: 0.25 GB

SST2 - Full Fine-tuning 시작!
--------------------------------------------------


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model Info:
   Total Parameter: 66,955,010
   Train Parameter: 66,955,010
   Train Ratio: 100.0%


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.1854,0.281588,0.897936,0.897944
2,0.0858,0.370427,0.895642,0.89559
3,0.0825,0.41258,0.902523,0.902502


SST2 Completer Full Fine-tuning!
   Train time: 1372.3sec
   Max Memory: 1.02 GB
   Accuracy: 0.9025
   F1-Score: 0.9025


In [None]:
# AG News Full Fine-tuning
clear_memory()
ag_news_full_model, ag_news_full_results = train_full_model(
    "AG_News", ag_news_tokenized['train'], ag_news_tokenized['validation'], 4
)

Complete cleanup Memory - GPU Memory: 0.52 GB

AG_News - Full Fine-tuning 시작!
--------------------------------------------------
Model Info:
   Total Parameter: 66,956,548
   Train Parameter: 66,956,548
   Train Ratio: 100.0%


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2189,0.193541,0.936583,0.936466
2,0.1293,0.193647,0.946,0.945908
3,0.0385,0.2258,0.947167,0.947149


AG_News Completer Full Fine-tuning!
   Train time: 7301.9sec
   Max Memory: 1.29 GB
   Accuracy: 0.9472
   F1-Score: 0.9471


In [None]:
# KLUE-NLI Full Fine-tuning
clear_memory()
klue_nli_full_model, klue_nli_full_results = train_full_model(
    "KLUE_NLI", klue_nli_tokenized['train'], klue_nli_tokenized['validation'], 3
)

Complete cleanup Memory - GPU Memory: 0.00 GB

KLUE_NLI - Full Fine-tuning 시작!
--------------------------------------------------


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model Info:
   Total Parameter: 66,955,779
   Train Parameter: 66,955,779
   Train Ratio: 100.0%


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.0994,1.099778,0.312,0.14839
2,1.1031,1.108731,0.343,0.175203
3,1.1001,1.097491,0.342,0.257868


KLUE_NLI Completer Full Fine-tuning!
   Train time: 349.9sec
   Max Memory: 0.79 GB
   Accuracy: 0.3430
   F1-Score: 0.1752


# Fine-Tuning DistillBERT with LoRA

In [6]:
def train_lora_model(model_name, train_data, eval_data, num_labels, lora_r=8, num_epochs=3):
    """LoRA Fine-tuning 모델 학습"""
    print(f"\n{model_name} - LoRA Fine-tuning 시작!")
    print("-" * 50)

    # base model
    model = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",
        num_labels=num_labels
    )

    # LoRA setting
    lora_config = LoraConfig(
        r=lora_r,
        lora_alpha=32,
        target_modules=["q_lin", "v_lin"],  # DistilBERT attention layers
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.SEQ_CLS,
    )

    # LoRA apply
    model = get_peft_model(model, lora_config)

    # Parameter Information
    total_params, trainable_params = get_model_parameters(model)
    print(f"📊 LoRA Model Info:")
    print(f"   Total Parameter: {total_params:,}")
    print(f"   Train Prarmeter: {trainable_params:,}")
    print(f"   Train Ratio: {trainable_params/total_params*100:.4f}%")
    model.print_trainable_parameters()

    # Train setting
    output_dir = f"/content/drive/MyDrive/KMU/NLP/training_outputs/{model_name}_lora"
    os.makedirs(output_dir, exist_ok=True)

    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=num_epochs,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_steps=100,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        greater_is_better=True,
        report_to=None,
        dataloader_pin_memory=False,
    )

    # Generate Trainer
    data_collator = DataCollatorWithPadding(tokenizer)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_data,
        eval_dataset=eval_data,
        compute_metrics=compute_metrics,
        data_collator=data_collator,
    )

    # Training
    start_time = time.time()
    start_memory = get_gpu_memory()

    train_result = trainer.train()
    eval_result = trainer.evaluate()

    end_time = time.time()
    max_memory = get_gpu_memory()

    # save results
    results = {
        'model_name': f"{model_name}_LoRA",
        'training_time': end_time - start_time,
        'max_memory_usage': max_memory,
        'accuracy': eval_result['eval_accuracy'],
        'f1_score': eval_result['eval_f1'],
        'total_params': total_params,
        'trainable_params': trainable_params
    }

    print(f"{model_name} Complete LoRA Fine-tuning!")
    print(f"   Train Time: {results['training_time']:.1f}sec")
    print(f"   Max Memory: {results['max_memory_usage']:.2f} GB")
    print(f"   Accuracy: {results['accuracy']:.4f}")
    print(f"   F1-Score: {results['f1_score']:.4f}")

    return model, results

In [None]:
# SST2 LoRA Fine-tuning
clear_memory()
sst2_lora_model, sst2_lora_results = train_lora_model(
    "SST2", sst2_tokenized['train'], sst2_tokenized['validation'], 2
)

Complete cleanup Memory - GPU Memory: 0.27 GB

SST2 - LoRA Fine-tuning 시작!
--------------------------------------------------
📊 LoRA Model Info:
   Total Parameter: 67,694,596
   Train Prarmeter: 739,586
   Train Ratio: 1.0925%
trainable params: 739,586 || all params: 67,694,596 || trainable%: 1.0925
{'loss': 0.6952, 'grad_norm': 1.0455505847930908, 'learning_rate': 9.900000000000002e-06, 'epoch': 0.023752969121140142}
{'loss': 0.6721, 'grad_norm': 1.241965889930725, 'learning_rate': 1.9900000000000003e-05, 'epoch': 0.047505938242280284}
{'loss': 0.4797, 'grad_norm': 3.8688626289367676, 'learning_rate': 2.9900000000000002e-05, 'epoch': 0.07125890736342043}
{'loss': 0.3802, 'grad_norm': 3.1454901695251465, 'learning_rate': 3.99e-05, 'epoch': 0.09501187648456057}
{'loss': 0.355, 'grad_norm': 3.4124722480773926, 'learning_rate': 4.99e-05, 'epoch': 0.1187648456057007}
{'loss': 0.3654, 'grad_norm': 2.195112466812134, 'learning_rate': 4.959192085737841e-05, 'epoch': 0.14251781472684086}
{'lo

In [7]:
# AG News LoRA Fine-tuning
clear_memory()
ag_news_lora_model, ag_news_lora_results = train_lora_model(
    "AG_News", ag_news_tokenized['train'], ag_news_tokenized['validation'], 4
)

Complete cleanup Memory - GPU Memory: 0.00 GB

AG_News - LoRA Fine-tuning 시작!
--------------------------------------------------


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


📊 LoRA Model Info:
   Total Parameter: 67,697,672
   Train Prarmeter: 741,124
   Train Ratio: 1.0948%
trainable params: 741,124 || all params: 67,697,672 || trainable%: 1.0948


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.2104,0.220441,0.924,0.923784
2,0.2038,0.204753,0.930083,0.929981
3,0.212,0.199737,0.932667,0.932564


AG_News Complete LoRA Fine-tuning!
   Train Time: 5123.1sec
   Max Memory: 0.27 GB
   Accuracy: 0.9327
   F1-Score: 0.9326


In [8]:
# KLUE-NLI LoRA Fine-tuning
clear_memory()
klue_nli_lora_model, klue_nli_lora_results = train_lora_model(
    "KLUE_NLI", klue_nli_tokenized['train'], klue_nli_tokenized['validation'], 3
)

Complete cleanup Memory - GPU Memory: 0.27 GB

KLUE_NLI - LoRA Fine-tuning 시작!
--------------------------------------------------


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


📊 LoRA Model Info:
   Total Parameter: 67,696,134
   Train Prarmeter: 740,355
   Train Ratio: 1.0936%
trainable params: 740,355 || all params: 67,696,134 || trainable%: 1.0936


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,1.1037,1.099645,0.336,0.219391
2,1.1019,1.109152,0.343,0.175203
3,1.0986,1.098994,0.337,0.1955


KLUE_NLI Complete LoRA Fine-tuning!
   Train Time: 255.9sec
   Max Memory: 0.53 GB
   Accuracy: 0.3430
   F1-Score: 0.1752
