# 뉴스 편향성 분석 모델

이 노트북은 뉴스 기사의 민주당과 국민의힘에 대한 편향성을 분석하는 딥러닝 모델을 구현합니다.

In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer,
    AutoModel,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback,
    DataCollatorWithPadding
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import os
os.environ["WANDB_DISABLED"] = "true"

In [None]:
# Mount Google Drive at /content/drive; the CSV paths used below live under it.
# `google.colab` is imported here, so this cell is meant for a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

## 1. 데이터 로드 및 전처리

In [3]:
# Load the saved baseline results; `.iloc[0]` keeps only the first row of the CSV.
baseline_results = pd.read_csv('/content/drive/MyDrive/텍스트데이터분석을 위한 딥러닝/팀프로젝트/baseline_results/baseline_results.csv').iloc[0]

In [4]:
# Read the labelled article table (adjust the path to wherever your copy lives).
df = pd.read_csv(
    '/content/drive/MyDrive/텍스트데이터분석을 위한 딥러닝/팀프로젝트/data/정당_관점_라벨링_최종.csv'
)

# Party name -> integer class id, stored in a new `party_label` column.
party_mapping = {'국민의힘': 0, '민주당': 1, '그외': 2}
df['party_label'] = df['party'].map(party_mapping)

# Discard rows that lack a title, a body, or a mapped party label.
df = df.dropna(subset=['title_cleaned', 'content_cleaned', 'party_label'])

# The model input is "<title> <body>".
df['text'] = df['title_cleaned'] + ' ' + df['content_cleaned']

# Report the dataset size and the class balance (by name, then by id).
print(f"전체 데이터 수: {len(df)}")
for heading, column in [("\n정당별 기사 수:", 'party'), ("\n정당 레이블 분포:", 'party_label')]:
    print(heading)
    print(df[column].value_counts())

전체 데이터 수: 1000

정당별 기사 수:
party
그외      420
민주당     314
국민의힘    266
Name: count, dtype: int64

정당 레이블 분포:
party_label
2    420
1    314
0    266
Name: count, dtype: int64


## 2. 데이터셋 클래스 정의

In [5]:
class NewsDataset(Dataset):
    """Map-style dataset that tokenizes one article per lookup.

    Every item is a dict with three entries, in this order:
    ``input_ids`` and ``attention_mask`` (the tokenizer output with its
    leading axis squeezed away) and ``party_label`` (a ``torch.long`` scalar).

    Args:
        texts: Indexable collection of article texts.
        party_labels: Indexable collection of integer class ids, aligned with ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, **kwargs)``.
        max_length: Length requested from the tokenizer for padding/truncation.
    """

    def __init__(self, texts, party_labels, tokenizer, max_length=512):
        self.texts, self.party_labels = texts, party_labels
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoded = self.tokenizer(
            str(self.texts[idx]),
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

        # The 'pt' output is expected to carry a leading axis of size 1; drop it
        # so each field is a flat per-example tensor.
        item = {
            field: encoded[field].squeeze(0)
            for field in ('input_ids', 'attention_mask')
        }
        item['party_label'] = torch.tensor(self.party_labels[idx], dtype=torch.long)
        return item

## 3. 모델 정의

In [6]:
class NewsBiasModel(torch.nn.Module):
    """Pretrained encoder plus a small feed-forward head for party classification.

    The encoder's first-position hidden state goes through dropout, a
    Linear -> LayerNorm -> ReLU -> Dropout feature layer, and a final linear
    classifier that emits ``num_party_labels`` logits.

    Args:
        model_name: Identifier passed to ``AutoModel.from_pretrained``.
        num_party_labels: Number of output classes.
        class_weights: Optional per-class weights for the cross-entropy loss
            (list, NumPy array or tensor). ``None`` gives an unweighted loss.
        dropout_rate: Dropout probability used before and inside the feature layer.
        hidden_size: Width of the feature layer.
    """

    def __init__(self, model_name, num_party_labels=3, class_weights=None,
                 dropout_rate=0.2, hidden_size=256):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        # Registered as a buffer so the weights follow the module across devices.
        # A ``None`` buffer is valid and simply disables class weighting.
        self.register_buffer("class_weights", self._to_weight_tensor(class_weights))

        # Dropout applied to the pooled encoder output
        self.dropout = torch.nn.Dropout(dropout_rate)

        # Feature extraction layer
        hidden_size_bert = self.bert.config.hidden_size
        self.feature_layer = torch.nn.Sequential(
            torch.nn.Linear(hidden_size_bert, hidden_size),
            torch.nn.LayerNorm(hidden_size),
            torch.nn.ReLU(),
            torch.nn.Dropout(dropout_rate)
        )

        # Party classifier (single linear layer)
        self.party_classifier = torch.nn.Sequential(
            torch.nn.Linear(hidden_size, num_party_labels)
        )

        self._init_weights()

    @staticmethod
    def _to_weight_tensor(class_weights):
        """Return ``class_weights`` as an independent float32 tensor, or ``None``."""
        if class_weights is None:
            return None
        # detach().clone() keeps the buffer independent of a tensor passed by the caller.
        return torch.as_tensor(class_weights, dtype=torch.float).detach().clone()

    def _init_weights(self):
        """Xavier-initialize the Linear layers of the head; biases are zeroed.

        The pretrained encoder is left untouched.
        """
        for module in [self.feature_layer, self.party_classifier]:
            for m in module.modules():
                if isinstance(m, torch.nn.Linear):
                    torch.nn.init.xavier_uniform_(m.weight)
                    if m.bias is not None:
                        torch.nn.init.zeros_(m.bias)

    def forward(self, input_ids, attention_mask, party_label=None):
        """Run the encoder and the classification head.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.
            party_label: Optional target class ids; when given, a loss is returned too.

        Returns:
            ``{'party_logits': ...}``, plus ``'loss'`` (class-weighted
            cross-entropy) when ``party_label`` is provided.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Hidden state at sequence position 0 (the [CLS] token)
        pooled_output = outputs.last_hidden_state[:, 0, :]

        features = self.dropout(pooled_output)
        features = self.feature_layer(features)

        party_logits = self.party_classifier(features)

        if party_label is not None:
            loss_fct = torch.nn.CrossEntropyLoss(weight=self.class_weights)
            party_loss = loss_fct(party_logits, party_label)
            return {'loss': party_loss, 'party_logits': party_logits}

        return {'party_logits': party_logits}

In [7]:
class MultiTaskNewsModel(torch.nn.Module):
    """Shared encoder with stance and sentiment heads plus an auxiliary masked-token loss.

    The encoder's first-position hidden state is projected to 256 shared
    features that feed two independent classifiers. During training the total
    loss is ``stance_loss + 0.5 * auxiliary_loss``, where the auxiliary term is
    the masked-token reconstruction loss from ``_compute_sentiment_loss``.

    Args:
        model_name: Identifier passed to ``AutoModel.from_pretrained``.
        num_stance_labels: Number of stance classes.
        num_sentiment_labels: Number of sentiment classes.
        mask_token_id: Token id written into masked positions. It must be a valid
            id for the encoder's vocabulary; otherwise the auxiliary loss is skipped.
        class_weights: Optional per-class weights for the stance loss
            (list, NumPy array or tensor). ``None`` gives an unweighted loss.
    """

    def __init__(self, model_name, num_stance_labels=3, num_sentiment_labels=3, mask_token_id=50264, class_weights=None):
        super().__init__()
        # Pretrained encoder
        self.bert = AutoModel.from_pretrained(model_name)
        # NOTE(review): the default 50264 only fits vocabularies larger than that;
        # callers should presumably pass their tokenizer's own mask token id.
        self.mask_token_id = mask_token_id
        # A ``None`` buffer is valid and simply disables class weighting.
        self.register_buffer("class_weights", self._to_weight_tensor(class_weights))

        # Dropout applied to the pooled encoder output
        self.dropout = torch.nn.Dropout(0.2)

        # Shared feature layer
        hidden_size = self.bert.config.hidden_size
        self.shared_layer = torch.nn.Linear(hidden_size, 256)
        self.activation = torch.nn.ReLU()

        # Stance classifier
        self.stance_classifier = torch.nn.Sequential(
            torch.nn.Linear(256, 128),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(128, num_stance_labels)
        )

        # Sentiment classifier
        self.sentiment_classifier = torch.nn.Sequential(
            torch.nn.Linear(256, 128),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(128, num_sentiment_labels)
        )

        # Projects encoder hidden states to vocabulary logits for the auxiliary loss
        self.sentiment_vocab_projection = torch.nn.Linear(hidden_size, self.bert.config.vocab_size)

        self._init_weights()

    @staticmethod
    def _to_weight_tensor(class_weights):
        """Return ``class_weights`` as an independent float32 tensor, or ``None``."""
        if class_weights is None:
            return None
        # detach().clone() keeps the buffer independent of a tensor passed by the caller.
        return torch.as_tensor(class_weights, dtype=torch.float).detach().clone()

    def _init_weights(self):
        """Xavier-initialize the Linear layers of the shared layer and both heads.

        Biases are zeroed. The encoder and the vocabulary projection keep their
        existing initialization.
        """
        for module in [self.shared_layer, self.stance_classifier, self.sentiment_classifier]:
            for m in module.modules():
                if isinstance(m, torch.nn.Linear):
                    torch.nn.init.xavier_uniform_(m.weight)
                    if m.bias is not None:
                        torch.nn.init.zeros_(m.bias)

    def forward(self, input_ids, attention_mask, stance_label=None):
        """Run the encoder and both heads.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.
            stance_label: Optional stance targets; when given, a loss is returned too.

        Returns:
            ``{'stance_logits', 'sentiment_logits'}``, plus ``'loss'`` when
            ``stance_label`` is provided.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Hidden state at sequence position 0 (the [CLS] token)
        pooled_output = outputs.last_hidden_state[:, 0, :]

        shared_features = self.dropout(pooled_output)
        shared_features = self.shared_layer(shared_features)
        shared_features = self.activation(shared_features)

        stance_logits = self.stance_classifier(shared_features)
        sentiment_logits = self.sentiment_classifier(shared_features)

        if stance_label is not None:
            # Class-weighted stance loss
            loss_fct = torch.nn.CrossEntropyLoss(weight=self.class_weights)
            stance_loss = loss_fct(stance_logits, stance_label)

            # Auxiliary masked-token loss (no sentiment labels are involved)
            sentiment_loss = self._compute_sentiment_loss(sentiment_logits, input_ids, attention_mask)

            total_loss = stance_loss + 0.5 * sentiment_loss

            return {
                'loss': total_loss,
                'stance_logits': stance_logits,
                'sentiment_logits': sentiment_logits
            }

        return {
            'stance_logits': stance_logits,
            'sentiment_logits': sentiment_logits
        }

    def _compute_sentiment_loss(self, sentiment_logits, input_ids, attention_mask):
        """Auxiliary masked-token reconstruction loss.

        Each attended position is independently replaced by ``mask_token_id``
        with probability 0.15. The encoder is run again on the corrupted ids,
        and its hidden states at the masked positions are projected to the
        vocabulary and scored against the original token ids.

        ``sentiment_logits`` is accepted for interface compatibility but is not
        used in the computation.

        Returns:
            A scalar loss tensor. A constant ``0.0`` is returned when nothing
            was masked, when ``mask_token_id`` is not a valid vocabulary id, or
            when the computation raises (best-effort: the error is printed and
            training continues on the stance loss alone).
        """
        vocab_size = self.bert.config.vocab_size

        # Reject an invalid mask id up front, so that it is never written into
        # the ids that are fed to the encoder.
        if not 0 <= self.mask_token_id < vocab_size:
            print(
                "감성 분석 손실 계산 중 오류 발생: "
                f"mask_token_id {self.mask_token_id} is outside the vocabulary range [0, {vocab_size})"
            )
            return torch.tensor(0.0, device=input_ids.device)

        try:
            masked_input_ids = input_ids.clone()
            mask_prob = 0.15

            # Random selection, restricted to positions the attention mask marks as 1
            mask_mask = torch.rand_like(input_ids.float()) < mask_prob
            mask_mask = mask_mask & (attention_mask == 1)

            if not mask_mask.any():
                return torch.tensor(0.0, device=input_ids.device)

            masked_input_ids[mask_mask] = self.mask_token_id

            # Defensive: promote un-batched inputs to a batch of one
            if len(masked_input_ids.shape) == 1:
                masked_input_ids = masked_input_ids.unsqueeze(0)
            if len(attention_mask.shape) == 1:
                attention_mask = attention_mask.unsqueeze(0)

            masked_outputs = self.bert(
                input_ids=masked_input_ids,
                attention_mask=attention_mask,
                return_dict=True
            )

            # Hidden states at the masked positions only
            last_hidden_state = masked_outputs.last_hidden_state
            masked_positions = mask_mask.nonzero(as_tuple=True)
            masked_hidden = last_hidden_state[masked_positions]

            # Targets are the original ids at those positions
            target_ids = input_ids[mask_mask]

            masked_logits = self.sentiment_vocab_projection(masked_hidden)

            # Explicit range checks instead of `assert`, which is stripped under -O
            if torch.max(target_ids) >= vocab_size:
                raise ValueError(f"target_id max {torch.max(target_ids)} >= vocab size {vocab_size}")
            if torch.min(target_ids) < 0:
                raise ValueError(f"target_id min {torch.min(target_ids)} < 0")

            loss_fct = torch.nn.CrossEntropyLoss()
            return loss_fct(masked_logits, target_ids)

        except Exception as e:
            # Deliberate best-effort: the auxiliary loss must not abort training.
            print(f"감성 분석 손실 계산 중 오류 발생: {str(e)}")
            return torch.tensor(0.0, device=input_ids.device)

## 4. 학습 준비

In [8]:
# Split the data: hold out 10% for validation, with a fixed seed.
# NOTE(review): the split is not stratified by label; consider `stratify=` if
# train and validation should keep the same class proportions.
train_texts, val_texts, train_party_labels, val_party_labels = train_test_split(
    df['text'].values,
    df['party_label'].values,
    test_size=0.1,
    random_state=42
)

In [None]:
# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained('klue/roberta-base')

# Build the train and validation datasets
train_dataset = NewsDataset(train_texts, train_party_labels, tokenizer)
val_dataset = NewsDataset(val_texts, val_party_labels, tokenizer)

In [10]:
# Class weights only need the labels. Read them straight from the split rather
# than iterating `train_dataset`, which would tokenize every article just to
# return its label.
label_list = np.asarray(train_party_labels, dtype=np.int64).tolist()
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.array([0, 1, 2]),  # 0: People Power Party, 1: Democratic Party, 2: other
    y=np.array(label_list)
)
print("클래스 가중치:", class_weights)

클래스 가중치: [1.28205128 1.04166667 0.79365079]


In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the model first, then move it to the selected device as a separate step
model = NewsBiasModel('klue/roberta-base', class_weights=class_weights)
model = model.to(device)

# Data collator built from the tokenizer
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [12]:
def compute_metrics(eval_pred):
    """Compute weighted F1 and accuracy for the party classifier.

    The scores are computed directly instead of being read out of
    ``classification_report``: that call was given three fixed
    ``target_names`` and raises when an evaluation split happens to contain
    fewer than three distinct classes across labels and predictions.

    Args:
        eval_pred: Object exposing ``predictions`` (per-class scores, one row
            per example) and ``label_ids`` (true class ids).

    Returns:
        Dict with ``party_f1`` (support-weighted F1) and ``party_accuracy``.
    """
    party_labels = np.asarray(eval_pred.label_ids)
    # Predicted class = index of the highest score in each row
    party_preds = np.argmax(eval_pred.predictions, axis=1)

    return {
        'party_f1': float(
            f1_score(party_labels, party_preds, average='weighted', zero_division=0)
        ),
        'party_accuracy': float(np.mean(party_preds == party_labels)),
    }

In [None]:
# Training arguments
training_args = TrainingArguments(
    output_dir='./news_bias_results',
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=1e-5,
    warmup_ratio=0.1,
    weight_decay=0.05,
    logging_dir='./news_bias_logs',
    logging_steps=100,
    eval_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model='party_f1',  # key returned by compute_metrics
    gradient_accumulation_steps=2,  # gradient accumulation
    # Mixed precision needs a GPU; enable it only when CUDA is present so the
    # CPU fallback chosen for `device` keeps working.
    fp16=torch.cuda.is_available(),
    # NOTE(review): Trainer's label smoothing appears to look for a `labels` key
    # in the batch, while this dataset emits `party_label` — confirm that this
    # setting actually takes effect.
    label_smoothing_factor=0.1,
    optim='adamw_torch'  # AdamW optimizer
)

# Trainer with early stopping on the monitored metric
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3, early_stopping_threshold=0.001)]
)

## 5. 학습 및 평가

In [None]:
# Run training
trainer.train()

# Final evaluation; the metric keys carry the `eval_` prefix
final_metrics = trainer.evaluate()
print("\n최종 평가 결과:")
print(f"정당 분류 F1 점수: {final_metrics['eval_party_f1']:.4f}")
print(f"정당 분류 정확도: {final_metrics['eval_party_accuracy']:.4f}")

In [None]:
# Compare the fine-tuned model against the baseline
comparison_results = {
    'Model': ['Baseline', 'News Bias BERT'],  # row 0 = baseline, row 1 = this model
    'Party F1': [baseline_results['party_f1'], final_metrics['eval_party_f1']],  # weighted F1
    'Party Accuracy': [baseline_results['party_accuracy'], final_metrics['eval_party_accuracy']],  # accuracy
}

comparison_df = pd.DataFrame(comparison_results)
comparison_df.to_csv('./news_bias_results/model_comparison.csv', index=False)  # saved next to the training outputs
comparison_df.head()

In [None]:
# Grouped bar chart: baseline vs. fine-tuned model on each metric
plt.figure(figsize=(12, 6))
metrics = ['Party F1', 'Party Accuracy']
x = np.arange(len(metrics))
width = 0.35

# Row 0 of comparison_df is drawn to the left of each tick, row 1 to the right;
# column 0 ('Model') is skipped by the `1:` slice.
for row, (shift, legend_name) in enumerate([(-width/2, 'Baseline'), (width/2, 'News Bias BERT')]):
    plt.bar(x + shift, comparison_df.iloc[row, 1:], width, label=legend_name)

plt.xlabel('Metrics')
plt.ylabel('Score')
plt.title('Model Performance Comparison')
plt.xticks(x, metrics)
plt.legend()

# Save before show()
plt.savefig('./news_bias_results/performance_comparison.png')
plt.show()

## 6. 하이퍼파라미터 튜닝

In [None]:
!pip install optuna
!pip install optuna-integration[pytorch_lightning]  # the session must be restarted after installing

In [14]:
import optuna
from optuna.integration import PyTorchLightningPruningCallback
from sklearn.model_selection import KFold

In [15]:
def objective(trial):
    """Optuna objective: mean weighted party F1 over 5-fold cross-validation.

    For every fold a fresh ``NewsBiasModel`` is fine-tuned with the sampled
    hyperparameters and evaluated on the held-out fold.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the per-fold ``eval_party_f1`` scores.
    """
    import gc

    # Search space
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 1e-6, 1e-4, log=True),
        'weight_decay': trial.suggest_float('weight_decay', 0.01, 0.05),
        'dropout_rate': trial.suggest_float('dropout_rate', 0.1, 0.3),
        'hidden_size': trial.suggest_int('hidden_size', 128, 512),
        'num_epochs': trial.suggest_int('num_epochs', 5, 15),
        'batch_size': trial.suggest_categorical('batch_size', [16, 32]),
        'warmup_ratio': trial.suggest_float('warmup_ratio', 0.1, 0.2),
        # NOTE(review): Trainer's label smoothing appears to look for a `labels`
        # key in the batch, while the dataset emits `party_label` — confirm that
        # this hyperparameter actually affects training.
        'label_smoothing': trial.suggest_float('label_smoothing', 0.05, 0.15),
        'gradient_accumulation_steps': trial.suggest_int('gradient_accumulation_steps', 1, 4),
        'max_grad_norm': trial.suggest_float('max_grad_norm', 0.5, 1.0)
    }

    # Mixed precision needs a GPU; fall back to full precision on CPU.
    use_fp16 = torch.cuda.is_available()

    # K-fold cross-validation
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    scores = []

    for train_idx, val_idx in kf.split(df):
        # Split the data for this fold
        train_texts = df.iloc[train_idx]['text'].values
        val_texts = df.iloc[val_idx]['text'].values
        train_labels = df.iloc[train_idx]['party_label'].values
        val_labels = df.iloc[val_idx]['party_label'].values

        train_dataset = NewsDataset(train_texts, train_labels, tokenizer)
        val_dataset = NewsDataset(val_texts, val_labels, tokenizer)

        # Derive the class weights from this fold's training labels only, so
        # the held-out fold does not influence them and the function no longer
        # depends on a notebook-level global computed on a different split.
        fold_class_weights = compute_class_weight(
            class_weight='balanced',
            classes=np.array([0, 1, 2]),
            y=np.asarray(train_labels, dtype=np.int64)
        )

        # Fresh model per fold
        model = NewsBiasModel(
            'klue/roberta-base',
            class_weights=fold_class_weights,
            dropout_rate=params['dropout_rate'],
            hidden_size=params['hidden_size']
        )
        model = model.to(device)

        # NOTE: with save_strategy='no' nothing is checkpointed, so the score
        # below comes from the weights at the end of training (the logged
        # warning says the best model is not reloaded automatically).
        training_args = TrainingArguments(
            output_dir='./temp_results',
            num_train_epochs=params['num_epochs'],
            per_device_train_batch_size=params['batch_size'],
            per_device_eval_batch_size=params['batch_size'] * 2,
            learning_rate=params['learning_rate'],
            warmup_ratio=params['warmup_ratio'],
            weight_decay=params['weight_decay'],
            logging_dir='./temp_logs',
            logging_steps=100,
            eval_strategy='epoch',
            save_strategy='no',
            load_best_model_at_end=False,
            save_total_limit=0,
            metric_for_best_model='party_f1',
            gradient_accumulation_steps=params['gradient_accumulation_steps'],
            max_grad_norm=params['max_grad_norm'],
            fp16=use_fp16,
            label_smoothing_factor=params['label_smoothing'],
            optim='adamw_torch',
            lr_scheduler_type='cosine_with_restarts'
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
            compute_metrics=compute_metrics,
            callbacks=[
                EarlyStoppingCallback(
                    early_stopping_patience=3,
                    early_stopping_threshold=0.001
                )
            ]
        )

        # Train, then score on the held-out fold
        trainer.train()
        metrics = trainer.evaluate()
        scores.append(metrics['eval_party_f1'])

        # Release this fold's model before the next one is built; many
        # trials x folds otherwise pile up on the accelerator.
        del trainer, model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Mean F1 across folds
    return np.mean(scores)

In [16]:
# Create the Optuna study (higher objective is better) and run the search
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)  # 20 trials

# Print the best trial's score and hyperparameters
print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2025-06-12 14:08:06,382] A new study created in memory with name: no-name-c6bffc55-db63-4ee2-a3ce-ff7622c5e16d
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.226161,0.214043,0.385
2,No log,1.071501,0.402148,0.48
3,No log,0.884938,0.555275,0.56
4,No log,0.924683,0.619348,0.62
5,No log,0.960719,0.623257,0.625
6,0.899000,0.988218,0.631548,0.625
7,0.899000,1.129396,0.657741,0.655
8,0.899000,1.442347,0.581717,0.585
9,0.899000,1.394198,0.637833,0.635
10,0.899000,1.572592,0.661042,0.665


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.136148,0.162721,0.315
2,No log,1.009193,0.514044,0.51
3,No log,1.163869,0.387933,0.425
4,No log,0.850463,0.63135,0.63
5,No log,1.137863,0.55945,0.55
6,0.935000,1.008349,0.643159,0.64
7,0.935000,1.445064,0.610443,0.615
8,0.935000,1.256246,0.637991,0.635
9,0.935000,1.518466,0.63782,0.635


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.132902,0.197022,0.3
2,No log,1.035844,0.33987,0.39
3,No log,0.836485,0.637636,0.63
4,No log,0.766242,0.695405,0.69
5,No log,0.829412,0.693806,0.69
6,0.944200,0.846521,0.700428,0.695
7,0.944200,0.90826,0.696672,0.695
8,0.944200,1.052446,0.713685,0.71
9,0.944200,1.103842,0.701899,0.7
10,0.944200,1.277548,0.728244,0.725


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.125041,0.240407,0.39
2,No log,1.046825,0.374443,0.47
3,No log,0.875929,0.615862,0.62
4,No log,0.878815,0.623234,0.63
5,No log,0.808468,0.701524,0.7
6,0.935600,0.917728,0.684138,0.69
7,0.935600,1.195885,0.646543,0.635
8,0.935600,1.216482,0.673127,0.675


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.220283,0.183286,0.31
2,No log,1.02668,0.229859,0.355
3,No log,0.962984,0.559084,0.58
4,No log,0.905439,0.625324,0.63
5,No log,0.927623,0.633953,0.64
6,0.922200,0.959931,0.64859,0.65
7,0.922200,0.969767,0.665074,0.665
8,0.922200,1.120182,0.655542,0.655
9,0.922200,1.26282,0.665713,0.665
10,0.922200,1.342542,0.669746,0.67


[I 2025-06-12 14:13:28,657] Trial 0 finished with value: 0.6588020991210096 and parameters: {'learning_rate': 2.2811856979577917e-05, 'weight_decay': 0.04634849133877248, 'dropout_rate': 0.1541664707989632, 'hidden_size': 445, 'num_epochs': 14, 'batch_size': 16, 'warmup_ratio': 0.18247831074494578, 'label_smoothing': 0.14960649996874997, 'gradient_accumulation_steps': 3, 'max_grad_norm': 0.8646027558238945}. Best is trial 0 with value: 0.6588020991210096.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.619121,0.39664,0.475
2,No log,1.116985,0.130388,0.29
3,No log,1.047742,0.427181,0.51
4,No log,0.916807,0.543651,0.535
5,No log,0.910867,0.560903,0.58
6,No log,0.87056,0.606067,0.61
7,No log,0.861334,0.620281,0.62


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.286486,0.263728,0.435
2,No log,1.097295,0.336791,0.395
3,No log,1.033319,0.477459,0.545
4,No log,0.960072,0.528649,0.52
5,No log,0.905378,0.565988,0.565
6,No log,0.874425,0.597702,0.585
7,No log,0.873775,0.616863,0.605


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.099792,0.305932,0.475
2,No log,1.069656,0.305932,0.475
3,No log,0.995821,0.549865,0.565
4,No log,0.917389,0.585755,0.575
5,No log,0.873893,0.60909,0.595
6,No log,0.864944,0.62953,0.62
7,No log,0.856699,0.651102,0.645


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.328227,0.223692,0.395
2,No log,1.224172,0.138305,0.26
3,No log,1.126213,0.32455,0.43
4,No log,0.963029,0.569199,0.6
5,No log,0.921077,0.60474,0.61
6,No log,0.899998,0.58135,0.565
7,No log,0.885648,0.628028,0.62


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.206808,0.23844,0.41
2,No log,1.154752,0.23844,0.41
3,No log,1.007647,0.557032,0.565
4,No log,0.940009,0.538937,0.54
5,No log,0.847468,0.607696,0.61
6,No log,0.798104,0.629018,0.625
7,No log,0.796207,0.61314,0.61


[I 2025-06-12 14:16:34,610] Trial 1 finished with value: 0.6258828747510922 and parameters: {'learning_rate': 3.133947795896783e-05, 'weight_decay': 0.04083017396810494, 'dropout_rate': 0.1202974223825031, 'hidden_size': 146, 'num_epochs': 7, 'batch_size': 32, 'warmup_ratio': 0.1401193507889389, 'label_smoothing': 0.07038566321357807, 'gradient_accumulation_steps': 4, 'max_grad_norm': 0.561724404357534}. Best is trial 0 with value: 0.6588020991210096.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.119475,0.214043,0.385
2,No log,0.994438,0.450849,0.445
3,No log,0.877567,0.593672,0.59
4,No log,1.135283,0.560968,0.575
5,No log,1.416968,0.586454,0.59
6,No log,1.146484,0.596649,0.6
7,No log,1.591803,0.623194,0.625
8,0.711100,1.536401,0.619103,0.62
9,0.711100,1.688066,0.63381,0.64
10,0.711100,1.787605,0.63649,0.635


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.061433,0.324025,0.405
2,No log,1.023458,0.360233,0.405
3,No log,0.95438,0.501072,0.495
4,No log,0.807069,0.628764,0.63
5,No log,0.968337,0.570902,0.565
6,No log,1.202825,0.585658,0.58
7,No log,1.237039,0.645887,0.645
8,0.750400,1.584367,0.614694,0.615
9,0.750400,1.865935,0.621783,0.625
10,0.750400,1.984056,0.633956,0.63


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.076898,0.414797,0.455
2,No log,0.987897,0.495407,0.565
3,No log,0.916549,0.436035,0.49
4,No log,0.840177,0.636397,0.63
5,No log,0.878423,0.603585,0.595
6,No log,0.874635,0.670195,0.665
7,No log,1.033325,0.678161,0.675
8,0.859500,1.063502,0.658249,0.65
9,0.859500,1.068438,0.708968,0.705
10,0.859500,1.1847,0.682271,0.675


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.128204,0.199854,0.37
2,No log,0.958287,0.568747,0.59
3,No log,0.881377,0.554356,0.545
4,No log,0.856282,0.654253,0.665
5,No log,0.925581,0.624353,0.64
6,No log,1.182219,0.691662,0.695
7,No log,1.227017,0.634242,0.63
8,0.712300,1.341338,0.691646,0.695
9,0.712300,1.358142,0.705414,0.705
10,0.712300,1.576213,0.668763,0.67


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.10155,0.254911,0.365
2,No log,0.971293,0.531778,0.565
3,No log,0.997063,0.587758,0.595
4,No log,0.877698,0.639926,0.64
5,No log,1.176949,0.58814,0.59
6,No log,1.372051,0.637597,0.64
7,No log,1.40058,0.654937,0.655
8,0.631300,1.528887,0.674753,0.675
9,0.631300,1.659222,0.675613,0.675
10,0.631300,1.853882,0.654956,0.655


[I 2025-06-12 14:21:36,864] Trial 2 finished with value: 0.6677029037325741 and parameters: {'learning_rate': 9.651505865174171e-05, 'weight_decay': 0.04955116071134053, 'dropout_rate': 0.24041557923241777, 'hidden_size': 147, 'num_epochs': 12, 'batch_size': 32, 'warmup_ratio': 0.17907146642763758, 'label_smoothing': 0.11720574361739745, 'gradient_accumulation_steps': 2, 'max_grad_norm': 0.8531644146273072}. Best is trial 2 with value: 0.6677029037325741.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tr

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.577428,0.159434,0.325
2,No log,1.233223,0.214043,0.385
3,No log,1.078759,0.271545,0.375
4,No log,1.046096,0.487109,0.495
5,No log,1.012228,0.467911,0.475
6,No log,0.961756,0.505368,0.51
7,No log,0.929333,0.551477,0.545
8,No log,0.930397,0.546345,0.545
9,No log,0.90597,0.586899,0.58
10,No log,0.89587,0.589288,0.585


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.506889,0.150913,0.315
2,No log,1.20457,0.164867,0.28
3,No log,1.075389,0.415846,0.48
4,No log,1.060813,0.316873,0.385
5,No log,0.984199,0.47848,0.52
6,No log,0.925954,0.549318,0.545
7,No log,0.885914,0.591944,0.59
8,No log,0.889967,0.597914,0.595
9,No log,0.874202,0.589698,0.585
10,No log,0.870613,0.580769,0.575


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.682682,0.12642,0.285
2,No log,1.143272,0.305932,0.475
3,No log,1.07856,0.237893,0.345
4,No log,1.046427,0.516171,0.51
5,No log,0.983387,0.52319,0.53
6,No log,0.921799,0.594638,0.585
7,No log,0.919207,0.575881,0.565
8,No log,0.85934,0.669447,0.665
9,No log,0.862184,0.63276,0.62
10,No log,0.833863,0.656084,0.65


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.494871,0.199854,0.37
2,No log,1.277125,0.223692,0.395
3,No log,1.0779,0.209116,0.29
4,No log,1.036497,0.417927,0.48
5,No log,0.982911,0.426804,0.45
6,No log,0.909248,0.582344,0.585
7,No log,0.91069,0.532735,0.53
8,No log,0.83027,0.647406,0.645
9,No log,0.875123,0.565448,0.565
10,No log,0.804564,0.66088,0.66


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.650568,0.118627,0.275
2,No log,1.130744,0.310609,0.39
3,No log,1.091609,0.303938,0.385
4,No log,1.025047,0.400463,0.475
5,No log,0.977514,0.483856,0.515
6,No log,0.933121,0.549359,0.555
7,No log,0.905643,0.562182,0.56
8,No log,0.896904,0.600304,0.6
9,No log,0.881793,0.568605,0.58
10,No log,0.858544,0.638,0.635


[I 2025-06-12 14:26:56,702] Trial 3 finished with value: 0.6277723333781332 and parameters: {'learning_rate': 6.987639873144666e-06, 'weight_decay': 0.017094706916011178, 'dropout_rate': 0.10087907262027507, 'hidden_size': 144, 'num_epochs': 13, 'batch_size': 32, 'warmup_ratio': 0.13995019473152928, 'label_smoothing': 0.0559420012715856, 'gradient_accumulation_steps': 3, 'max_grad_norm': 0.7170311072416031}. Best is trial 2 with value: 0.6677029037325741.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tr

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.155858,0.259287,0.395
2,No log,1.113568,0.459421,0.51
3,No log,0.901725,0.603221,0.6
4,No log,1.012784,0.574291,0.575
5,No log,0.954857,0.630119,0.63
6,0.860000,0.962455,0.626159,0.625
7,0.860000,1.185053,0.606238,0.605
8,0.860000,1.41478,0.596206,0.6


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.110286,0.219448,0.31
2,No log,0.89381,0.590545,0.585
3,No log,0.912044,0.569637,0.56
4,No log,0.920027,0.618393,0.62
5,No log,0.963891,0.639877,0.635
6,0.900700,0.967525,0.654647,0.65
7,0.900700,1.343631,0.628865,0.625
8,0.900700,1.434728,0.657061,0.655
9,0.900700,1.634727,0.63779,0.635
10,0.900700,1.681892,0.657459,0.655


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.072088,0.285949,0.325
2,No log,0.908563,0.564369,0.585
3,No log,0.868306,0.557555,0.55
4,No log,0.738167,0.730342,0.725
5,No log,0.86958,0.665596,0.655
6,0.926700,0.981769,0.686928,0.68
7,0.926700,0.985119,0.728764,0.725


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.088857,0.282899,0.39
2,No log,0.96309,0.463916,0.495
3,No log,0.798405,0.66737,0.67
4,No log,0.777105,0.685982,0.69
5,No log,0.879345,0.696881,0.705
6,0.908900,0.908555,0.676952,0.675
7,0.908900,1.578803,0.605596,0.6
8,0.908900,1.148511,0.673355,0.67


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.078252,0.150913,0.315
2,No log,0.915004,0.54609,0.565
3,No log,0.850156,0.638578,0.64
4,No log,0.900853,0.633345,0.635
5,No log,0.886732,0.670174,0.67
6,0.888700,1.022258,0.660492,0.66
7,0.888700,1.144478,0.658955,0.66
8,0.888700,1.325257,0.629858,0.63


[I 2025-06-12 14:31:02,188] Trial 4 finished with value: 0.6593528665562608 and parameters: {'learning_rate': 2.835108248234944e-05, 'weight_decay': 0.048460296065866264, 'dropout_rate': 0.1144030786056872, 'hidden_size': 349, 'num_epochs': 12, 'batch_size': 16, 'warmup_ratio': 0.10653923779942816, 'label_smoothing': 0.07695329086683819, 'gradient_accumulation_steps': 3, 'max_grad_norm': 0.6039960735807877}. Best is trial 2 with value: 0.6677029037325741.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tr

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.113647,0.237919,0.36
2,No log,1.126334,0.214043,0.385
3,No log,1.058862,0.392205,0.455
4,No log,1.0487,0.399521,0.48
5,No log,0.952011,0.534496,0.525
6,No log,0.937986,0.549447,0.55
7,No log,0.935366,0.594982,0.605
8,No log,0.906028,0.56052,0.56
9,No log,0.882145,0.603329,0.6
10,No log,0.856257,0.653829,0.65


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.107864,0.118741,0.255
2,No log,1.044763,0.401816,0.44
3,No log,0.952605,0.537837,0.56
4,No log,0.889281,0.60587,0.6
5,No log,0.882809,0.623286,0.62
6,No log,0.866227,0.655413,0.65
7,No log,0.891658,0.639672,0.635
8,No log,0.911546,0.637914,0.64
9,No log,0.947195,0.617519,0.61


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.087818,0.324784,0.34
2,No log,1.076093,0.113246,0.25
3,No log,1.055591,0.401887,0.465
4,No log,0.972335,0.455696,0.46
5,No log,0.890193,0.56356,0.545
6,No log,0.890196,0.642602,0.645
7,No log,0.868733,0.611468,0.595
8,No log,0.813199,0.693622,0.69
9,No log,0.849048,0.674,0.665
10,No log,0.830556,0.674235,0.665


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.119817,0.289041,0.4
2,No log,1.126393,0.223692,0.395
3,No log,1.090221,0.22771,0.315
4,No log,1.008408,0.536464,0.56
5,No log,0.966942,0.465361,0.465
6,No log,0.935267,0.575696,0.59
7,No log,0.884884,0.567017,0.555
8,No log,0.782046,0.656263,0.665
9,No log,0.854129,0.620328,0.625
10,No log,0.740619,0.696237,0.7


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.085381,0.2932,0.395
2,No log,1.042706,0.374071,0.455
3,No log,0.946495,0.559559,0.555
4,No log,0.91937,0.532613,0.555
5,No log,0.911138,0.598695,0.605
6,No log,0.88853,0.578172,0.58
7,No log,0.82601,0.624741,0.625
8,No log,0.858876,0.630763,0.63
9,No log,0.826023,0.640591,0.64
10,No log,0.863175,0.652813,0.65


[I 2025-06-12 14:36:26,594] Trial 5 finished with value: 0.6692425446811727 and parameters: {'learning_rate': 1.5137637934759353e-05, 'weight_decay': 0.03447578824135196, 'dropout_rate': 0.2977488868871223, 'hidden_size': 256, 'num_epochs': 14, 'batch_size': 32, 'warmup_ratio': 0.10028770175102161, 'label_smoothing': 0.10533222640303797, 'gradient_accumulation_steps': 4, 'max_grad_norm': 0.5616130382159785}. Best is trial 5 with value: 0.6692425446811727.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tr

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.093001,0.264646,0.345
2,No log,1.047217,0.495767,0.515
3,No log,0.860147,0.626184,0.625
4,1.084000,1.129576,0.540167,0.555
5,1.084000,1.142736,0.574436,0.585
6,1.084000,1.345175,0.606832,0.605


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.065202,0.400686,0.465
2,No log,1.029064,0.43146,0.44
3,No log,0.995599,0.568724,0.56
4,1.079700,0.851617,0.636762,0.635
5,1.079700,1.093922,0.627421,0.625
6,1.079700,1.247547,0.577169,0.57
7,1.079700,1.505824,0.599973,0.595


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.135692,0.092903,0.24
2,No log,0.9593,0.589203,0.59
3,No log,0.883835,0.59657,0.62
4,1.096600,0.810478,0.716399,0.715
5,1.096600,0.920777,0.693701,0.69
6,1.096600,1.162696,0.658762,0.65
7,1.096600,1.369757,0.654743,0.645


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.10008,0.164023,0.27
2,No log,0.974128,0.472077,0.55
3,No log,0.925656,0.53978,0.575
4,1.113000,0.895662,0.613436,0.625
5,1.113000,0.864436,0.681725,0.68
6,1.113000,1.043967,0.698968,0.7
7,1.113000,1.557952,0.660733,0.65
8,0.350300,1.79098,0.65445,0.655
9,0.350300,2.242987,0.667147,0.67


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.109765,0.150913,0.315
2,No log,0.873314,0.571949,0.575
3,No log,0.818112,0.629017,0.63
4,1.045200,1.087917,0.585895,0.59
5,1.045200,0.939994,0.639662,0.64
6,1.045200,1.287055,0.679024,0.68
7,1.045200,1.428248,0.67005,0.67
8,0.314600,2.125236,0.652095,0.655
9,0.314600,2.359907,0.654464,0.655


[I 2025-06-12 14:40:07,633] Trial 6 finished with value: 0.63663193115006 and parameters: {'learning_rate': 4.201822424521997e-05, 'weight_decay': 0.019207922863504413, 'dropout_rate': 0.19454269094658716, 'hidden_size': 426, 'num_epochs': 13, 'batch_size': 16, 'warmup_ratio': 0.16780517180182053, 'label_smoothing': 0.09574677314398865, 'gradient_accumulation_steps': 2, 'max_grad_norm': 0.6288302687016446}. Best is trial 5 with value: 0.6692425446811727.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tru

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.362901,0.159434,0.325
2,No log,1.147073,0.289623,0.355
3,No log,1.097476,0.315552,0.385
4,No log,1.085575,0.302937,0.365
5,No log,1.073026,0.422169,0.43
6,No log,1.071828,0.360866,0.395
7,No log,1.070907,0.386836,0.435
8,No log,1.074192,0.379193,0.44


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.578788,0.1,0.25
2,No log,1.148229,0.303083,0.355
3,No log,1.086323,0.28291,0.44
4,No log,1.071992,0.482248,0.48
5,No log,1.070226,0.337511,0.37
6,No log,1.061314,0.440047,0.435
7,No log,1.055244,0.465788,0.46


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.627539,0.092903,0.24
2,No log,1.132492,0.100088,0.235
3,No log,1.095779,0.317507,0.425
4,No log,1.088576,0.412208,0.44
5,No log,1.084996,0.388786,0.385
6,No log,1.082155,0.365888,0.375
7,No log,1.076777,0.405321,0.405


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.647926,0.089433,0.235
2,No log,1.136995,0.150235,0.26
3,No log,1.108263,0.247549,0.395
4,No log,1.106728,0.273154,0.38
5,No log,1.102123,0.287615,0.365
6,No log,1.089096,0.307581,0.39
7,No log,1.078342,0.321402,0.405
8,No log,1.073227,0.334613,0.415
9,No log,1.06898,0.354908,0.425
10,No log,1.06944,0.345148,0.42


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.130656,0.225098,0.31
2,No log,1.131219,0.274742,0.325
3,No log,1.139247,0.23844,0.41
4,No log,1.105776,0.258262,0.415
5,No log,1.090908,0.381844,0.44
6,No log,1.085361,0.3831,0.44
7,No log,1.081035,0.305309,0.42
8,No log,1.081369,0.269701,0.405
9,No log,1.079591,0.269701,0.405


[I 2025-06-12 14:43:51,268] Trial 7 finished with value: 0.37302994400622685 and parameters: {'learning_rate': 1.665703692818204e-06, 'weight_decay': 0.041950979902312634, 'dropout_rate': 0.18659220105042623, 'hidden_size': 451, 'num_epochs': 11, 'batch_size': 32, 'warmup_ratio': 0.11795330512813049, 'label_smoothing': 0.08298708255120932, 'gradient_accumulation_steps': 3, 'max_grad_norm': 0.5305508606057374}. Best is trial 5 with value: 0.6692425446811727.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.305806,0.214043,0.385
2,No log,1.133381,0.425954,0.505
3,No log,1.146294,0.441679,0.51
4,No log,0.8395,0.6228,0.62
5,No log,0.904469,0.608309,0.61
6,No log,0.924864,0.628999,0.625
7,No log,0.933143,0.620303,0.62
8,No log,0.949663,0.653298,0.65
9,No log,0.949207,0.652134,0.65


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.12527,0.274494,0.44
2,No log,1.059556,0.30798,0.445
3,No log,0.937086,0.545802,0.56
4,No log,0.944721,0.595023,0.585
5,No log,0.947836,0.610823,0.605
6,No log,0.943126,0.623384,0.62
7,No log,0.966867,0.630451,0.625
8,No log,1.007779,0.625283,0.62
9,No log,0.997641,0.620372,0.615


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.102861,0.464732,0.535
2,No log,0.96379,0.424367,0.51
3,No log,0.84266,0.614946,0.61
4,No log,0.778388,0.685035,0.68
5,No log,0.758941,0.687593,0.68
6,No log,0.797056,0.713726,0.71
7,No log,0.855065,0.670373,0.66
8,No log,0.823052,0.705823,0.7
9,No log,0.810726,0.711073,0.705


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.127912,0.333181,0.44
2,No log,1.021124,0.460428,0.54
3,No log,0.858381,0.567283,0.595
4,No log,0.810796,0.630699,0.645
5,No log,0.777416,0.67866,0.68
6,No log,0.739744,0.70671,0.705
7,No log,0.803612,0.721271,0.72
8,No log,0.787011,0.736585,0.735
9,No log,0.78162,0.727269,0.725


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.289103,0.337142,0.415
2,No log,1.092865,0.24911,0.415
3,No log,0.938055,0.55093,0.575
4,No log,0.908064,0.623753,0.62
5,No log,0.879577,0.622138,0.62
6,No log,0.932545,0.657632,0.66
7,No log,0.84877,0.655323,0.655
8,No log,0.851345,0.671266,0.67
9,No log,0.84496,0.663114,0.66


[I 2025-06-12 14:47:50,304] Trial 8 finished with value: 0.6747922887361357 and parameters: {'learning_rate': 3.354100053227975e-05, 'weight_decay': 0.025573937515468943, 'dropout_rate': 0.22975851341070952, 'hidden_size': 317, 'num_epochs': 9, 'batch_size': 32, 'warmup_ratio': 0.11351812674740831, 'label_smoothing': 0.06721970339723499, 'gradient_accumulation_steps': 3, 'max_grad_norm': 0.7154661794845409}. Best is trial 8 with value: 0.6747922887361357.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tr

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.109483,0.163248,0.295
2,No log,1.081236,0.300887,0.375
3,No log,1.061368,0.465159,0.5
4,1.214400,1.048604,0.424879,0.48
5,1.214400,1.016292,0.479755,0.485
6,1.214400,1.002463,0.507146,0.525
7,1.214400,0.998744,0.51699,0.525
8,1.060000,0.996926,0.519401,0.525


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.099801,0.336755,0.39
2,No log,1.07619,0.292858,0.44
3,No log,1.056125,0.428976,0.485
4,1.181700,1.041454,0.487278,0.49
5,1.181700,1.022972,0.522092,0.52
6,1.181700,1.013708,0.486855,0.485
7,1.181700,1.007867,0.502097,0.5
8,1.087800,1.006761,0.50824,0.505


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.102869,0.215388,0.305
2,No log,1.067734,0.373743,0.475
3,No log,1.05208,0.4729,0.545
4,1.188300,1.041146,0.504302,0.515
5,1.188300,1.024553,0.527544,0.54
6,1.188300,1.021768,0.519183,0.52
7,1.188300,1.014102,0.538888,0.535
8,1.097300,1.013989,0.529094,0.525


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.09952,0.294738,0.325
2,No log,1.101986,0.234363,0.4
3,No log,1.090073,0.332684,0.4
4,1.174400,1.047168,0.510337,0.505
5,1.174400,1.046504,0.414262,0.465
6,1.174400,1.052435,0.389974,0.43
7,1.174400,1.042767,0.413219,0.45


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.18539,0.173764,0.26
2,No log,1.105857,0.249132,0.415
3,No log,1.073372,0.317901,0.435
4,1.248200,1.066596,0.313379,0.445
5,1.248200,1.049038,0.479014,0.515
6,1.248200,1.033413,0.536336,0.545
7,1.248200,1.029178,0.547108,0.56
8,1.080000,1.029246,0.54558,0.56


[I 2025-06-12 14:51:38,240] Trial 9 finished with value: 0.5031068555422149 and parameters: {'learning_rate': 1.7226365895392647e-06, 'weight_decay': 0.03575117493739948, 'dropout_rate': 0.129127888541482, 'hidden_size': 291, 'num_epochs': 8, 'batch_size': 16, 'warmup_ratio': 0.1260990181942334, 'label_smoothing': 0.14657805796516826, 'gradient_accumulation_steps': 2, 'max_grad_norm': 0.8573848066777953}. Best is trial 8 with value: 0.6747922887361357.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True.

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.083954,0.35665,0.395
2,No log,1.064354,0.385436,0.47
3,No log,1.01158,0.534788,0.545
4,1.193900,0.977598,0.556937,0.565
5,1.193900,0.962191,0.547807,0.54


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.067603,0.353444,0.44
2,No log,1.035483,0.437541,0.485
3,No log,0.979307,0.487698,0.545
4,1.267800,0.93838,0.54678,0.565
5,1.267800,0.940205,0.547055,0.565


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.084682,0.358663,0.4
2,No log,1.084719,0.258208,0.355
3,No log,1.020698,0.486494,0.545
4,1.266300,0.974084,0.530068,0.54
5,1.266300,0.964584,0.53398,0.55


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.090702,0.294589,0.415
2,No log,1.046308,0.50155,0.58
3,No log,0.989984,0.56117,0.585
4,1.293600,0.944857,0.514941,0.565
5,1.293600,0.940899,0.51405,0.575


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.115436,0.360111,0.42
2,No log,1.065266,0.416809,0.51
3,No log,1.055792,0.400618,0.485
4,1.293600,0.98559,0.466089,0.525
5,1.293600,0.982855,0.473727,0.52


[I 2025-06-12 14:53:58,326] Trial 10 finished with value: 0.5233238286470928 and parameters: {'learning_rate': 4.379435657393825e-06, 'weight_decay': 0.023609942770877274, 'dropout_rate': 0.2478088532278758, 'hidden_size': 350, 'num_epochs': 5, 'batch_size': 32, 'warmup_ratio': 0.1992865573217512, 'label_smoothing': 0.051097152236837425, 'gradient_accumulation_steps': 1, 'max_grad_norm': 0.9775280169285012}. Best is trial 8 with value: 0.6747922887361357.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tr

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.12072,0.1509,0.3
2,No log,1.054293,0.401051,0.45
3,No log,0.993601,0.473925,0.47
4,No log,0.934215,0.542919,0.54
5,No log,0.92154,0.541437,0.54
6,No log,0.911363,0.558117,0.555
7,No log,0.900035,0.565953,0.56
8,No log,0.893737,0.612688,0.605
9,No log,0.891202,0.61221,0.605


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.141595,0.208076,0.335
2,No log,1.067165,0.462142,0.54
3,No log,1.048061,0.478188,0.505
4,No log,1.012022,0.475549,0.505
5,No log,0.926317,0.609522,0.615
6,No log,0.870887,0.663595,0.66
7,No log,0.856918,0.654007,0.65
8,No log,0.846501,0.653495,0.65
9,No log,0.84414,0.66346,0.66


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.198938,0.19094,0.3
2,No log,1.07451,0.327244,0.405
3,No log,1.057957,0.467754,0.535
4,No log,1.079712,0.251267,0.35
5,No log,0.962813,0.544356,0.585
6,No log,0.911516,0.595926,0.6
7,No log,0.923304,0.594994,0.6
8,No log,0.89179,0.61301,0.61
9,No log,0.888408,0.61301,0.61


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.105905,0.297797,0.375
2,No log,1.070966,0.436408,0.505
3,No log,1.062153,0.380706,0.405
4,No log,1.025935,0.528458,0.525
5,No log,0.971001,0.588005,0.6
6,No log,0.92117,0.606503,0.605
7,No log,0.899864,0.612948,0.61
8,No log,0.893599,0.603811,0.6
9,No log,0.888635,0.627931,0.625


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.167029,0.12247,0.275
2,No log,1.110203,0.355113,0.425
3,No log,1.114895,0.387848,0.465
4,No log,1.093221,0.21818,0.325
5,No log,1.036099,0.404813,0.475
6,No log,0.983522,0.466553,0.505
7,No log,0.935064,0.535693,0.545
8,No log,0.919584,0.562548,0.565
9,No log,0.913361,0.567465,0.57


[I 2025-06-12 14:57:56,012] Trial 11 finished with value: 0.6168150982294225 and parameters: {'learning_rate': 1.2311408045096863e-05, 'weight_decay': 0.0288436168111805, 'dropout_rate': 0.2758494303898992, 'hidden_size': 254, 'num_epochs': 9, 'batch_size': 32, 'warmup_ratio': 0.10008508792501766, 'label_smoothing': 0.11638788969514294, 'gradient_accumulation_steps': 4, 'max_grad_norm': 0.7037440254142179}. Best is trial 8 with value: 0.6747922887361357.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tru

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.135907,0.177546,0.31
2,No log,1.111876,0.342498,0.425
3,No log,1.061162,0.343066,0.43
4,No log,1.022045,0.40291,0.485
5,No log,1.001983,0.437092,0.47
6,No log,0.992321,0.507126,0.535
7,No log,0.989436,0.574502,0.58
8,No log,0.962269,0.511169,0.52
9,No log,0.932761,0.629587,0.625
10,No log,0.926401,0.593797,0.59


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.180425,0.263728,0.435
2,No log,1.096218,0.157192,0.275
3,No log,1.060866,0.429045,0.49
4,No log,1.002087,0.550679,0.54
5,No log,0.919481,0.579076,0.575
6,No log,0.880462,0.635268,0.63
7,No log,0.8837,0.649214,0.645
8,No log,0.906125,0.638717,0.635
9,No log,0.929227,0.59737,0.595
10,No log,0.942565,0.646585,0.64


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.159781,0.219428,0.32
2,No log,1.10345,0.327899,0.485
3,No log,1.044941,0.468747,0.56
4,No log,1.00457,0.430381,0.485
5,No log,0.916008,0.61108,0.605
6,No log,0.878666,0.57007,0.575
7,No log,0.903579,0.541379,0.545
8,No log,0.829028,0.664887,0.665
9,No log,0.868561,0.62871,0.62
10,No log,0.805305,0.67886,0.675


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.116407,0.1136,0.235
2,No log,1.254827,0.223692,0.395
3,No log,1.132212,0.311237,0.38
4,No log,1.060276,0.302571,0.43
5,No log,1.002215,0.376748,0.425
6,No log,0.932386,0.481829,0.52
7,No log,0.917459,0.551205,0.545
8,No log,0.805529,0.650388,0.65
9,No log,0.907127,0.594057,0.585
10,No log,0.755164,0.688654,0.69


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.265066,0.118627,0.275
2,No log,1.159778,0.23844,0.41
3,No log,1.048245,0.402596,0.475
4,No log,1.021326,0.403508,0.48
5,No log,0.952651,0.514169,0.52
6,No log,0.878706,0.598942,0.595
7,No log,0.847588,0.617826,0.615
8,No log,0.857227,0.567166,0.575
9,No log,0.835609,0.640931,0.64
10,No log,0.83665,0.640637,0.64


[I 2025-06-12 15:03:31,404] Trial 12 finished with value: 0.6495282720091333 and parameters: {'learning_rate': 1.3064423246575939e-05, 'weight_decay': 0.01041612737182084, 'dropout_rate': 0.2848429518404503, 'hidden_size': 246, 'num_epochs': 15, 'batch_size': 32, 'warmup_ratio': 0.12171607180436328, 'label_smoothing': 0.10940391900079258, 'gradient_accumulation_steps': 4, 'max_grad_norm': 0.6603974203307809}. Best is trial 8 with value: 0.6747922887361357.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=T

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.125813,0.214043,0.385
2,No log,0.916534,0.571445,0.565
3,No log,1.041961,0.512273,0.555
4,No log,0.881162,0.626969,0.625
5,No log,0.945986,0.617003,0.62
6,No log,0.969621,0.681236,0.68
7,No log,1.023687,0.607755,0.605
8,No log,1.30892,0.622184,0.625
9,No log,1.319955,0.643176,0.645


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.140289,0.263728,0.435
2,No log,0.990887,0.465603,0.48
3,No log,0.865873,0.619844,0.61
4,No log,1.022571,0.589918,0.58
5,No log,1.047826,0.600292,0.595
6,No log,1.080153,0.634247,0.63
7,No log,1.219882,0.628161,0.625
8,No log,1.246936,0.614154,0.61
9,No log,1.303793,0.642026,0.64
10,No log,1.293279,0.647543,0.645


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.076474,0.265082,0.39
2,No log,0.930793,0.647131,0.65
3,No log,0.812053,0.637982,0.64
4,No log,0.792192,0.701253,0.7
5,No log,0.791996,0.704286,0.7
6,No log,0.878255,0.740161,0.735
7,No log,0.962244,0.686755,0.68
8,No log,1.037343,0.6934,0.69
9,No log,1.089595,0.689511,0.685


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.098883,0.244781,0.405
2,No log,1.060671,0.330356,0.39
3,No log,0.780586,0.686827,0.685
4,No log,0.853787,0.628242,0.645
5,No log,0.897454,0.686762,0.69
6,No log,0.952765,0.676254,0.68


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.243436,0.159304,0.315
2,No log,0.984011,0.434503,0.53
3,No log,0.937058,0.521213,0.56
4,No log,0.945459,0.614723,0.62
5,No log,0.996285,0.617209,0.615
6,No log,0.902953,0.661552,0.66
7,No log,0.926375,0.671931,0.675
8,No log,0.971043,0.64137,0.64
9,No log,1.019912,0.679323,0.68
10,No log,1.032804,0.65934,0.66


[I 2025-06-12 15:07:23,984] Trial 13 finished with value: 0.663164707772766 and parameters: {'learning_rate': 7.514903613428703e-05, 'weight_decay': 0.03153768782726958, 'dropout_rate': 0.2294220848529034, 'hidden_size': 230, 'num_epochs': 10, 'batch_size': 32, 'warmup_ratio': 0.11146603146618232, 'label_smoothing': 0.09407880548397833, 'gradient_accumulation_steps': 4, 'max_grad_norm': 0.7791661418865448}. Best is trial 8 with value: 0.6747922887361357.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tru

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.131594,0.130388,0.29
2,No log,1.132437,0.287733,0.375
3,No log,1.092641,0.266503,0.39
4,No log,1.09392,0.394882,0.465
5,No log,1.082858,0.37314,0.45
6,No log,1.08323,0.363734,0.445


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.091208,0.274372,0.335
2,No log,1.079756,0.273175,0.375
3,No log,1.065657,0.295905,0.395
4,No log,1.062876,0.333279,0.41
5,No log,1.057981,0.341808,0.415
6,No log,1.055573,0.357555,0.425


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.08614,0.383368,0.365
2,No log,1.113292,0.214676,0.325
3,No log,1.088757,0.254125,0.355
4,No log,1.107139,0.147501,0.295


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.095284,0.261602,0.34
2,No log,1.07864,0.284472,0.425
3,No log,1.135495,0.223692,0.395
4,No log,1.109969,0.312955,0.425
5,No log,1.087936,0.318768,0.43
6,No log,1.08306,0.323856,0.435


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.0834,0.371136,0.39
2,No log,1.098599,0.193863,0.305
3,No log,1.106627,0.32918,0.41
4,No log,1.125423,0.19282,0.31


[I 2025-06-12 15:09:44,540] Trial 14 finished with value: 0.27709322841492845 and parameters: {'learning_rate': 5.757095913206949e-06, 'weight_decay': 0.028327662177859796, 'dropout_rate': 0.2907527792965786, 'hidden_size': 512, 'num_epochs': 6, 'batch_size': 32, 'warmup_ratio': 0.13248703500208858, 'label_smoothing': 0.12684667012294445, 'gradient_accumulation_steps': 4, 'max_grad_norm': 0.7752072347321184}. Best is trial 8 with value: 0.6747922887361357.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=T

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.101363,0.210183,0.31
2,No log,1.142551,0.358129,0.44
3,No log,1.016673,0.508789,0.535
4,No log,1.04202,0.459054,0.48
5,No log,0.945751,0.536517,0.54
6,No log,0.90558,0.595412,0.59
7,No log,0.871702,0.627677,0.625
8,No log,0.87935,0.642842,0.64
9,No log,0.873981,0.637359,0.635


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.145871,0.100402,0.25
2,No log,1.082273,0.319797,0.435
3,No log,1.107279,0.213203,0.3
4,No log,1.021732,0.525032,0.535
5,No log,0.997783,0.501752,0.49
6,No log,0.924145,0.559666,0.55
7,No log,0.949567,0.482575,0.485
8,No log,0.916292,0.529242,0.52
9,No log,0.912159,0.549843,0.54


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.169648,0.095973,0.22
2,No log,1.069299,0.40561,0.51
3,No log,1.107203,0.13681,0.29
4,No log,1.050183,0.334095,0.375
5,No log,0.994932,0.526188,0.51
6,No log,0.911428,0.57523,0.565
7,No log,0.916679,0.578797,0.565
8,No log,0.915181,0.581308,0.565
9,No log,0.907454,0.568077,0.55


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.151628,0.095302,0.225
2,No log,1.089909,0.2632,0.41
3,No log,1.205818,0.089433,0.235
4,No log,1.022179,0.457908,0.51
5,No log,1.011479,0.423577,0.435
6,No log,0.961004,0.446284,0.45
7,No log,1.015392,0.367719,0.42


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.126565,0.170527,0.285
2,No log,1.070428,0.321975,0.43
3,No log,1.110999,0.250707,0.335
4,No log,1.037519,0.417704,0.46
5,No log,0.915109,0.601761,0.6
6,No log,0.873563,0.618835,0.615
7,No log,0.87667,0.619371,0.615
8,No log,0.874957,0.614448,0.61
9,No log,0.869314,0.613233,0.61


[I 2025-06-12 15:13:33,162] Trial 15 finished with value: 0.5472462941401046 and parameters: {'learning_rate': 1.4571590105354247e-05, 'weight_decay': 0.03474537442303617, 'dropout_rate': 0.2581117681037437, 'hidden_size': 310, 'num_epochs': 9, 'batch_size': 32, 'warmup_ratio': 0.1545528049084788, 'label_smoothing': 0.06983596402103912, 'gradient_accumulation_steps': 3, 'max_grad_norm': 0.5107407023329937}. Best is trial 8 with value: 0.6747922887361357.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tru

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.078707,0.130388,0.29
2,No log,0.993957,0.528672,0.565
3,No log,0.903315,0.623329,0.63
4,1.083900,1.161958,0.534977,0.55
5,1.083900,1.214049,0.591881,0.59
6,1.083900,1.197063,0.611496,0.605


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.081782,0.201883,0.34
2,No log,0.971935,0.560206,0.555
3,No log,1.094193,0.574114,0.56
4,1.040300,0.912007,0.648699,0.645
5,1.040300,1.10796,0.628266,0.625
6,1.040300,1.278744,0.619093,0.615
7,1.040300,1.523729,0.599694,0.595


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.069338,0.3818,0.41
2,No log,0.877525,0.557365,0.585
3,No log,0.841067,0.572519,0.555
4,1.044400,0.839501,0.67914,0.675
5,1.044400,0.925011,0.66339,0.655
6,1.044400,0.961774,0.68604,0.68
7,1.044400,1.37761,0.642492,0.635
8,0.372200,1.196994,0.671545,0.67
9,0.372200,1.808749,0.665681,0.665


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.08513,0.296188,0.42
2,No log,0.826804,0.604568,0.605
3,No log,0.789016,0.652688,0.655
4,1.084700,0.81362,0.629064,0.635
5,1.084700,0.789505,0.685546,0.69
6,1.084700,1.042209,0.665437,0.66
7,1.084700,1.529943,0.63109,0.625
8,0.450600,1.472288,0.628271,0.62


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.213847,0.118627,0.275
2,No log,0.924332,0.53105,0.55
3,No log,0.875425,0.634942,0.64
4,1.037200,0.985325,0.635149,0.64
5,1.037200,1.061641,0.630926,0.63
6,1.037200,1.276872,0.627509,0.63


[I 2025-06-12 15:16:51,764] Trial 16 finished with value: 0.626530448308133 and parameters: {'learning_rate': 4.653111128690738e-05, 'weight_decay': 0.024311687765968946, 'dropout_rate': 0.2189175257342198, 'hidden_size': 218, 'num_epochs': 15, 'batch_size': 32, 'warmup_ratio': 0.10042435905519842, 'label_smoothing': 0.08830048580158864, 'gradient_accumulation_steps': 1, 'max_grad_norm': 0.5726156904729816}. Best is trial 8 with value: 0.6747922887361357.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tr

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.174156,0.130388,0.29
2,No log,1.105365,0.285405,0.415
3,No log,1.033824,0.458852,0.5
4,No log,0.984134,0.524233,0.54
5,No log,0.900796,0.589073,0.59
6,No log,0.869865,0.609872,0.605
7,No log,0.926323,0.621017,0.62
8,No log,0.912299,0.62341,0.62
9,No log,0.925975,0.631571,0.63
10,No log,0.930513,0.631226,0.63


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.322982,0.1,0.25
2,No log,1.054894,0.282542,0.44
3,No log,1.111369,0.1,0.25
4,No log,0.916467,0.586933,0.585
5,No log,0.915909,0.596258,0.585
6,No log,0.910237,0.588501,0.59
7,No log,0.906345,0.612671,0.605
8,No log,0.911776,0.638632,0.635
9,No log,0.893456,0.617566,0.61
10,No log,0.898416,0.622434,0.615


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.261317,0.193975,0.275
2,No log,1.064884,0.305932,0.475
3,No log,1.143515,0.092903,0.24
4,No log,0.938416,0.577396,0.565
5,No log,0.965817,0.463969,0.48
6,No log,0.817602,0.660952,0.655
7,No log,0.854021,0.625984,0.615
8,No log,0.804182,0.652725,0.64
9,No log,0.79033,0.671384,0.66
10,No log,0.78925,0.675991,0.665


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.283824,0.089433,0.235
2,No log,1.129806,0.223692,0.395
3,No log,1.315699,0.099672,0.24
4,No log,0.915491,0.575417,0.615
5,No log,0.913854,0.597194,0.6
6,No log,0.898259,0.594997,0.6
7,No log,0.812415,0.66959,0.665
8,No log,0.839531,0.663559,0.655
9,No log,0.805107,0.675813,0.67
10,No log,0.812701,0.676444,0.67


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.272981,0.150913,0.315
2,No log,1.16548,0.23844,0.41
3,No log,1.067828,0.304147,0.38
4,No log,0.982968,0.378805,0.455
5,No log,0.889986,0.586952,0.58
6,No log,0.846399,0.59416,0.59
7,No log,0.807029,0.617825,0.615
8,No log,0.792994,0.627428,0.625
9,No log,0.786718,0.621856,0.62
10,No log,0.784837,0.636771,0.635


[I 2025-06-12 15:21:15,383] Trial 17 finished with value: 0.6485730199474712 and parameters: {'learning_rate': 2.00147507926009e-05, 'weight_decay': 0.03772143158166483, 'dropout_rate': 0.17470248431334984, 'hidden_size': 376, 'num_epochs': 10, 'batch_size': 32, 'warmup_ratio': 0.11352688328265735, 'label_smoothing': 0.10539900622816457, 'gradient_accumulation_steps': 4, 'max_grad_norm': 0.6671840068784973}. Best is trial 8 with value: 0.6747922887361357.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tr

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.271908,0.130388,0.29
2,No log,0.90707,0.606096,0.605
3,No log,0.948904,0.571749,0.595
4,No log,1.133384,0.601182,0.6
5,No log,1.022634,0.615336,0.615
6,0.788900,1.143108,0.625807,0.625
7,0.788900,1.17864,0.611275,0.61


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.06348,0.394285,0.45
2,No log,0.862786,0.59758,0.595
3,No log,0.984104,0.621776,0.615
4,No log,0.944642,0.631958,0.63
5,No log,1.167712,0.615694,0.61
6,0.784800,1.193297,0.623583,0.62
7,0.784800,1.245331,0.624299,0.62


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.207153,0.092903,0.24
2,No log,0.867868,0.664719,0.665
3,No log,0.912228,0.566714,0.58
4,No log,0.870921,0.676813,0.67
5,No log,0.851297,0.671237,0.66
6,0.850500,1.001062,0.678968,0.675
7,0.850500,0.982541,0.6724,0.665


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.052208,0.273172,0.34
2,No log,0.827567,0.570497,0.58
3,No log,0.769094,0.682116,0.675
4,No log,0.753547,0.696718,0.7
5,No log,0.830157,0.759232,0.76
6,0.830200,0.94073,0.678744,0.675
7,0.830200,0.97218,0.693253,0.69


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.082725,0.150913,0.315
2,No log,1.005568,0.4008,0.47
3,No log,0.962968,0.565131,0.575
4,No log,0.88503,0.601891,0.605
5,No log,0.844023,0.642223,0.64
6,0.890900,0.883177,0.669185,0.665
7,0.890900,0.895821,0.679085,0.675


[I 2025-06-12 15:24:37,406] Trial 18 finished with value: 0.6560625980067731 and parameters: {'learning_rate': 4.97838711386792e-05, 'weight_decay': 0.022150635558485902, 'dropout_rate': 0.21321459275981544, 'hidden_size': 195, 'num_epochs': 7, 'batch_size': 16, 'warmup_ratio': 0.15490459602379436, 'label_smoothing': 0.06222857168619028, 'gradient_accumulation_steps': 3, 'max_grad_norm': 0.8074908546108185}. Best is trial 8 with value: 0.6747922887361357.
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=Tr

Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.329226,0.283577,0.375
2,No log,1.109227,0.188188,0.285
3,No log,1.07316,0.360291,0.42
4,No log,1.066458,0.362453,0.43
5,No log,1.042683,0.36786,0.415
6,No log,1.033797,0.473232,0.515
7,No log,0.986684,0.522785,0.525
8,1.214500,0.976917,0.537624,0.535
9,1.214500,0.97225,0.521965,0.53
10,1.214500,0.967112,0.539434,0.545


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.130109,0.253879,0.41
2,No log,1.124837,0.139339,0.245
3,No log,1.091448,0.192909,0.29
4,No log,1.099813,0.108483,0.25


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.096556,0.365405,0.45
2,No log,1.164724,0.127412,0.285
3,No log,1.062788,0.442806,0.51
4,No log,1.047103,0.532101,0.535
5,No log,1.014991,0.538339,0.54
6,No log,0.974768,0.539863,0.53
7,No log,0.942143,0.600953,0.6
8,1.161200,0.927387,0.591741,0.585
9,1.161200,0.911686,0.584415,0.575
10,1.161200,0.904689,0.585414,0.575


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.770058,0.223692,0.395
2,No log,1.215416,0.170045,0.265
3,No log,1.127652,0.165844,0.27
4,No log,1.123219,0.148699,0.265


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Epoch,Training Loss,Validation Loss,Party F1,Party Accuracy
1,No log,1.133578,0.299158,0.36
2,No log,1.16786,0.118627,0.275
3,No log,1.081936,0.373426,0.45
4,No log,1.06534,0.421058,0.475
5,No log,1.024153,0.486084,0.49
6,No log,0.975105,0.569771,0.565
7,No log,0.945399,0.575444,0.575
8,1.160000,0.919183,0.577688,0.575
9,1.160000,0.89564,0.58846,0.585
10,1.160000,0.891036,0.5987,0.595


[I 2025-06-12 15:28:12,588] Trial 19 finished with value: 0.39416749004862944 and parameters: {'learning_rate': 3.7115502194854245e-06, 'weight_decay': 0.01325740662356341, 'dropout_rate': 0.2999207600288583, 'hidden_size': 280, 'num_epochs': 11, 'batch_size': 32, 'warmup_ratio': 0.12678713956628979, 'label_smoothing': 0.13247546747408104, 'gradient_accumulation_steps': 2, 'max_grad_norm': 0.972587111976976}. Best is trial 8 with value: 0.6747922887361357.


Best trial:
  Value:  0.6747922887361357
  Params: 
    learning_rate: 3.354100053227975e-05
    weight_decay: 0.025573937515468943
    dropout_rate: 0.22975851341070952
    hidden_size: 317
    num_epochs: 9
    batch_size: 32
    warmup_ratio: 0.11351812674740831
    label_smoothing: 0.06721970339723499
    gradient_accumulation_steps: 3
    max_grad_norm: 0.7154661794845409


In [17]:
# Collect the results of every trial of `study` into a DataFrame and save it as CSV.
trials_df = study.trials_dataframe()
trials_df.to_csv('hyperparameter_trials.csv', index=False)
# Last expression of the cell: the notebook renders the first rows of the table.
trials_df.head()

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batch_size,params_dropout_rate,params_gradient_accumulation_steps,params_hidden_size,params_label_smoothing,params_learning_rate,params_max_grad_norm,params_num_epochs,params_warmup_ratio,params_weight_decay,state
0,0,0.658802,2025-06-12 14:08:06.384102,2025-06-12 14:13:28.657276,0 days 00:05:22.273174,16,0.154166,3,445,0.149606,2.3e-05,0.864603,14,0.182478,0.046348,COMPLETE
1,1,0.625883,2025-06-12 14:13:28.658324,2025-06-12 14:16:34.610115,0 days 00:03:05.951791,32,0.120297,4,146,0.070386,3.1e-05,0.561724,7,0.140119,0.04083,COMPLETE
2,2,0.667703,2025-06-12 14:16:34.610999,2025-06-12 14:21:36.864506,0 days 00:05:02.253507,32,0.240416,2,147,0.117206,9.7e-05,0.853164,12,0.179071,0.049551,COMPLETE
3,3,0.627772,2025-06-12 14:21:36.865404,2025-06-12 14:26:56.702479,0 days 00:05:19.837075,32,0.100879,3,144,0.055942,7e-06,0.717031,13,0.13995,0.017095,COMPLETE
4,4,0.659353,2025-06-12 14:26:56.703408,2025-06-12 14:31:02.187637,0 days 00:04:05.484229,16,0.114403,3,349,0.076953,2.8e-05,0.603996,12,0.106539,0.04846,COMPLETE


In [21]:
# Print the row of the trial with the highest objective value.
print(trials_df.sort_values(by='value', ascending=False).iloc[0])

number                                                         8
value                                                   0.674792
datetime_start                        2025-06-12 14:43:51.269477
datetime_complete                     2025-06-12 14:47:50.303693
duration                                  0 days 00:03:59.034216
params_batch_size                                             32
params_dropout_rate                                     0.229759
params_gradient_accumulation_steps                             3
params_hidden_size                                           317
params_label_smoothing                                   0.06722
params_learning_rate                                    0.000034
params_max_grad_norm                                    0.715466
params_num_epochs                                              9
params_warmup_ratio                                     0.113518
params_weight_decay                                     0.025574
state                    

In [18]:
# Keep the best hyperparameters for training the final model.
# NOTE(review): `trial` is defined in an earlier cell — presumably study.best_trial; confirm.
best_params = trial.params

In [19]:
# Display the selected hyperparameters in the notebook.
best_params

{'learning_rate': 3.354100053227975e-05,
 'weight_decay': 0.025573937515468943,
 'dropout_rate': 0.22975851341070952,
 'hidden_size': 317,
 'num_epochs': 9,
 'batch_size': 32,
 'warmup_ratio': 0.11351812674740831,
 'label_smoothing': 0.06721970339723499,
 'gradient_accumulation_steps': 3,
 'max_grad_norm': 0.7154661794845409}

## 7. 모델 저장 및 로드

In [None]:
# Save the model and the tokenizer into the same directory.
# NOTE(review): load_model() reads '<dir>/pytorch_model.bin'; depending on the
# transformers version / `save_safetensors` setting, the Trainer may write
# 'model.safetensors' instead — confirm the file name produced by this call.
trainer.save_model('./news_bias_model')  # save path changed
tokenizer.save_pretrained('./news_bias_model')  # save path changed

# Load the model
def load_model(model_path):
    """Rebuild the classifier and tokenizer stored under ``model_path``.

    Args:
        model_path: Directory that contains ``pytorch_model.bin`` (the model's
            state dict) and the files written by ``tokenizer.save_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple. The weights are loaded onto the CPU;
        the caller is expected to move the model to its target device.

    Raises:
        FileNotFoundError: If ``pytorch_model.bin`` is missing from the directory.
    """
    # NOTE(review): the model is built with the class's default head settings.
    # If training used tuned values (e.g. hidden_size / dropout_rate), the same
    # values presumably have to be passed here for the state dict to match — confirm.
    model = NewsBiasModel('klue/roberta-base')  # model class changed

    weights_path = os.path.join(model_path, 'pytorch_model.bin')
    # map_location='cpu' lets a checkpoint saved on a GPU runtime load on a
    # CPU-only machine; the device is chosen by the caller afterwards.
    state_dict = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(state_dict)

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer

## 8. 새로운 기사에 대한 예측

In [None]:
def predict_news(text, model, tokenizer, device):
    """Classify one article text and return the predicted party name.

    Args:
        text: Article text to classify.
        model: Callable model that accepts ``input_ids`` / ``attention_mask``
            and returns a mapping containing ``'party_logits'``.
        tokenizer: Tokenizer called on ``text``; must return ``'input_ids'``
            and ``'attention_mask'`` tensors.
        device: Device the encoded tensors are moved to before the forward pass.

    Returns:
        The party name ('국민의힘', '민주당' or '그외') whose logit is largest.
    """
    # Class index -> party name.
    label_names = {0: '국민의힘', 1: '민주당', 2: '그외'}

    # Evaluation mode for inference.
    model.eval()

    tokenizer_options = dict(
        add_special_tokens=True,
        max_length=512,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )
    batch = tokenizer(text, **tokenizer_options)
    ids = batch['input_ids'].to(device)
    mask = batch['attention_mask'].to(device)

    # No gradients are needed for prediction.
    with torch.no_grad():
        logits = model(input_ids=ids, attention_mask=mask)['party_logits']
        best_index = torch.argmax(logits, dim=1).item()

    # Convert the numeric label to the party name before returning.
    return label_names[best_index]

In [None]:
def predict_csv_file(csv_path, model, tokenizer, device):
    """Predict the party for every article in a CSV file and save the result.

    The text to classify is taken from the 'text' column. If that column is
    absent but 'title_cleaned' and 'content_cleaned' exist, they are joined
    with a space, the same way the training data builds its 'text' column.

    The predictions are written to ``<csv name>_predicted<ext>`` next to the
    input file, and the distribution of predicted parties is printed.

    Args:
        csv_path: Path of the CSV file to read.
        model: Model forwarded to ``predict_news``.
        tokenizer: Tokenizer forwarded to ``predict_news``.
        device: Device forwarded to ``predict_news``.

    Returns:
        The loaded DataFrame with two added columns: 'predicted_party'
        (party name) and 'predicted_party_label' (numeric label).

    Raises:
        KeyError: If the CSV has neither a 'text' column nor both
            'title_cleaned' and 'content_cleaned'.
    """
    # Same name -> label mapping that the training data uses.
    party_to_label = {'국민의힘': 0, '민주당': 1, '그외': 2}

    # Load the CSV file.
    df = pd.read_csv(csv_path)

    if 'text' in df.columns:
        texts = df['text']
    elif {'title_cleaned', 'content_cleaned'} <= set(df.columns):
        texts = df['title_cleaned'].fillna('') + ' ' + df['content_cleaned'].fillna('')
    else:
        raise KeyError(
            "CSV must contain a 'text' column or both "
            "'title_cleaned' and 'content_cleaned' columns"
        )
    # Missing values are read as float NaN; the tokenizer needs real strings.
    texts = texts.fillna('').astype(str)

    # predict_news returns a single party name per text.
    party_predictions = [
        predict_news(text, model, tokenizer, device)
        for text in tqdm(texts, desc="Predicting")
    ]

    # Separate column names so an existing ground-truth 'party' column is not overwritten.
    df['predicted_party'] = party_predictions
    df['predicted_party_label'] = df['predicted_party'].map(party_to_label)

    # Only the final extension is rewritten, so the output never equals the input path.
    root, ext = os.path.splitext(csv_path)
    output_path = f"{root}_predicted{ext or '.csv'}"
    df.to_csv(output_path, index=False)

    # Print prediction statistics.
    print(f"\n{csv_path} 예측 결과:")
    print("\n정당 예측 분포:")
    print(df['predicted_party'].value_counts())

    return df

In [None]:
# Load the saved model and tokenizer, then move the model to GPU when available.
model, tokenizer = load_model('./news_bias_model')  # model path changed
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Run prediction over a CSV file.
csv_path = "/content/drive/MyDrive/텍스트데이터분석을 위한 딥러닝/팀프로젝트/data/전체1.csv"  # put the actual CSV file path here
predicted_df = predict_csv_file(csv_path, model, tokenizer, device)
# Last expression of the cell: the notebook renders the first rows.
predicted_df.head()

In [None]:
# Save
# NOTE(review): csv_path is the input file that was just read, so this overwrites
# the original CSV with the DataFrame returned by predict_csv_file — confirm this is intended.
predicted_df.to_csv(csv_path, index=False)