# 📄 Document type classification baseline code with WandB Integration



In [1]:

# =============================================================================
# 0. Prepare Environments & Install Libraries
# =============================================================================

# 필요한 라이브러리를 설치합니다.
!pip install -r ../requirements.txt

[0m

In [2]:
# =============================================================================
# 1. Import Libraries & Define Functions
# =============================================================================

import os
import time
import random
import copy

import optuna, math
import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import autocast, GradScaler  # Mixed Precision용

from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, StratifiedKFold
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# WandB 관련 import 추가
import wandb
from datetime import datetime


In [3]:
# =============================================================================
# 1-1. WandB Login and Configuration
# =============================================================================
"""
🚀 팀원 사용 가이드:

1. WandB 계정 생성: https://wandb.ai/signup
2. 이 셀 실행 시 로그인 프롬프트가 나타나면 개인 API 키 입력
3. EXPERIMENT_NAME을 다음과 같이 변경:
   - "member1-baseline"
   - "member2-augmentation-test"  
   - "member3-hyperparameter-tuning"
   등등 각자 다른 이름 사용

4. 팀 대시보드 URL: [여기에 당신의 프로젝트 URL 추가]

⚠️ 주의사항:
- 절대 API 키를 코드에 하드코딩하지 마세요
- EXPERIMENT_NAME만 변경하고 PROJECT_NAME은 그대로 두세요
- 각자 개인 계정으로 로그인해서 실험을 추가하세요
"""

# WandB 로그인 (각자 실행)
try:
    if wandb.api.api_key is None:
        print("WandB에 로그인이 필요합니다.")
        wandb.login()
    else:
        print(f"WandB 로그인 상태: {wandb.api.viewer()['username']}")
except:
    print("WandB 로그인을 진행합니다...")
    wandb.login()

# 프로젝트 설정 (각자 수정할 부분)
PROJECT_NAME = "document-classification-team"  # 모든 팀원 동일
ENTITY = None  # 각자 개인 계정 사용
EXPERIMENT_NAME = "efficientnet-b3-baseline"  # 팀원별로 변경 (예: "member1-hyperopt", "member2-augmentation")

print(f"프로젝트: {PROJECT_NAME}")
print(f"실험명: {EXPERIMENT_NAME}")
print("팀원들은 EXPERIMENT_NAME을 각자 다르게 변경해주세요!")

WandB 로그인 상태: kimsunmin0227
프로젝트: document-classification-team
실험명: efficientnet-b3-baseline
팀원들은 EXPERIMENT_NAME을 각자 다르게 변경해주세요!


In [4]:
# =============================================================================
# 3. Seed & basic augmentations (Mixup)
# =============================================================================

# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True


In [5]:

# =============================================================================
# 4. Dataset Class
# =============================================================================

class ImageDataset(Dataset):
    def __init__(self, data, path, transform=None):
        # CSV 파일이면 읽고, DataFrame이면 그대로 사용
        if isinstance(data, str):
            self.df = pd.read_csv(data).values
        else:
            self.df = data.values  # DataFrame을 numpy array로 변환
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target


In [6]:
import random
import math
import numpy as np
import torch
from tqdm import tqdm
from torch.cuda.amp import GradScaler, autocast
from sklearn.metrics import accuracy_score, f1_score
import wandb

# Cutout (Random Erasing) 함수 정의
def random_erasing(image, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3)):
    if random.random() > p:
        return image
    img_c, img_h, img_w = image.shape[1], image.shape[2], image.shape[3]
    area = img_h * img_w
    
    target_area = random.uniform(scale[0], scale[1]) * area
    aspect_ratio = random.uniform(ratio[0], ratio[1])
    h = int(round(math.sqrt(target_area * aspect_ratio)))
    w = int(round(math.sqrt(target_area / aspect_ratio)))
    
    if h < img_h and w < img_w:
        x = random.randint(0, img_w - w)
        y = random.randint(0, img_h - h)
        image[:, :, y:y+h, x:x+w] = 0.0  # 제거된 영역을 0으로 설정
    return image

# RandomCrop 함수 정의
def random_crop(image, crop_size=0.8):
    img_c, img_h, img_w = image.shape[1], image.shape[2], image.shape[3]
    crop_h = int(img_h * crop_size)
    crop_w = int(img_w * crop_size)
    
    if crop_h >= img_h or crop_w >= img_w:
        return image
    
    x = random.randint(0, img_w - crop_w)
    y = random.randint(0, img_h - crop_h)
    cropped_image = image[:, :, y:y+crop_h, x:x+crop_w]
    
    # 원래 이미지 크기로 복원 (패딩 또는 리사이즈)
    cropped_image = torch.nn.functional.interpolate(cropped_image, size=(img_h, img_w), mode='bilinear', align_corners=False)
    return cropped_image

# Mixup 함수 정의
def mixup_data(x, y, alpha=1.0):
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).cuda()
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def train_one_epoch(loader, model, optimizer, loss_fn, device, epoch=None, fold=None):
    scaler = GradScaler()
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []

    pbar = tqdm(loader, desc=f"Training Epoch {epoch+1 if epoch else '?'}")
    batch_count = 0
    
    for image, targets in pbar:
        image = image.to(device)
        targets = targets.to(device)
        
        # 증강 기법 선택 (Mixup 25%, Cutout 25%, RandomCrop 25%, None 25%) -> (Mixup 25%, Cutout 25%, RandomCrop 50%)
        aug_type = random.choices(['mixup', 'cutout', 'random_crop'], weights=[0.25, 0.25, 0.5])[0]
        mixup_applied = False
        cutout_applied = False
        random_crop_applied = False
        
        if aug_type == 'mixup':
            mixed_x, y_a, y_b, lam = mixup_data(image, targets, alpha=1.0)
            with autocast(): 
                preds = model(mixed_x)
            loss = lam * loss_fn(preds, y_a) + (1 - lam) * loss_fn(preds, y_b)
            mixup_applied = True
        elif aug_type == 'cutout':
            image = random_erasing(image, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3))
            with autocast(): 
                preds = model(image)
            loss = loss_fn(preds, targets)
            cutout_applied = True
        elif aug_type == 'random_crop':
            image = random_crop(image, crop_size=0.8)
            with autocast(): 
                preds = model(image)
            loss = loss_fn(preds, targets)
            random_crop_applied = True
        else:
            with autocast(): 
                preds = model(image)
            loss = loss_fn(preds, targets)

        model.zero_grad(set_to_none=True)
        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        # 배치별 상세 로깅 (100 배치마다)
        if batch_count % 100 == 0 and wandb.run is not None:
            step = epoch * len(loader) + batch_count if epoch is not None else batch_count
            wandb.log({
                f"fold_{fold}/train_batch_loss": loss.item(),
                f"fold_{fold}/mixup_applied": int(mixup_applied),
                f"fold_{fold}/cutout_applied": int(cutout_applied),
                f"fold_{fold}/random_crop_applied": int(random_crop_applied),
                f"fold_{fold}/batch_step": step
            })
        
        batch_count += 1
        pbar.set_description(f"Loss: {loss.item():.4f}, Mixup: {mixup_applied}, Cutout: {cutout_applied}, RandomCrop: {random_crop_applied}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    ret = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
    }

    return ret

def validate_one_epoch(loader, model, loss_fn, device, epoch=None, fold=None, log_confusion=False):
    model.eval()
    val_loss = 0
    preds_list = []
    targets_list = []
    
    with torch.no_grad():
        pbar = tqdm(loader, desc=f"Validating Epoch {epoch+1 if epoch else '?'}")
        for image, targets in pbar:
            image = image.to(device)
            targets = targets.to(device)
            
            preds = model(image)
            loss = loss_fn(preds, targets)
            
            val_loss += loss.item()
            preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(targets.detach().cpu().numpy())
            
            pbar.set_description(f"Val Loss: {loss.item():.4f}")
    
    val_loss /= len(loader)
    val_acc = accuracy_score(targets_list, preds_list)
    val_f1 = f1_score(targets_list, preds_list, average='macro')
    
    # Confusion Matrix 로깅 (마지막 epoch에만)
    if log_confusion and wandb.run is not None:
        try:
            wandb.log({
                f"fold_{fold}/confusion_matrix": wandb.plot.confusion_matrix(
                    probs=None,
                    y_true=targets_list,
                    preds=preds_list,
                    class_names=[f"Class_{i}" for i in range(17)]
                )
            })
            
            # 클래스별 F1 스코어
            class_f1_scores = f1_score(targets_list, preds_list, average=None)
            for i, class_f1 in enumerate(class_f1_scores):
                wandb.log({f"fold_{fold}/class_{i}_f1": class_f1})
                
        except Exception as e:
            print(f" Confusion matrix 로깅 실패: {e}")
    
    ret = {
        "val_loss": val_loss,
        "val_acc": val_acc,  
        "val_f1": val_f1,
    }
    
    return ret

In [7]:
# =============================================================================
# 6. Hyper-parameters with WandB Config
# =============================================================================

# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f" Using device: {device}")

# data config
data_path = '../data/'

# model config
model_name = 'efficientnet_b3' # 'resnet50' 'efficientnet-b0', ...

# training config
img_size = 384
LR = 5e-4
EPOCHS = 50
BATCH_SIZE = 32
num_workers = 30

# K-Fold config
N_FOLDS = 5  # 5-fold로 설정

# WandB Config 설정
config = {
    # Model config
    "model_name": model_name,
    "img_size": img_size,
    "num_classes": 17,
    "architecture": "EfficientNet-B3",
    
    # Training config  
    "lr": LR,
    "epochs": EPOCHS,
    "batch_size": BATCH_SIZE,
    "num_workers": num_workers,
    "device": str(device),
    
    # K-Fold config
    "n_folds": N_FOLDS,
    "seed": SEED,
    "cv_strategy": "StratifiedKFold",
    
    # Augmentation & Training techniques
    "mixup_alpha": 1.0,
    "mixup_prob": 0.3,
    "label_smoothing": 0.2,
    "gradient_clipping": 1.0,
    "mixed_precision": True,
    
    # Optimizer & Scheduler
    "optimizer": "Adam",
    "scheduler": "CosineAnnealingLR",
    
    # Data
    "data_path": data_path,
    "train_transforms": "Advanced",
    "test_transforms": "Basic",
}

print(" 하이퍼파라미터 설정 완료!")
print(f" 모델: {model_name}")
print(f" 이미지 크기: {img_size}x{img_size}")
print(f" 배치 크기: {BATCH_SIZE}")
print(f" 학습률: {LR}")
print(f" 에폭: {EPOCHS}")


 Using device: cuda
 하이퍼파라미터 설정 완료!
 모델: efficientnet_b3
 이미지 크기: 384x384
 배치 크기: 32
 학습률: 0.0005
 에폭: 50


In [8]:

# =============================================================================
# 7. Optuna Hyperparameter Tuning (선택적)
# =============================================================================

USE_OPTUNA = False  # True로 바꾸면 튜닝 실행

if USE_OPTUNA:
    print("🔍 Optuna 하이퍼파라미터 튜닝 시작...")
    
    def objective(trial):
        lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
        batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
        
        # WandB에 Optuna 시행 로깅
        optuna_run = wandb.init(
            project=PROJECT_NAME,
            entity=ENTITY,
            name=f"optuna-trial-{trial.number}",
            config={**config, "lr": lr, "batch_size": batch_size},
            tags=["optuna", "hyperparameter-tuning"],
            group="optuna-study",
            job_type="hyperparameter-optimization",
            reinit=True
        )
        
        # 간단한 3-fold CV로 빠른 평가
        skf_simple = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
        fold_scores = []
        
        # 간단한 평가 로직 (실제 구현에서는 더 단순화)
        # ... (Optuna 로직은 복잡하므로 기본적으로 비활성화)
        
        optuna_run.finish()
        return np.random.random()  # placeholder
    
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=10)
    
    # 최적 파라미터 적용
    best_params = study.best_params
    LR = best_params.get('lr', LR)
    BATCH_SIZE = best_params.get('batch_size', BATCH_SIZE)
    config.update(best_params)
    print(f"🎯 Optuna 최적 파라미터: {best_params}")
else:
    print("⏭️ Optuna 튜닝 건너뛰기 (USE_OPTUNA=False)")

⏭️ Optuna 튜닝 건너뛰기 (USE_OPTUNA=False)


In [9]:
# =============================================================================
# 8. Data Transforms
# =============================================================================

# augmentation을 위한 transform 코드
trn_transform = A.Compose([
    # 비율 보존 리사이징 (핵심 개선)
    A.LongestMaxSize(max_size=img_size),
    A.PadIfNeeded(min_height=img_size, min_width=img_size, 
                  border_mode=0, value=0),
    
    # 문서 특화 회전 + 미세 회전 추가
    A.OneOf([
        A.Rotate(limit=[90,90], p=1.0),
        A.Rotate(limit=[180,180], p=1.0),
        A.Rotate(limit=[270,270], p=1.0),
        A.Rotate(limit=(-15, 15), p=1.0),  # 미세 회전 추가
    ], p=0.7),
    
    # 기하학적 변환 강화
    A.OneOf([
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=5, p=1.0),
        A.ElasticTransform(alpha=50, sigma=5, p=1.0),
        A.GridDistortion(num_steps=5, distort_limit=0.2, p=1.0),
        A.OpticalDistortion(distort_limit=0.2, shift_limit=0.1, p=1.0),
    ], p=0.6),
    
    # 색상 및 조명 변환 강화
    A.OneOf([
        A.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.3, hue=0.1, p=1.0),
        A.RandomBrightnessContrast(brightness_limit=0.4, contrast_limit=0.4, p=1.0),
        A.CLAHE(clip_limit=4.0, tile_grid_size=(8, 8), p=1.0),
        A.RandomGamma(gamma_limit=(70, 130), p=1.0),
    ], p=0.9),
    
    # 블러 및 노이즈 강화
    A.OneOf([
        A.MotionBlur(blur_limit=(5, 15), p=1.0),
        A.GaussianBlur(blur_limit=(3, 15), p=1.0),
        A.MedianBlur(blur_limit=7, p=1.0),
        A.Blur(blur_limit=7, p=1.0),
    ], p=0.8),
    
    # 다양한 노이즈 추가
    A.OneOf([
        A.GaussNoise(var_limit=(10.0, 150.0), p=1.0),
        A.ISONoise(color_shift=(0.01, 0.08), intensity=(0.1, 0.8), p=1.0),
        A.MultiplicativeNoise(multiplier=(0.9, 1.1), p=1.0),
    ], p=0.8),
    
    # 문서 품질 시뮬레이션 (스캔/복사 효과)
    A.OneOf([
        A.Downscale(scale_min=0.7, scale_max=0.9, p=1.0),
        A.ImageCompression(quality_lower=60, quality_upper=95, p=1.0),
        A.Posterize(num_bits=6, p=1.0),
    ], p=0.5),
    
    # 픽셀 레벨 변환
    A.OneOf([
        A.ChannelShuffle(p=1.0),
        A.InvertImg(p=1.0),
        A.Solarize(threshold=128, p=1.0),
        A.Equalize(p=1.0),
    ], p=0.3),
    
    # 공간 변환
    A.OneOf([
        A.HorizontalFlip(p=1.0),
        A.VerticalFlip(p=1.0),  # 문서에서도 유용할 수 있음
        A.Transpose(p=1.0),
    ], p=0.6),
    
    # 조각 제거 (Cutout 계열)
    A.OneOf([
        A.CoarseDropout(max_holes=8, max_height=32, max_width=32, 
                       min_holes=1, min_height=8, min_width=8, 
                       fill_value=0, p=1.0),
        A.GridDropout(ratio=0.3, unit_size_min=8, unit_size_max=32, 
                     holes_number_x=5, holes_number_y=5, p=1.0),
    ], p=0.4),
    
    # 최종 정규화
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

# test image 변환을 위한 transform 코드
tst_transform = A.Compose([
    A.LongestMaxSize(max_size=img_size),
    A.PadIfNeeded(min_height=img_size, min_width=img_size, 
                  border_mode=0, value=0),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

print("✅ 데이터 변환 설정 완료!")

✅ 데이터 변환 설정 완료!


In [10]:
# =============================================================================
# 9. Load Data & Start K-Fold Cross Validation with WandB
# =============================================================================

# 전체 학습 데이터 로드
train_df = pd.read_csv("../data/train.csv")
print(f"학습 데이터: {len(train_df)}개 샘플")

# 클래스 분포 확인
class_counts = train_df['target'].value_counts().sort_index()
print(f" 클래스 분포: {dict(class_counts)}")

# K-Fold 설정
skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

# K-Fold 결과를 저장할 리스트
fold_results = []
fold_models = []  # 각 fold의 최고 성능 모델을 저장

#  WandB 메인 실험 시작
main_run = wandb.init(
    project=PROJECT_NAME,
    entity=ENTITY,
    name=f"{EXPERIMENT_NAME}-{datetime.now().strftime('%m%d-%H%M')}",
    config=config,
    tags=["k-fold-cv", "ensemble", model_name, "baseline", "main-experiment"],
    group="k-fold-experiment",
    job_type="cross-validation",
    notes=f"{N_FOLDS}-Fold Cross Validation with {model_name}"
)

print(f"\n🚀 WandB 실험 시작!")
print(f"📊 대시보드: {main_run.url}")
print(f"📋 실험명: {main_run.name}")

#  데이터셋 정보 로깅
wandb.log({
    "dataset/total_samples": len(train_df),
    "dataset/num_classes": 17,
    "dataset/samples_per_fold": len(train_df) // N_FOLDS,
})

# 클래스 분포 시각화
class_dist_data = [[f"Class_{i}", count] for i, count in enumerate(class_counts)]
wandb.log({
    "dataset/class_distribution": wandb.plot.bar(
        wandb.Table(data=class_dist_data, columns=["Class", "Count"]),
        "Class", "Count", 
        title="Training Data Class Distribution"
    )
})

print(f"\n{'='*60}")
print(f"🎯 {N_FOLDS}-FOLD CROSS VALIDATION 시작")
print(f"{'='*60}")


학습 데이터: 1570개 샘플
 클래스 분포: {0: 100, 1: 46, 2: 100, 3: 100, 4: 100, 5: 100, 6: 100, 7: 100, 8: 100, 9: 100, 10: 100, 11: 100, 12: 100, 13: 74, 14: 50, 15: 100, 16: 100}


[34m[1mwandb[0m: Currently logged in as: [33mkimsunmin0227[0m ([33mkimsunmin0227-hufs[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin



🚀 WandB 실험 시작!
📊 대시보드: https://wandb.ai/kimsunmin0227-hufs/document-classification-team/runs/mdvqqydn
📋 실험명: efficientnet-b3-baseline-0904-1341

🎯 5-FOLD CROSS VALIDATION 시작


In [11]:

# =============================================================================
# 10. K-Fold Cross Validation Loop with WandB
# =============================================================================

for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df['target'])):
    print(f"\n{'='*50}")
    print(f" FOLD {fold + 1}/{N_FOLDS}")
    print(f"{'='*50}")
    
    # 각 fold별 child run 생성
    fold_run = wandb.init(
        project=PROJECT_NAME,
        entity=ENTITY,
        name=f"fold-{fold+1}-{model_name}-{datetime.now().strftime('%H%M')}",
        config=config,
        tags=["fold", f"fold-{fold+1}", model_name, "child-run"],
        group="k-fold-experiment",
        job_type=f"fold-{fold+1}",
        reinit=True  # 새로운 run 시작 허용
    )
    
    print(f"📊 Fold {fold+1} Dashboard: {fold_run.url}")
    
    # 현재 fold의 train/validation 데이터 분할
    train_fold_df = train_df.iloc[train_idx].reset_index(drop=True)
    val_fold_df = train_df.iloc[val_idx].reset_index(drop=True)
    
    # 데이터 분할 정보 로깅
    wandb.log({
        "fold_info/fold_number": fold + 1,
        "fold_info/train_samples": len(train_fold_df),
        "fold_info/val_samples": len(val_fold_df),
        "fold_info/train_ratio": len(train_fold_df) / len(train_df),
        "fold_info/val_ratio": len(val_fold_df) / len(train_df)
    })
    
    # 현재 fold의 Dataset 생성
    trn_dataset = ImageDataset(
        train_fold_df,
        "../data/train/",
        transform=trn_transform
    )
    
    val_dataset = ImageDataset(
        val_fold_df,
        "../data/train/",
        transform=tst_transform  # 검증에는 증강 적용 안함
    )
    
    # 현재 fold의 DataLoader 생성
    trn_loader = DataLoader(
        trn_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )
    
    print(f"Train samples: {len(trn_dataset)}, Validation samples: {len(val_dataset)}")
    
    # 모델 초기화 (각 fold마다 새로운 모델)
    model = timm.create_model(
        model_name,
        pretrained=True,
        num_classes=17
    ).to(device)
    
    loss_fn = nn.CrossEntropyLoss(label_smoothing=0.2)  # Label Smoothing 적용
    optimizer = Adam(model.parameters(), lr=LR)
    
    # Learning Rate Scheduler 추가
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
    
    # 현재 fold의 최고 성능 추적
    best_val_f1 = 0.0
    best_model = None
    patience = 0
    max_patience = 7
    
    print(f" 모델 학습 시작 - Fold {fold+1}")
    
    # =============================================================================
    # 11. Training Loop for Current Fold
    # =============================================================================
    
    for epoch in range(EPOCHS):
        print(f"\n📈 Epoch {epoch+1}/{EPOCHS}")
        
        # Training
        train_ret = train_one_epoch(
            trn_loader, model, optimizer, loss_fn, device, 
            epoch=epoch, fold=fold+1
        )
        
        # Validation
        val_ret = validate_one_epoch(
            val_loader, model, loss_fn, device, 
            epoch=epoch, fold=fold+1,
            log_confusion=(epoch == EPOCHS-1)  # 마지막 epoch에만 confusion matrix
        )
        
        # Learning rate 로깅
        current_lr = optimizer.param_groups[0]['lr']
        
        # WandB에 metrics 로깅
        log_data = {
            "epoch": epoch + 1,
            "fold": fold + 1,
            "train/loss": train_ret['train_loss'],
            "train/accuracy": train_ret['train_acc'], 
            "train/f1": train_ret['train_f1'],
            "val/loss": val_ret['val_loss'],
            "val/accuracy": val_ret['val_acc'],
            "val/f1": val_ret['val_f1'],
            "learning_rate": current_lr,
            "optimizer/lr": current_lr
        }
        
        # GPU 메모리 사용량 로깅
        if torch.cuda.is_available():
            gpu_memory_used = torch.cuda.memory_allocated(0) / 1e9
            gpu_memory_total = torch.cuda.get_device_properties(0).total_memory / 1e9
            log_data.update({
                "system/gpu_memory_used_gb": gpu_memory_used,
                "system/gpu_memory_total_gb": gpu_memory_total,
                "system/gpu_utilization_pct": (gpu_memory_used / gpu_memory_total) * 100
            })
        
        wandb.log(log_data)
        
        # Scheduler step
        scheduler.step()
        
        print(f" Epoch {epoch+1:2d} | "
              f"Train Loss: {train_ret['train_loss']:.4f} | "
              f"Train F1: {train_ret['train_f1']:.4f} | "
              f"Val Loss: {val_ret['val_loss']:.4f} | "
              f"Val F1: {val_ret['val_f1']:.4f} | "
              f"LR: {current_lr:.2e}")
        
        # 최고 성능 모델 저장
        if val_ret['val_f1'] > best_val_f1:
            best_val_f1 = val_ret['val_f1']
            best_model = copy.deepcopy(model.state_dict())
            patience = 0
            
            # 최고 성능 모델 아티팩트로 저장
            model_path = f'best_model_fold_{fold+1}.pth'
            torch.save(best_model, model_path)
            wandb.save(model_path, policy="now")
            
            # 새로운 최고 성능 로깅
            wandb.log({
                f"best_performance/epoch": epoch + 1,
                f"best_performance/val_f1": best_val_f1,
                f"best_performance/val_acc": val_ret['val_acc'],
                f"best_performance/val_loss": val_ret['val_loss'],
            })
            
            print(f"🎉 새로운 최고 성능! F1: {best_val_f1:.4f}")
        else:
            patience += 1
            
        # Early stopping (선택적)
        if patience >= max_patience and epoch > EPOCHS // 2:
            print(f"⏸️ Early stopping at epoch {epoch+1} (patience: {patience})")
            wandb.log({"early_stopping/epoch": epoch + 1})
            break
    
    # =============================================================================
    # 12. Fold Results Summary
    # =============================================================================
    
    # 현재 fold 결과 저장
    fold_result = {
        'fold': fold + 1,
        'best_val_f1': best_val_f1,
        'final_train_f1': train_ret['train_f1'],
        'train_samples': len(trn_dataset),
        'val_samples': len(val_dataset),
        'epochs_trained': epoch + 1,
        'early_stopped': patience >= max_patience
    }
    
    fold_results.append(fold_result)
    fold_models.append(best_model)
    
    # Fold 최종 요약 로깅
    wandb.log({
        "fold_summary/best_val_f1": best_val_f1,
        "fold_summary/final_train_f1": train_ret['train_f1'],
        "fold_summary/epochs_trained": epoch + 1,
        "fold_summary/improvement": best_val_f1 - val_ret['val_f1'],
        "fold_summary/early_stopped": patience >= max_patience
    })
    
    print(f"\n Fold {fold + 1} 완료!")
    print(f" 최고 Validation F1: {best_val_f1:.4f}")
    print(f" 학습된 에폭: {epoch + 1}/{EPOCHS}")
    
    # Fold run 종료
    wandb.finish()
    
    # 메모리 정리
    del model, optimizer, scheduler, trn_loader, val_loader
    torch.cuda.empty_cache()



 FOLD 1/5


0,1
dataset/num_classes,▁
dataset/samples_per_fold,▁
dataset/total_samples,▁

0,1
dataset/num_classes,17
dataset/samples_per_fold,314
dataset/total_samples,1570




📊 Fold 1 Dashboard: https://wandb.ai/kimsunmin0227-hufs/document-classification-team/runs/bx5b6hog
Train samples: 1256, Validation samples: 314
 모델 학습 시작 - Fold 1

📈 Epoch 1/50


Loss: 1.9688, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:22<00:00,  1.78it/s]
Val Loss: 1.8446: 100%|██████████| 10/10 [00:02<00:00,  4.20it/s]


 Epoch  1 | Train Loss: 2.6804 | Train F1: 0.2319 | Val Loss: 1.7305 | Val F1: 0.6661 | LR: 5.00e-04
🎉 새로운 최고 성능! F1: 0.6661

📈 Epoch 2/50


Loss: 2.4531, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.5317: 100%|██████████| 10/10 [00:01<00:00,  6.29it/s]


 Epoch  2 | Train Loss: 2.0021 | Train F1: 0.4900 | Val Loss: 1.4795 | Val F1: 0.7949 | LR: 5.00e-04
🎉 새로운 최고 성능! F1: 0.7949

📈 Epoch 3/50


Loss: 1.5195, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.4500: 100%|██████████| 10/10 [00:01<00:00,  6.28it/s]


 Epoch  3 | Train Loss: 1.7830 | Train F1: 0.5823 | Val Loss: 1.3833 | Val F1: 0.8436 | LR: 4.98e-04
🎉 새로운 최고 성능! F1: 0.8436

📈 Epoch 4/50


Loss: 1.6338, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.46it/s]
Val Loss: 1.3547: 100%|██████████| 10/10 [00:01<00:00,  5.94it/s]


 Epoch  4 | Train Loss: 1.7137 | Train F1: 0.6073 | Val Loss: 1.3095 | Val F1: 0.8674 | LR: 4.96e-04
🎉 새로운 최고 성능! F1: 0.8674

📈 Epoch 5/50


Loss: 1.3613, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.3083: 100%|██████████| 10/10 [00:01<00:00,  6.00it/s]


 Epoch  5 | Train Loss: 1.6229 | Train F1: 0.6758 | Val Loss: 1.2937 | Val F1: 0.8725 | LR: 4.92e-04
🎉 새로운 최고 성능! F1: 0.8725

📈 Epoch 6/50


Loss: 1.6377, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.34it/s]
Val Loss: 1.2638: 100%|██████████| 10/10 [00:01<00:00,  5.84it/s]


 Epoch  6 | Train Loss: 1.5749 | Train F1: 0.7083 | Val Loss: 1.3079 | Val F1: 0.8428 | LR: 4.88e-04

📈 Epoch 7/50


Loss: 1.2383, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.47it/s]
Val Loss: 1.2842: 100%|██████████| 10/10 [00:01<00:00,  6.35it/s]


 Epoch  7 | Train Loss: 1.5315 | Train F1: 0.7010 | Val Loss: 1.2491 | Val F1: 0.8710 | LR: 4.82e-04

📈 Epoch 8/50


Loss: 1.5859, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.2566: 100%|██████████| 10/10 [00:01<00:00,  5.98it/s]


 Epoch  8 | Train Loss: 1.4285 | Train F1: 0.7979 | Val Loss: 1.2430 | Val F1: 0.9007 | LR: 4.76e-04
🎉 새로운 최고 성능! F1: 0.9007

📈 Epoch 9/50


Loss: 1.6748, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.51it/s]
Val Loss: 1.3260: 100%|██████████| 10/10 [00:01<00:00,  5.99it/s]


 Epoch  9 | Train Loss: 1.4779 | Train F1: 0.7788 | Val Loss: 1.2688 | Val F1: 0.8666 | LR: 4.69e-04

📈 Epoch 10/50


Loss: 1.3213, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.38it/s]
Val Loss: 1.2989: 100%|██████████| 10/10 [00:01<00:00,  6.17it/s]


 Epoch 10 | Train Loss: 1.3983 | Train F1: 0.7824 | Val Loss: 1.2484 | Val F1: 0.8500 | LR: 4.61e-04

📈 Epoch 11/50


Loss: 1.5703, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.2096: 100%|██████████| 10/10 [00:01<00:00,  5.96it/s]


 Epoch 11 | Train Loss: 1.3786 | Train F1: 0.7555 | Val Loss: 1.1950 | Val F1: 0.9168 | LR: 4.52e-04
🎉 새로운 최고 성능! F1: 0.9168

📈 Epoch 12/50


Loss: 1.2490, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.37it/s]
Val Loss: 1.2226: 100%|██████████| 10/10 [00:01<00:00,  5.86it/s]


 Epoch 12 | Train Loss: 1.3692 | Train F1: 0.7847 | Val Loss: 1.2061 | Val F1: 0.8956 | LR: 4.43e-04

📈 Epoch 13/50


Loss: 1.1934, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.54it/s]
Val Loss: 1.1941: 100%|██████████| 10/10 [00:01<00:00,  6.29it/s]


 Epoch 13 | Train Loss: 1.3136 | Train F1: 0.8023 | Val Loss: 1.1767 | Val F1: 0.9271 | LR: 4.32e-04
🎉 새로운 최고 성능! F1: 0.9271

📈 Epoch 14/50


Loss: 1.7422, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.36it/s]
Val Loss: 1.1871: 100%|██████████| 10/10 [00:01<00:00,  5.96it/s]


 Epoch 14 | Train Loss: 1.4536 | Train F1: 0.7564 | Val Loss: 1.1917 | Val F1: 0.9049 | LR: 4.21e-04

📈 Epoch 15/50


Loss: 1.3037, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.1767: 100%|██████████| 10/10 [00:01<00:00,  5.82it/s]


 Epoch 15 | Train Loss: 1.3691 | Train F1: 0.8426 | Val Loss: 1.1875 | Val F1: 0.9231 | LR: 4.09e-04

📈 Epoch 16/50


Loss: 1.1533, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.2072: 100%|██████████| 10/10 [00:01<00:00,  6.35it/s]


 Epoch 16 | Train Loss: 1.3260 | Train F1: 0.8188 | Val Loss: 1.1818 | Val F1: 0.9087 | LR: 3.97e-04

📈 Epoch 17/50


Loss: 1.1641, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.28it/s]
Val Loss: 1.2108: 100%|██████████| 10/10 [00:01<00:00,  5.89it/s]


 Epoch 17 | Train Loss: 1.3163 | Train F1: 0.7983 | Val Loss: 1.1732 | Val F1: 0.9185 | LR: 3.84e-04

📈 Epoch 18/50


Loss: 1.1523, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.1906: 100%|██████████| 10/10 [00:01<00:00,  6.35it/s]


 Epoch 18 | Train Loss: 1.3462 | Train F1: 0.7872 | Val Loss: 1.1606 | Val F1: 0.9234 | LR: 3.70e-04

📈 Epoch 19/50


Loss: 1.2734, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.2322: 100%|██████████| 10/10 [00:01<00:00,  6.31it/s]


 Epoch 19 | Train Loss: 1.3168 | Train F1: 0.8193 | Val Loss: 1.1822 | Val F1: 0.9067 | LR: 3.56e-04

📈 Epoch 20/50


Loss: 1.1250, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.51it/s]
Val Loss: 1.2293: 100%|██████████| 10/10 [00:01<00:00,  6.30it/s]


 Epoch 20 | Train Loss: 1.3624 | Train F1: 0.7609 | Val Loss: 1.1740 | Val F1: 0.9221 | LR: 3.42e-04

📈 Epoch 21/50


Loss: 1.0986, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.2336: 100%|██████████| 10/10 [00:01<00:00,  6.22it/s]


 Epoch 21 | Train Loss: 1.3597 | Train F1: 0.7303 | Val Loss: 1.1788 | Val F1: 0.9191 | LR: 3.27e-04

📈 Epoch 22/50


Loss: 2.0859, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.44it/s]
Val Loss: 1.1930: 100%|██████████| 10/10 [00:01<00:00,  5.99it/s]


 Epoch 22 | Train Loss: 1.3475 | Train F1: 0.7922 | Val Loss: 1.1696 | Val F1: 0.9258 | LR: 3.12e-04

📈 Epoch 23/50


Loss: 1.3496, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.47it/s]
Val Loss: 1.2118: 100%|██████████| 10/10 [00:01<00:00,  5.99it/s]


 Epoch 23 | Train Loss: 1.2773 | Train F1: 0.8016 | Val Loss: 1.1707 | Val F1: 0.9205 | LR: 2.97e-04

📈 Epoch 24/50


Loss: 1.3613, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1907: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 24 | Train Loss: 1.3701 | Train F1: 0.7768 | Val Loss: 1.1688 | Val F1: 0.9114 | LR: 2.81e-04

📈 Epoch 25/50


Loss: 1.2998, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.2012: 100%|██████████| 10/10 [00:01<00:00,  6.39it/s]


 Epoch 25 | Train Loss: 1.3299 | Train F1: 0.8090 | Val Loss: 1.1698 | Val F1: 0.9100 | LR: 2.66e-04

📈 Epoch 26/50


Loss: 1.2471, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.47it/s]
Val Loss: 1.1948: 100%|██████████| 10/10 [00:01<00:00,  5.99it/s]


 Epoch 26 | Train Loss: 1.2779 | Train F1: 0.7892 | Val Loss: 1.1699 | Val F1: 0.9187 | LR: 2.50e-04

📈 Epoch 27/50


Loss: 1.7148, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.1796: 100%|██████████| 10/10 [00:01<00:00,  6.00it/s]

 Epoch 27 | Train Loss: 1.2916 | Train F1: 0.8318 | Val Loss: 1.1642 | Val F1: 0.9141 | LR: 2.34e-04
⏸️ Early stopping at epoch 27 (patience: 14)

 Fold 1 완료!
 최고 Validation F1: 0.9271
 학습된 에폭: 27/50





0,1
best_performance/epoch,▁▂▂▃▃▅▇█
best_performance/val_acc,▁▅▆▇▇▇██
best_performance/val_f1,▁▄▆▆▇▇██
best_performance/val_loss,█▅▄▃▂▂▁▁
early_stopping/epoch,▁
epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
fold_1/batch_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
fold_1/cutout_applied,▁██▁▁█▁██▁▁██▁▁███▁▁▁▁▁▁▁█▁
fold_1/mixup_applied,▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁██▁▁█▁▁

0,1
best_performance/epoch,13
best_performance/val_acc,0.92994
best_performance/val_f1,0.92706
best_performance/val_loss,1.17669
early_stopping/epoch,27
epoch,27
fold,1
fold_1/batch_step,1040
fold_1/cutout_applied,0
fold_1/mixup_applied,0



 FOLD 2/5


📊 Fold 2 Dashboard: https://wandb.ai/kimsunmin0227-hufs/document-classification-team/runs/6ezm341t
Train samples: 1256, Validation samples: 314
 모델 학습 시작 - Fold 2

📈 Epoch 1/50


Loss: 2.7383, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.38it/s]
Val Loss: 1.7013: 100%|██████████| 10/10 [00:01<00:00,  5.92it/s]


 Epoch  1 | Train Loss: 2.7039 | Train F1: 0.2769 | Val Loss: 1.7919 | Val F1: 0.6296 | LR: 5.00e-04
🎉 새로운 최고 성능! F1: 0.6296

📈 Epoch 2/50


Loss: 1.5049, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.4894: 100%|██████████| 10/10 [00:01<00:00,  6.21it/s]


 Epoch  2 | Train Loss: 1.9208 | Train F1: 0.5250 | Val Loss: 1.5198 | Val F1: 0.7612 | LR: 5.00e-04
🎉 새로운 최고 성능! F1: 0.7612

📈 Epoch 3/50


Loss: 1.7480, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:12<00:00,  3.32it/s]
Val Loss: 1.3445: 100%|██████████| 10/10 [00:01<00:00,  5.92it/s]


 Epoch  3 | Train Loss: 1.7352 | Train F1: 0.6231 | Val Loss: 1.4037 | Val F1: 0.8230 | LR: 4.98e-04
🎉 새로운 최고 성능! F1: 0.8230

📈 Epoch 4/50


Loss: 1.5986, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.34it/s]
Val Loss: 1.3615: 100%|██████████| 10/10 [00:01<00:00,  5.90it/s]


 Epoch  4 | Train Loss: 1.6092 | Train F1: 0.6809 | Val Loss: 1.3748 | Val F1: 0.8011 | LR: 4.96e-04

📈 Epoch 5/50


Loss: 1.3613, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.2736: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch  5 | Train Loss: 1.5628 | Train F1: 0.7028 | Val Loss: 1.3066 | Val F1: 0.8416 | LR: 4.92e-04
🎉 새로운 최고 성능! F1: 0.8416

📈 Epoch 6/50


Loss: 1.4541, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.37it/s]
Val Loss: 1.2514: 100%|██████████| 10/10 [00:01<00:00,  6.40it/s]


 Epoch  6 | Train Loss: 1.5157 | Train F1: 0.7432 | Val Loss: 1.2686 | Val F1: 0.8586 | LR: 4.88e-04
🎉 새로운 최고 성능! F1: 0.8586

📈 Epoch 7/50


Loss: 1.4922, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.2196: 100%|██████████| 10/10 [00:01<00:00,  5.94it/s]


 Epoch  7 | Train Loss: 1.5309 | Train F1: 0.6739 | Val Loss: 1.2613 | Val F1: 0.8684 | LR: 4.82e-04
🎉 새로운 최고 성능! F1: 0.8684

📈 Epoch 8/50


Loss: 1.2236, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.2344: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch  8 | Train Loss: 1.4564 | Train F1: 0.7748 | Val Loss: 1.2425 | Val F1: 0.8921 | LR: 4.76e-04
🎉 새로운 최고 성능! F1: 0.8921

📈 Epoch 9/50


Loss: 1.2363, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.44it/s]
Val Loss: 1.2433: 100%|██████████| 10/10 [00:01<00:00,  5.90it/s]


 Epoch  9 | Train Loss: 1.3925 | Train F1: 0.8080 | Val Loss: 1.2671 | Val F1: 0.8778 | LR: 4.69e-04

📈 Epoch 10/50


Loss: 2.0820, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.31it/s]
Val Loss: 1.2524: 100%|██████████| 10/10 [00:01<00:00,  6.28it/s]


 Epoch 10 | Train Loss: 1.4148 | Train F1: 0.8086 | Val Loss: 1.2622 | Val F1: 0.8826 | LR: 4.61e-04

📈 Epoch 11/50


Loss: 1.6719, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1820: 100%|██████████| 10/10 [00:01<00:00,  6.21it/s]


 Epoch 11 | Train Loss: 1.3909 | Train F1: 0.7425 | Val Loss: 1.2268 | Val F1: 0.8949 | LR: 4.52e-04
🎉 새로운 최고 성능! F1: 0.8949

📈 Epoch 12/50


Loss: 1.5020, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.2283: 100%|██████████| 10/10 [00:01<00:00,  5.95it/s]


 Epoch 12 | Train Loss: 1.4071 | Train F1: 0.8106 | Val Loss: 1.2381 | Val F1: 0.8932 | LR: 4.43e-04

📈 Epoch 13/50


Loss: 1.2559, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.34it/s]
Val Loss: 1.2030: 100%|██████████| 10/10 [00:01<00:00,  6.20it/s]


 Epoch 13 | Train Loss: 1.3826 | Train F1: 0.8115 | Val Loss: 1.2157 | Val F1: 0.9040 | LR: 4.32e-04
🎉 새로운 최고 성능! F1: 0.9040

📈 Epoch 14/50


Loss: 1.4326, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.2870: 100%|██████████| 10/10 [00:01<00:00,  5.98it/s]


 Epoch 14 | Train Loss: 1.4295 | Train F1: 0.7752 | Val Loss: 1.2623 | Val F1: 0.8405 | LR: 4.21e-04

📈 Epoch 15/50


Loss: 1.1934, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.1919: 100%|██████████| 10/10 [00:01<00:00,  5.88it/s]


 Epoch 15 | Train Loss: 1.3763 | Train F1: 0.7760 | Val Loss: 1.2058 | Val F1: 0.8816 | LR: 4.09e-04

📈 Epoch 16/50


Loss: 1.7598, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1895: 100%|██████████| 10/10 [00:01<00:00,  5.95it/s]


 Epoch 16 | Train Loss: 1.4502 | Train F1: 0.6776 | Val Loss: 1.1864 | Val F1: 0.9099 | LR: 3.97e-04
🎉 새로운 최고 성능! F1: 0.9099

📈 Epoch 17/50


Loss: 1.1348, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.2520: 100%|██████████| 10/10 [00:01<00:00,  6.27it/s]


 Epoch 17 | Train Loss: 1.3274 | Train F1: 0.8389 | Val Loss: 1.2224 | Val F1: 0.8931 | LR: 3.84e-04

📈 Epoch 18/50


Loss: 1.9785, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.51it/s]
Val Loss: 1.2048: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 18 | Train Loss: 1.3719 | Train F1: 0.8212 | Val Loss: 1.1901 | Val F1: 0.8985 | LR: 3.70e-04

📈 Epoch 19/50


Loss: 1.2842, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1518: 100%|██████████| 10/10 [00:01<00:00,  5.97it/s]


 Epoch 19 | Train Loss: 1.3176 | Train F1: 0.7398 | Val Loss: 1.1886 | Val F1: 0.8868 | LR: 3.56e-04

📈 Epoch 20/50


Loss: 1.0918, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.1585: 100%|██████████| 10/10 [00:01<00:00,  6.31it/s]


 Epoch 20 | Train Loss: 1.3726 | Train F1: 0.8114 | Val Loss: 1.1724 | Val F1: 0.9129 | LR: 3.42e-04
🎉 새로운 최고 성능! F1: 0.9129

📈 Epoch 21/50


Loss: 1.1113, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.35it/s]
Val Loss: 1.2178: 100%|██████████| 10/10 [00:01<00:00,  5.88it/s]


 Epoch 21 | Train Loss: 1.2536 | Train F1: 0.8151 | Val Loss: 1.1816 | Val F1: 0.9080 | LR: 3.27e-04

📈 Epoch 22/50


Loss: 1.1611, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:12<00:00,  3.23it/s]
Val Loss: 1.1300: 100%|██████████| 10/10 [00:01<00:00,  5.94it/s]


 Epoch 22 | Train Loss: 1.2417 | Train F1: 0.9141 | Val Loss: 1.1766 | Val F1: 0.9030 | LR: 3.12e-04

📈 Epoch 23/50


Loss: 1.2197, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.34it/s]
Val Loss: 1.1440: 100%|██████████| 10/10 [00:01<00:00,  6.27it/s]


 Epoch 23 | Train Loss: 1.3416 | Train F1: 0.7600 | Val Loss: 1.1720 | Val F1: 0.9136 | LR: 2.97e-04
🎉 새로운 최고 성능! F1: 0.9136

📈 Epoch 24/50


Loss: 1.8691, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.1511: 100%|██████████| 10/10 [00:01<00:00,  6.29it/s]


 Epoch 24 | Train Loss: 1.3341 | Train F1: 0.8095 | Val Loss: 1.1764 | Val F1: 0.9068 | LR: 2.81e-04

📈 Epoch 25/50


Loss: 1.1543, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.1747: 100%|██████████| 10/10 [00:01<00:00,  5.83it/s]


 Epoch 25 | Train Loss: 1.3416 | Train F1: 0.7703 | Val Loss: 1.1735 | Val F1: 0.8994 | LR: 2.66e-04

📈 Epoch 26/50


Loss: 1.8086, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.52it/s]
Val Loss: 1.1886: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 26 | Train Loss: 1.2959 | Train F1: 0.8043 | Val Loss: 1.1677 | Val F1: 0.9168 | LR: 2.50e-04
🎉 새로운 최고 성능! F1: 0.9168

📈 Epoch 27/50


Loss: 1.0508, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1333: 100%|██████████| 10/10 [00:01<00:00,  5.98it/s]


 Epoch 27 | Train Loss: 1.2736 | Train F1: 0.8329 | Val Loss: 1.1671 | Val F1: 0.9279 | LR: 2.34e-04
🎉 새로운 최고 성능! F1: 0.9279

📈 Epoch 28/50


Loss: 1.1328, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1440: 100%|██████████| 10/10 [00:01<00:00,  5.91it/s]


 Epoch 28 | Train Loss: 1.3010 | Train F1: 0.7823 | Val Loss: 1.1631 | Val F1: 0.9204 | LR: 2.19e-04

📈 Epoch 29/50


Loss: 1.1201, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.1710: 100%|██████████| 10/10 [00:01<00:00,  6.30it/s]


 Epoch 29 | Train Loss: 1.3069 | Train F1: 0.8128 | Val Loss: 1.1637 | Val F1: 0.9115 | LR: 2.03e-04

📈 Epoch 30/50


Loss: 1.2383, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.36it/s]
Val Loss: 1.1294: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 30 | Train Loss: 1.3378 | Train F1: 0.7996 | Val Loss: 1.1463 | Val F1: 0.9291 | LR: 1.88e-04
🎉 새로운 최고 성능! F1: 0.9291

📈 Epoch 31/50


Loss: 1.8877, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.1387: 100%|██████████| 10/10 [00:01<00:00,  5.90it/s]


 Epoch 31 | Train Loss: 1.2434 | Train F1: 0.8706 | Val Loss: 1.1447 | Val F1: 0.9297 | LR: 1.73e-04
🎉 새로운 최고 성능! F1: 0.9297

📈 Epoch 32/50


Loss: 1.2598, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.46it/s]
Val Loss: 1.1190: 100%|██████████| 10/10 [00:01<00:00,  6.22it/s]


 Epoch 32 | Train Loss: 1.2548 | Train F1: 0.8861 | Val Loss: 1.1525 | Val F1: 0.9306 | LR: 1.58e-04
🎉 새로운 최고 성능! F1: 0.9306

📈 Epoch 33/50


Loss: 1.0293, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.1494: 100%|██████████| 10/10 [00:01<00:00,  5.57it/s]


 Epoch 33 | Train Loss: 1.2109 | Train F1: 0.9142 | Val Loss: 1.1471 | Val F1: 0.9211 | LR: 1.44e-04

📈 Epoch 34/50


Loss: 1.1338, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.37it/s]
Val Loss: 1.1696: 100%|██████████| 10/10 [00:01<00:00,  6.26it/s]


 Epoch 34 | Train Loss: 1.3375 | Train F1: 0.8074 | Val Loss: 1.1533 | Val F1: 0.9299 | LR: 1.30e-04

📈 Epoch 35/50


Loss: 1.1211, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.36it/s]
Val Loss: 1.1442: 100%|██████████| 10/10 [00:01<00:00,  5.97it/s]


 Epoch 35 | Train Loss: 1.2594 | Train F1: 0.8288 | Val Loss: 1.1445 | Val F1: 0.9299 | LR: 1.16e-04

📈 Epoch 36/50


Loss: 1.1094, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1601: 100%|██████████| 10/10 [00:01<00:00,  5.89it/s]


 Epoch 36 | Train Loss: 1.3014 | Train F1: 0.7933 | Val Loss: 1.1483 | Val F1: 0.9353 | LR: 1.03e-04
🎉 새로운 최고 성능! F1: 0.9353

📈 Epoch 37/50


Loss: 1.0439, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.1495: 100%|██████████| 10/10 [00:01<00:00,  5.88it/s]


 Epoch 37 | Train Loss: 1.2059 | Train F1: 0.8544 | Val Loss: 1.1460 | Val F1: 0.9411 | LR: 9.06e-05
🎉 새로운 최고 성능! F1: 0.9411

📈 Epoch 38/50


Loss: 1.1621, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.41it/s]
Val Loss: 1.1391: 100%|██████████| 10/10 [00:01<00:00,  5.96it/s]


 Epoch 38 | Train Loss: 1.2677 | Train F1: 0.8281 | Val Loss: 1.1512 | Val F1: 0.9267 | LR: 7.89e-05

📈 Epoch 39/50


Loss: 1.1699, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.1456: 100%|██████████| 10/10 [00:01<00:00,  6.27it/s]


 Epoch 39 | Train Loss: 1.2675 | Train F1: 0.7464 | Val Loss: 1.1475 | Val F1: 0.9310 | LR: 6.78e-05

📈 Epoch 40/50


Loss: 1.8945, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.1321: 100%|██████████| 10/10 [00:01<00:00,  5.89it/s]


 Epoch 40 | Train Loss: 1.2760 | Train F1: 0.8662 | Val Loss: 1.1466 | Val F1: 0.9344 | LR: 5.74e-05

📈 Epoch 41/50


Loss: 1.9482, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.33it/s]
Val Loss: 1.1380: 100%|██████████| 10/10 [00:01<00:00,  6.37it/s]


 Epoch 41 | Train Loss: 1.2532 | Train F1: 0.8403 | Val Loss: 1.1404 | Val F1: 0.9412 | LR: 4.77e-05
🎉 새로운 최고 성능! F1: 0.9412

📈 Epoch 42/50


Loss: 1.8887, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.23it/s]
Val Loss: 1.1778: 100%|██████████| 10/10 [00:01<00:00,  6.23it/s]


 Epoch 42 | Train Loss: 1.2611 | Train F1: 0.8900 | Val Loss: 1.1563 | Val F1: 0.9317 | LR: 3.89e-05

📈 Epoch 43/50


Loss: 1.1035, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.1334: 100%|██████████| 10/10 [00:01<00:00,  6.26it/s]


 Epoch 43 | Train Loss: 1.1909 | Train F1: 0.8800 | Val Loss: 1.1464 | Val F1: 0.9302 | LR: 3.09e-05

📈 Epoch 44/50


Loss: 1.7578, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.31it/s]
Val Loss: 1.1545: 100%|██████████| 10/10 [00:01<00:00,  6.27it/s]


 Epoch 44 | Train Loss: 1.2135 | Train F1: 0.8979 | Val Loss: 1.1519 | Val F1: 0.9329 | LR: 2.38e-05

📈 Epoch 45/50


Loss: 1.9961, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.51it/s]
Val Loss: 1.1447: 100%|██████████| 10/10 [00:01<00:00,  5.94it/s]


 Epoch 45 | Train Loss: 1.2069 | Train F1: 0.8663 | Val Loss: 1.1511 | Val F1: 0.9411 | LR: 1.76e-05

📈 Epoch 46/50


Loss: 1.3447, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.36it/s]
Val Loss: 1.1488: 100%|██████████| 10/10 [00:01<00:00,  5.90it/s]


 Epoch 46 | Train Loss: 1.2337 | Train F1: 0.8611 | Val Loss: 1.1528 | Val F1: 0.9409 | LR: 1.22e-05

📈 Epoch 47/50


Loss: 1.2334, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1423: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 47 | Train Loss: 1.2268 | Train F1: 0.8626 | Val Loss: 1.1484 | Val F1: 0.9409 | LR: 7.85e-06

📈 Epoch 48/50


Loss: 1.5225, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.1531: 100%|██████████| 10/10 [00:01<00:00,  5.83it/s]

 Epoch 48 | Train Loss: 1.2988 | Train F1: 0.7891 | Val Loss: 1.1527 | Val F1: 0.9409 | LR: 4.43e-06
⏸️ Early stopping at epoch 48 (patience: 7)

 Fold 2 완료!
 최고 Validation F1: 0.9412
 학습된 에폭: 48/50





0,1
best_performance/epoch,▁▁▁▂▂▂▂▃▃▄▄▅▅▆▆▆▆▇▇█
best_performance/val_acc,▁▄▅▆▇▆▇▇▇▇▇▇▇███████
best_performance/val_f1,▁▄▅▆▆▆▇▇▇▇▇▇▇███████
best_performance/val_loss,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
early_stopping/epoch,▁
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
fold_2/batch_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
fold_2/cutout_applied,▁██▁█▁▁▁▁▁▁▁█▁▁▁▁█▁▁█▁██▁▁█▁▁▁█▁▁██▁▁▁▁▁
fold_2/mixup_applied,█▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁█▁█▁█▁▁▁██▁▁▁

0,1
best_performance/epoch,41
best_performance/val_acc,0.94586
best_performance/val_f1,0.94124
best_performance/val_loss,1.14038
early_stopping/epoch,48
epoch,48
fold,2
fold_2/batch_step,1880
fold_2/cutout_applied,0
fold_2/mixup_applied,0



 FOLD 3/5


📊 Fold 3 Dashboard: https://wandb.ai/kimsunmin0227-hufs/document-classification-team/runs/ailr3rai
Train samples: 1256, Validation samples: 314
 모델 학습 시작 - Fold 3

📈 Epoch 1/50


Loss: 2.0195, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:12<00:00,  3.22it/s]
Val Loss: 1.6158: 100%|██████████| 10/10 [00:01<00:00,  5.86it/s]


 Epoch  1 | Train Loss: 2.7335 | Train F1: 0.2469 | Val Loss: 1.7240 | Val F1: 0.6256 | LR: 5.00e-04
🎉 새로운 최고 성능! F1: 0.6256

📈 Epoch 2/50


Loss: 1.9424, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.4114: 100%|██████████| 10/10 [00:01<00:00,  6.28it/s]


 Epoch  2 | Train Loss: 2.0801 | Train F1: 0.4753 | Val Loss: 1.5084 | Val F1: 0.7141 | LR: 5.00e-04
🎉 새로운 최고 성능! F1: 0.7141

📈 Epoch 3/50


Loss: 2.0898, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.38it/s]
Val Loss: 1.3176: 100%|██████████| 10/10 [00:01<00:00,  5.87it/s]


 Epoch  3 | Train Loss: 1.8310 | Train F1: 0.6225 | Val Loss: 1.3955 | Val F1: 0.8199 | LR: 4.98e-04
🎉 새로운 최고 성능! F1: 0.8199

📈 Epoch 4/50


Loss: 2.3984, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.32it/s]
Val Loss: 1.3669: 100%|██████████| 10/10 [00:01<00:00,  6.27it/s]


 Epoch  4 | Train Loss: 1.7421 | Train F1: 0.6532 | Val Loss: 1.3824 | Val F1: 0.8150 | LR: 4.96e-04

📈 Epoch 5/50


Loss: 1.7676, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.41it/s]
Val Loss: 1.3001: 100%|██████████| 10/10 [00:01<00:00,  5.98it/s]


 Epoch  5 | Train Loss: 1.6742 | Train F1: 0.6656 | Val Loss: 1.3280 | Val F1: 0.8546 | LR: 4.92e-04
🎉 새로운 최고 성능! F1: 0.8546

📈 Epoch 6/50


Loss: 1.3730, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.41it/s]
Val Loss: 1.2144: 100%|██████████| 10/10 [00:01<00:00,  5.89it/s]


 Epoch  6 | Train Loss: 1.6762 | Train F1: 0.5853 | Val Loss: 1.2911 | Val F1: 0.8739 | LR: 4.88e-04
🎉 새로운 최고 성능! F1: 0.8739

📈 Epoch 7/50


Loss: 1.7373, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.1905: 100%|██████████| 10/10 [00:01<00:00,  5.91it/s]


 Epoch  7 | Train Loss: 1.5459 | Train F1: 0.6935 | Val Loss: 1.2620 | Val F1: 0.8705 | LR: 4.82e-04

📈 Epoch 8/50


Loss: 1.4238, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1545: 100%|██████████| 10/10 [00:01<00:00,  6.26it/s]


 Epoch  8 | Train Loss: 1.5054 | Train F1: 0.7209 | Val Loss: 1.2725 | Val F1: 0.8658 | LR: 4.76e-04

📈 Epoch 9/50


Loss: 1.3438, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.34it/s]
Val Loss: 1.1503: 100%|██████████| 10/10 [00:01<00:00,  5.85it/s]


 Epoch  9 | Train Loss: 1.5068 | Train F1: 0.7456 | Val Loss: 1.2382 | Val F1: 0.8726 | LR: 4.69e-04

📈 Epoch 10/50


Loss: 1.6963, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.1860: 100%|██████████| 10/10 [00:01<00:00,  5.87it/s]


 Epoch 10 | Train Loss: 1.4159 | Train F1: 0.7679 | Val Loss: 1.2363 | Val F1: 0.8595 | LR: 4.61e-04

📈 Epoch 11/50


Loss: 1.4912, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.1411: 100%|██████████| 10/10 [00:01<00:00,  5.89it/s]


 Epoch 11 | Train Loss: 1.5120 | Train F1: 0.7667 | Val Loss: 1.2178 | Val F1: 0.8845 | LR: 4.52e-04
🎉 새로운 최고 성능! F1: 0.8845

📈 Epoch 12/50


Loss: 1.1562, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.1331: 100%|██████████| 10/10 [00:01<00:00,  5.95it/s]


 Epoch 12 | Train Loss: 1.4401 | Train F1: 0.7768 | Val Loss: 1.2193 | Val F1: 0.8847 | LR: 4.43e-04
🎉 새로운 최고 성능! F1: 0.8847

📈 Epoch 13/50


Loss: 1.1533, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.31it/s]
Val Loss: 1.2241: 100%|██████████| 10/10 [00:01<00:00,  6.33it/s]


 Epoch 13 | Train Loss: 1.4414 | Train F1: 0.7794 | Val Loss: 1.2362 | Val F1: 0.8665 | LR: 4.32e-04

📈 Epoch 14/50


Loss: 1.0957, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.2325: 100%|██████████| 10/10 [00:01<00:00,  5.87it/s]


 Epoch 14 | Train Loss: 1.4276 | Train F1: 0.7370 | Val Loss: 1.2252 | Val F1: 0.8580 | LR: 4.21e-04

📈 Epoch 15/50


Loss: 1.3086, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:12<00:00,  3.31it/s]
Val Loss: 1.1886: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 15 | Train Loss: 1.4661 | Train F1: 0.7212 | Val Loss: 1.2027 | Val F1: 0.8873 | LR: 4.09e-04
🎉 새로운 최고 성능! F1: 0.8873

📈 Epoch 16/50


Loss: 1.1152, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.37it/s]
Val Loss: 1.1960: 100%|██████████| 10/10 [00:01<00:00,  6.21it/s]


 Epoch 16 | Train Loss: 1.4054 | Train F1: 0.7872 | Val Loss: 1.2021 | Val F1: 0.8855 | LR: 3.97e-04

📈 Epoch 17/50


Loss: 1.3242, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.46it/s]
Val Loss: 1.1524: 100%|██████████| 10/10 [00:01<00:00,  6.23it/s]


 Epoch 17 | Train Loss: 1.3555 | Train F1: 0.7900 | Val Loss: 1.1981 | Val F1: 0.8858 | LR: 3.84e-04

📈 Epoch 18/50


Loss: 1.2158, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.1305: 100%|██████████| 10/10 [00:01<00:00,  5.96it/s]


 Epoch 18 | Train Loss: 1.3076 | Train F1: 0.8421 | Val Loss: 1.1914 | Val F1: 0.8860 | LR: 3.70e-04

📈 Epoch 19/50


Loss: 1.3643, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.37it/s]
Val Loss: 1.1450: 100%|██████████| 10/10 [00:01<00:00,  6.13it/s]


 Epoch 19 | Train Loss: 1.3802 | Train F1: 0.7942 | Val Loss: 1.1984 | Val F1: 0.8869 | LR: 3.56e-04

📈 Epoch 20/50


Loss: 1.7148, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.2077: 100%|██████████| 10/10 [00:01<00:00,  5.86it/s]


 Epoch 20 | Train Loss: 1.4547 | Train F1: 0.7239 | Val Loss: 1.2104 | Val F1: 0.8830 | LR: 3.42e-04

📈 Epoch 21/50


Loss: 1.4395, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.0888: 100%|██████████| 10/10 [00:01<00:00,  5.86it/s]


 Epoch 21 | Train Loss: 1.3839 | Train F1: 0.8348 | Val Loss: 1.1666 | Val F1: 0.9231 | LR: 3.27e-04
🎉 새로운 최고 성능! F1: 0.9231

📈 Epoch 22/50


Loss: 1.1934, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.25it/s]
Val Loss: 1.0886: 100%|██████████| 10/10 [00:01<00:00,  6.00it/s]


 Epoch 22 | Train Loss: 1.4253 | Train F1: 0.7684 | Val Loss: 1.1690 | Val F1: 0.9239 | LR: 3.12e-04
🎉 새로운 최고 성능! F1: 0.9239

📈 Epoch 23/50


Loss: 1.4941, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.52it/s]
Val Loss: 1.0868: 100%|██████████| 10/10 [00:01<00:00,  5.58it/s]


 Epoch 23 | Train Loss: 1.2829 | Train F1: 0.8522 | Val Loss: 1.1708 | Val F1: 0.9129 | LR: 2.97e-04

📈 Epoch 24/50


Loss: 1.4971, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.0769: 100%|██████████| 10/10 [00:01<00:00,  6.22it/s]


 Epoch 24 | Train Loss: 1.3591 | Train F1: 0.8317 | Val Loss: 1.1736 | Val F1: 0.9198 | LR: 2.81e-04

📈 Epoch 25/50


Loss: 2.0625, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.1236: 100%|██████████| 10/10 [00:01<00:00,  6.20it/s]


 Epoch 25 | Train Loss: 1.3041 | Train F1: 0.8332 | Val Loss: 1.1923 | Val F1: 0.8958 | LR: 2.66e-04

📈 Epoch 26/50


Loss: 1.3125, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.34it/s]
Val Loss: 1.1370: 100%|██████████| 10/10 [00:01<00:00,  5.92it/s]


 Epoch 26 | Train Loss: 1.2892 | Train F1: 0.8078 | Val Loss: 1.2187 | Val F1: 0.8843 | LR: 2.50e-04

📈 Epoch 27/50


Loss: 1.1719, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.0847: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 27 | Train Loss: 1.3556 | Train F1: 0.7769 | Val Loss: 1.1579 | Val F1: 0.9322 | LR: 2.34e-04
🎉 새로운 최고 성능! F1: 0.9322

📈 Epoch 28/50


Loss: 1.8916, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.33it/s]
Val Loss: 1.1194: 100%|██████████| 10/10 [00:01<00:00,  5.94it/s]


 Epoch 28 | Train Loss: 1.3063 | Train F1: 0.8141 | Val Loss: 1.1709 | Val F1: 0.8969 | LR: 2.19e-04

📈 Epoch 29/50


Loss: 1.1631, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.1441: 100%|██████████| 10/10 [00:01<00:00,  5.87it/s]


 Epoch 29 | Train Loss: 1.2996 | Train F1: 0.8206 | Val Loss: 1.1778 | Val F1: 0.9135 | LR: 2.03e-04

📈 Epoch 30/50


Loss: 1.2148, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.1192: 100%|██████████| 10/10 [00:01<00:00,  5.96it/s]


 Epoch 30 | Train Loss: 1.2784 | Train F1: 0.8766 | Val Loss: 1.1757 | Val F1: 0.8834 | LR: 1.88e-04

📈 Epoch 31/50


Loss: 1.1797, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.1190: 100%|██████████| 10/10 [00:01<00:00,  5.87it/s]


 Epoch 31 | Train Loss: 1.2979 | Train F1: 0.8098 | Val Loss: 1.1735 | Val F1: 0.9044 | LR: 1.73e-04

📈 Epoch 32/50


Loss: 1.1201, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.37it/s]
Val Loss: 1.1129: 100%|██████████| 10/10 [00:01<00:00,  5.88it/s]


 Epoch 32 | Train Loss: 1.3110 | Train F1: 0.7717 | Val Loss: 1.1739 | Val F1: 0.9192 | LR: 1.58e-04

📈 Epoch 33/50


Loss: 1.1074, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.1015: 100%|██████████| 10/10 [00:01<00:00,  5.54it/s]


 Epoch 33 | Train Loss: 1.2138 | Train F1: 0.9255 | Val Loss: 1.1707 | Val F1: 0.9161 | LR: 1.44e-04

📈 Epoch 34/50


Loss: 1.2246, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.0987: 100%|██████████| 10/10 [00:01<00:00,  5.96it/s]

 Epoch 34 | Train Loss: 1.1834 | Train F1: 0.9057 | Val Loss: 1.1763 | Val F1: 0.9155 | LR: 1.30e-04
⏸️ Early stopping at epoch 34 (patience: 7)

 Fold 3 완료!
 최고 Validation F1: 0.9322
 학습된 에폭: 34/50





0,1
best_performance/epoch,▁▁▂▂▂▄▄▅▆▇█
best_performance/val_acc,▁▄▅▇▇▇▇▇███
best_performance/val_f1,▁▃▅▆▇▇▇▇███
best_performance/val_loss,█▅▄▃▃▂▂▂▁▁▁
early_stopping/epoch,▁
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
fold_3/batch_step,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
fold_3/cutout_applied,▁▁██▁▁██▁███▁▁▁▁▁▁█▁█▁▁█▁▁▁▁▁▁▁▁▁▁
fold_3/mixup_applied,▁▁▁▁█▁▁▁▁▁▁▁███▁▁▁▁█▁▁▁▁██▁▁█▁██▁▁

0,1
best_performance/epoch,27
best_performance/val_acc,0.93631
best_performance/val_f1,0.93221
best_performance/val_loss,1.1579
early_stopping/epoch,34
epoch,34
fold,3
fold_3/batch_step,1320
fold_3/cutout_applied,0
fold_3/mixup_applied,0



 FOLD 4/5


📊 Fold 4 Dashboard: https://wandb.ai/kimsunmin0227-hufs/document-classification-team/runs/j75cyh6j
Train samples: 1256, Validation samples: 314
 모델 학습 시작 - Fold 4

📈 Epoch 1/50


Loss: 2.1738, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:12<00:00,  3.30it/s]
Val Loss: 1.7969: 100%|██████████| 10/10 [00:01<00:00,  6.29it/s]


 Epoch  1 | Train Loss: 2.6602 | Train F1: 0.2756 | Val Loss: 1.8156 | Val F1: 0.6188 | LR: 5.00e-04
🎉 새로운 최고 성능! F1: 0.6188

📈 Epoch 2/50


Loss: 1.9053, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.5285: 100%|██████████| 10/10 [00:01<00:00,  6.25it/s]


 Epoch  2 | Train Loss: 2.0823 | Train F1: 0.4941 | Val Loss: 1.6167 | Val F1: 0.6888 | LR: 5.00e-04
🎉 새로운 최고 성능! F1: 0.6888

📈 Epoch 3/50


Loss: 2.4668, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.4568: 100%|██████████| 10/10 [00:01<00:00,  5.58it/s]


 Epoch  3 | Train Loss: 1.8359 | Train F1: 0.5858 | Val Loss: 1.4577 | Val F1: 0.7760 | LR: 4.98e-04
🎉 새로운 최고 성능! F1: 0.7760

📈 Epoch 4/50


Loss: 2.1855, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.3671: 100%|██████████| 10/10 [00:01<00:00,  6.32it/s]


 Epoch  4 | Train Loss: 1.7341 | Train F1: 0.6738 | Val Loss: 1.4228 | Val F1: 0.7978 | LR: 4.96e-04
🎉 새로운 최고 성능! F1: 0.7978

📈 Epoch 5/50


Loss: 1.5645, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.46it/s]
Val Loss: 1.3733: 100%|██████████| 10/10 [00:01<00:00,  5.91it/s]


 Epoch  5 | Train Loss: 1.6048 | Train F1: 0.7126 | Val Loss: 1.3962 | Val F1: 0.8097 | LR: 4.92e-04
🎉 새로운 최고 성능! F1: 0.8097

📈 Epoch 6/50


Loss: 1.9434, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.32it/s]
Val Loss: 1.2597: 100%|██████████| 10/10 [00:01<00:00,  5.94it/s]


 Epoch  6 | Train Loss: 1.5595 | Train F1: 0.7029 | Val Loss: 1.2902 | Val F1: 0.8788 | LR: 4.88e-04
🎉 새로운 최고 성능! F1: 0.8788

📈 Epoch 7/50


Loss: 1.7773, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.38it/s]
Val Loss: 1.1641: 100%|██████████| 10/10 [00:01<00:00,  5.57it/s]


 Epoch  7 | Train Loss: 1.5495 | Train F1: 0.7114 | Val Loss: 1.2950 | Val F1: 0.8704 | LR: 4.82e-04

📈 Epoch 8/50


Loss: 2.4316, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.33it/s]
Val Loss: 1.1469: 100%|██████████| 10/10 [00:01<00:00,  5.94it/s]


 Epoch  8 | Train Loss: 1.5168 | Train F1: 0.7492 | Val Loss: 1.2717 | Val F1: 0.8930 | LR: 4.76e-04
🎉 새로운 최고 성능! F1: 0.8930

📈 Epoch 9/50


Loss: 1.8555, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.35it/s]
Val Loss: 1.1919: 100%|██████████| 10/10 [00:01<00:00,  5.92it/s]


 Epoch  9 | Train Loss: 1.4769 | Train F1: 0.7197 | Val Loss: 1.2700 | Val F1: 0.8667 | LR: 4.69e-04

📈 Epoch 10/50


Loss: 1.4424, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.1624: 100%|██████████| 10/10 [00:01<00:00,  5.88it/s]


 Epoch 10 | Train Loss: 1.4061 | Train F1: 0.8062 | Val Loss: 1.2701 | Val F1: 0.8628 | LR: 4.61e-04

📈 Epoch 11/50


Loss: 2.2031, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.1593: 100%|██████████| 10/10 [00:01<00:00,  5.80it/s]


 Epoch 11 | Train Loss: 1.3684 | Train F1: 0.8197 | Val Loss: 1.2775 | Val F1: 0.8751 | LR: 4.52e-04

📈 Epoch 12/50


Loss: 1.3945, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:12<00:00,  3.32it/s]
Val Loss: 1.2008: 100%|██████████| 10/10 [00:01<00:00,  6.26it/s]


 Epoch 12 | Train Loss: 1.4177 | Train F1: 0.7813 | Val Loss: 1.2565 | Val F1: 0.8782 | LR: 4.43e-04

📈 Epoch 13/50


Loss: 2.2578, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.36it/s]
Val Loss: 1.1980: 100%|██████████| 10/10 [00:01<00:00,  6.28it/s]


 Epoch 13 | Train Loss: 1.3983 | Train F1: 0.7757 | Val Loss: 1.2477 | Val F1: 0.8965 | LR: 4.32e-04
🎉 새로운 최고 성능! F1: 0.8965

📈 Epoch 14/50


Loss: 1.9336, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.37it/s]
Val Loss: 1.2150: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 14 | Train Loss: 1.4695 | Train F1: 0.7742 | Val Loss: 1.2734 | Val F1: 0.8575 | LR: 4.21e-04

📈 Epoch 15/50


Loss: 1.1240, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.1742: 100%|██████████| 10/10 [00:01<00:00,  6.24it/s]


 Epoch 15 | Train Loss: 1.3552 | Train F1: 0.8280 | Val Loss: 1.2583 | Val F1: 0.8720 | LR: 4.09e-04

📈 Epoch 16/50


Loss: 1.5195, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.1152: 100%|██████████| 10/10 [00:01<00:00,  5.96it/s]


 Epoch 16 | Train Loss: 1.3580 | Train F1: 0.8432 | Val Loss: 1.2324 | Val F1: 0.8910 | LR: 3.97e-04

📈 Epoch 17/50


Loss: 1.0918, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1001: 100%|██████████| 10/10 [00:01<00:00,  6.29it/s]


 Epoch 17 | Train Loss: 1.4179 | Train F1: 0.7912 | Val Loss: 1.2253 | Val F1: 0.8996 | LR: 3.84e-04
🎉 새로운 최고 성능! F1: 0.8996

📈 Epoch 18/50


Loss: 1.2471, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.46it/s]
Val Loss: 1.1285: 100%|██████████| 10/10 [00:01<00:00,  6.21it/s]


 Epoch 18 | Train Loss: 1.3389 | Train F1: 0.8120 | Val Loss: 1.2211 | Val F1: 0.8898 | LR: 3.70e-04

📈 Epoch 19/50


Loss: 1.5547, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.0848: 100%|██████████| 10/10 [00:01<00:00,  5.91it/s]


 Epoch 19 | Train Loss: 1.3760 | Train F1: 0.7950 | Val Loss: 1.1977 | Val F1: 0.9180 | LR: 3.56e-04
🎉 새로운 최고 성능! F1: 0.9180

📈 Epoch 20/50


Loss: 2.3457, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.46it/s]
Val Loss: 1.0789: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 20 | Train Loss: 1.3668 | Train F1: 0.8050 | Val Loss: 1.2020 | Val F1: 0.9186 | LR: 3.42e-04
🎉 새로운 최고 성능! F1: 0.9186

📈 Epoch 21/50


Loss: 1.5039, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.1250: 100%|██████████| 10/10 [00:01<00:00,  6.33it/s]


 Epoch 21 | Train Loss: 1.2681 | Train F1: 0.8433 | Val Loss: 1.2022 | Val F1: 0.8832 | LR: 3.27e-04

📈 Epoch 22/50


Loss: 1.3330, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.33it/s]
Val Loss: 1.1183: 100%|██████████| 10/10 [00:01<00:00,  5.91it/s]


 Epoch 22 | Train Loss: 1.4544 | Train F1: 0.7468 | Val Loss: 1.2104 | Val F1: 0.8921 | LR: 3.12e-04

📈 Epoch 23/50


Loss: 1.4307, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.1109: 100%|██████████| 10/10 [00:01<00:00,  6.23it/s]


 Epoch 23 | Train Loss: 1.3735 | Train F1: 0.7513 | Val Loss: 1.2019 | Val F1: 0.8948 | LR: 2.97e-04

📈 Epoch 24/50


Loss: 1.2803, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.34it/s]
Val Loss: 1.1070: 100%|██████████| 10/10 [00:01<00:00,  5.58it/s]


 Epoch 24 | Train Loss: 1.2853 | Train F1: 0.8700 | Val Loss: 1.1978 | Val F1: 0.8894 | LR: 2.81e-04

📈 Epoch 25/50


Loss: 1.1641, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:12<00:00,  3.31it/s]
Val Loss: 1.1027: 100%|██████████| 10/10 [00:01<00:00,  5.91it/s]


 Epoch 25 | Train Loss: 1.2914 | Train F1: 0.8279 | Val Loss: 1.2148 | Val F1: 0.8887 | LR: 2.66e-04

📈 Epoch 26/50


Loss: 1.1719, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.34it/s]
Val Loss: 1.1122: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 26 | Train Loss: 1.3821 | Train F1: 0.7635 | Val Loss: 1.2130 | Val F1: 0.8846 | LR: 2.50e-04

📈 Epoch 27/50


Loss: 1.1914, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.0910: 100%|██████████| 10/10 [00:01<00:00,  5.92it/s]

 Epoch 27 | Train Loss: 1.2598 | Train F1: 0.8366 | Val Loss: 1.2013 | Val F1: 0.8904 | LR: 2.34e-04
⏸️ Early stopping at epoch 27 (patience: 7)

 Fold 4 완료!
 최고 Validation F1: 0.9186
 학습된 에폭: 27/50





0,1
best_performance/epoch,▁▁▂▂▂▃▄▅▇██
best_performance/val_acc,▁▃▅▆▆▇▇▇███
best_performance/val_f1,▁▃▅▅▅▇▇▇███
best_performance/val_loss,█▆▄▄▃▂▂▂▁▁▁
early_stopping/epoch,▁
epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
fold_4/batch_step,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
fold_4/cutout_applied,▁█▁▁▁▁▁▁█▁█▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁
fold_4/mixup_applied,█▁▁█▁▁██▁▁▁▁▁▁██▁▁██▁▁█▁▁██

0,1
best_performance/epoch,20
best_performance/val_acc,0.92675
best_performance/val_f1,0.91859
best_performance/val_loss,1.20204
early_stopping/epoch,27
epoch,27
fold,4
fold_4/batch_step,1040
fold_4/cutout_applied,0
fold_4/mixup_applied,1



 FOLD 5/5


📊 Fold 5 Dashboard: https://wandb.ai/kimsunmin0227-hufs/document-classification-team/runs/bv3qvohp
Train samples: 1256, Validation samples: 314
 모델 학습 시작 - Fold 5

📈 Epoch 1/50


Loss: 1.9355, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.30it/s]
Val Loss: 1.6529: 100%|██████████| 10/10 [00:01<00:00,  6.25it/s]


 Epoch  1 | Train Loss: 2.7624 | Train F1: 0.2126 | Val Loss: 1.7412 | Val F1: 0.6563 | LR: 5.00e-04
🎉 새로운 최고 성능! F1: 0.6563

📈 Epoch 2/50


Loss: 1.8926, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:12<00:00,  3.23it/s]
Val Loss: 1.3612: 100%|██████████| 10/10 [00:01<00:00,  5.94it/s]


 Epoch  2 | Train Loss: 2.0261 | Train F1: 0.4909 | Val Loss: 1.5387 | Val F1: 0.7353 | LR: 5.00e-04
🎉 새로운 최고 성능! F1: 0.7353

📈 Epoch 3/50


Loss: 2.2969, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.34it/s]
Val Loss: 1.3089: 100%|██████████| 10/10 [00:01<00:00,  5.90it/s]


 Epoch  3 | Train Loss: 1.7352 | Train F1: 0.6192 | Val Loss: 1.3878 | Val F1: 0.8100 | LR: 4.98e-04
🎉 새로운 최고 성능! F1: 0.8100

📈 Epoch 4/50


Loss: 1.5361, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.37it/s]
Val Loss: 1.3109: 100%|██████████| 10/10 [00:01<00:00,  6.33it/s]


 Epoch  4 | Train Loss: 1.7112 | Train F1: 0.6303 | Val Loss: 1.3461 | Val F1: 0.8285 | LR: 4.96e-04
🎉 새로운 최고 성능! F1: 0.8285

📈 Epoch 5/50


Loss: 1.3574, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.47it/s]
Val Loss: 1.2812: 100%|██████████| 10/10 [00:01<00:00,  6.26it/s]


 Epoch  5 | Train Loss: 1.7267 | Train F1: 0.5866 | Val Loss: 1.3307 | Val F1: 0.8148 | LR: 4.92e-04

📈 Epoch 6/50


Loss: 1.1572, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.2379: 100%|██████████| 10/10 [00:01<00:00,  6.29it/s]


 Epoch  6 | Train Loss: 1.5679 | Train F1: 0.6875 | Val Loss: 1.2918 | Val F1: 0.8517 | LR: 4.88e-04
🎉 새로운 최고 성능! F1: 0.8517

📈 Epoch 7/50


Loss: 1.2529, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.37it/s]
Val Loss: 1.2492: 100%|██████████| 10/10 [00:01<00:00,  5.90it/s]


 Epoch  7 | Train Loss: 1.4795 | Train F1: 0.7540 | Val Loss: 1.2708 | Val F1: 0.8522 | LR: 4.82e-04
🎉 새로운 최고 성능! F1: 0.8522

📈 Epoch 8/50


Loss: 1.4697, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1615: 100%|██████████| 10/10 [00:01<00:00,  6.32it/s]


 Epoch  8 | Train Loss: 1.4629 | Train F1: 0.7565 | Val Loss: 1.2640 | Val F1: 0.8596 | LR: 4.76e-04
🎉 새로운 최고 성능! F1: 0.8596

📈 Epoch 9/50


Loss: 1.2588, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.35it/s]
Val Loss: 1.1963: 100%|██████████| 10/10 [00:01<00:00,  5.90it/s]


 Epoch  9 | Train Loss: 1.5054 | Train F1: 0.7526 | Val Loss: 1.2469 | Val F1: 0.8741 | LR: 4.69e-04
🎉 새로운 최고 성능! F1: 0.8741

📈 Epoch 10/50


Loss: 1.6211, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.1633: 100%|██████████| 10/10 [00:01<00:00,  5.31it/s]


 Epoch 10 | Train Loss: 1.4817 | Train F1: 0.7619 | Val Loss: 1.2535 | Val F1: 0.8869 | LR: 4.61e-04
🎉 새로운 최고 성능! F1: 0.8869

📈 Epoch 11/50


Loss: 1.3750, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.35it/s]
Val Loss: 1.3525: 100%|██████████| 10/10 [00:01<00:00,  6.27it/s]


 Epoch 11 | Train Loss: 1.3997 | Train F1: 0.7585 | Val Loss: 1.2848 | Val F1: 0.8614 | LR: 4.52e-04

📈 Epoch 12/50


Loss: 1.2695, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.46it/s]
Val Loss: 1.1373: 100%|██████████| 10/10 [00:01<00:00,  5.58it/s]


 Epoch 12 | Train Loss: 1.4763 | Train F1: 0.7161 | Val Loss: 1.2270 | Val F1: 0.9088 | LR: 4.43e-04
🎉 새로운 최고 성능! F1: 0.9088

📈 Epoch 13/50


Loss: 1.2090, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:12<00:00,  3.32it/s]
Val Loss: 1.1939: 100%|██████████| 10/10 [00:01<00:00,  6.24it/s]


 Epoch 13 | Train Loss: 1.3528 | Train F1: 0.7743 | Val Loss: 1.2157 | Val F1: 0.8764 | LR: 4.32e-04

📈 Epoch 14/50


Loss: 1.3164, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.43it/s]
Val Loss: 1.1363: 100%|██████████| 10/10 [00:01<00:00,  5.94it/s]


 Epoch 14 | Train Loss: 1.4643 | Train F1: 0.7641 | Val Loss: 1.2076 | Val F1: 0.9108 | LR: 4.21e-04
🎉 새로운 최고 성능! F1: 0.9108

📈 Epoch 15/50


Loss: 1.2383, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.38it/s]
Val Loss: 1.1220: 100%|██████████| 10/10 [00:01<00:00,  5.92it/s]


 Epoch 15 | Train Loss: 1.3732 | Train F1: 0.7885 | Val Loss: 1.1908 | Val F1: 0.8974 | LR: 4.09e-04

📈 Epoch 16/50


Loss: 1.4346, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.36it/s]
Val Loss: 1.1410: 100%|██████████| 10/10 [00:01<00:00,  5.96it/s]


 Epoch 16 | Train Loss: 1.3068 | Train F1: 0.8598 | Val Loss: 1.1995 | Val F1: 0.8938 | LR: 3.97e-04

📈 Epoch 17/50


Loss: 1.3174, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.36it/s]
Val Loss: 1.1914: 100%|██████████| 10/10 [00:01<00:00,  6.28it/s]


 Epoch 17 | Train Loss: 1.3110 | Train F1: 0.8232 | Val Loss: 1.2235 | Val F1: 0.8738 | LR: 3.84e-04

📈 Epoch 18/50


Loss: 1.2949, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.2511: 100%|██████████| 10/10 [00:01<00:00,  5.95it/s]


 Epoch 18 | Train Loss: 1.3556 | Train F1: 0.7465 | Val Loss: 1.2125 | Val F1: 0.8836 | LR: 3.70e-04

📈 Epoch 19/50


Loss: 1.8818, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.51it/s]
Val Loss: 1.0786: 100%|██████████| 10/10 [00:01<00:00,  6.30it/s]


 Epoch 19 | Train Loss: 1.2831 | Train F1: 0.8257 | Val Loss: 1.1860 | Val F1: 0.9187 | LR: 3.56e-04
🎉 새로운 최고 성능! F1: 0.9187

📈 Epoch 20/50


Loss: 1.9238, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.1844: 100%|██████████| 10/10 [00:01<00:00,  5.89it/s]


 Epoch 20 | Train Loss: 1.3251 | Train F1: 0.8176 | Val Loss: 1.1817 | Val F1: 0.8994 | LR: 3.42e-04

📈 Epoch 21/50


Loss: 1.1494, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.1569: 100%|██████████| 10/10 [00:01<00:00,  6.30it/s]


 Epoch 21 | Train Loss: 1.2949 | Train F1: 0.8863 | Val Loss: 1.2017 | Val F1: 0.8950 | LR: 3.27e-04

📈 Epoch 22/50


Loss: 1.3594, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.1078: 100%|██████████| 10/10 [00:01<00:00,  5.88it/s]


 Epoch 22 | Train Loss: 1.2776 | Train F1: 0.8350 | Val Loss: 1.1853 | Val F1: 0.9085 | LR: 3.12e-04

📈 Epoch 23/50


Loss: 1.3389, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.1106: 100%|██████████| 10/10 [00:01<00:00,  5.97it/s]


 Epoch 23 | Train Loss: 1.2519 | Train F1: 0.8617 | Val Loss: 1.1949 | Val F1: 0.9064 | LR: 2.97e-04

📈 Epoch 24/50


Loss: 1.0781, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:12<00:00,  3.33it/s]
Val Loss: 1.0949: 100%|██████████| 10/10 [00:01<00:00,  6.10it/s]


 Epoch 24 | Train Loss: 1.2630 | Train F1: 0.8128 | Val Loss: 1.1796 | Val F1: 0.9051 | LR: 2.81e-04

📈 Epoch 25/50


Loss: 1.1357, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:12<00:00,  3.32it/s]
Val Loss: 1.2270: 100%|██████████| 10/10 [00:01<00:00,  5.92it/s]


 Epoch 25 | Train Loss: 1.3744 | Train F1: 0.7528 | Val Loss: 1.2108 | Val F1: 0.9086 | LR: 2.66e-04

📈 Epoch 26/50


Loss: 1.0850, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.0883: 100%|██████████| 10/10 [00:01<00:00,  5.94it/s]


 Epoch 26 | Train Loss: 1.2285 | Train F1: 0.8618 | Val Loss: 1.1746 | Val F1: 0.9107 | LR: 2.50e-04

📈 Epoch 27/50


Loss: 1.1123, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.1326: 100%|██████████| 10/10 [00:01<00:00,  5.91it/s]


 Epoch 27 | Train Loss: 1.2462 | Train F1: 0.8905 | Val Loss: 1.1680 | Val F1: 0.9225 | LR: 2.34e-04
🎉 새로운 최고 성능! F1: 0.9225

📈 Epoch 28/50


Loss: 1.2061, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.1289: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 28 | Train Loss: 1.3409 | Train F1: 0.8292 | Val Loss: 1.1839 | Val F1: 0.9176 | LR: 2.19e-04

📈 Epoch 29/50


Loss: 2.0215, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.48it/s]
Val Loss: 1.1152: 100%|██████████| 10/10 [00:01<00:00,  5.91it/s]


 Epoch 29 | Train Loss: 1.2753 | Train F1: 0.8468 | Val Loss: 1.1763 | Val F1: 0.9197 | LR: 2.03e-04

📈 Epoch 30/50


Loss: 1.2578, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.45it/s]
Val Loss: 1.1309: 100%|██████████| 10/10 [00:01<00:00,  5.93it/s]


 Epoch 30 | Train Loss: 1.2594 | Train F1: 0.8682 | Val Loss: 1.1656 | Val F1: 0.9186 | LR: 1.88e-04

📈 Epoch 31/50


Loss: 1.3359, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.40it/s]
Val Loss: 1.0970: 100%|██████████| 10/10 [00:01<00:00,  5.87it/s]


 Epoch 31 | Train Loss: 1.2802 | Train F1: 0.8409 | Val Loss: 1.1863 | Val F1: 0.9024 | LR: 1.73e-04

📈 Epoch 32/50


Loss: 1.2568, Mixup: False, Cutout: True, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.39it/s]
Val Loss: 1.1469: 100%|██████████| 10/10 [00:01<00:00,  5.91it/s]


 Epoch 32 | Train Loss: 1.3748 | Train F1: 0.7990 | Val Loss: 1.1905 | Val F1: 0.9157 | LR: 1.58e-04

📈 Epoch 33/50


Loss: 1.2646, Mixup: True, Cutout: False, RandomCrop: False: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.1174: 100%|██████████| 10/10 [00:01<00:00,  5.91it/s]


 Epoch 33 | Train Loss: 1.2042 | Train F1: 0.9241 | Val Loss: 1.1716 | Val F1: 0.9141 | LR: 1.44e-04

📈 Epoch 34/50


Loss: 1.2031, Mixup: False, Cutout: False, RandomCrop: True: 100%|██████████| 40/40 [00:11<00:00,  3.42it/s]
Val Loss: 1.1391: 100%|██████████| 10/10 [00:01<00:00,  6.32it/s]

 Epoch 34 | Train Loss: 1.2345 | Train F1: 0.8598 | Val Loss: 1.1756 | Val F1: 0.9033 | LR: 1.30e-04
⏸️ Early stopping at epoch 34 (patience: 7)

 Fold 5 완료!
 최고 Validation F1: 0.9225
 학습된 에폭: 34/50





0,1
best_performance/epoch,▁▁▂▂▂▃▃▃▃▄▅▆█
best_performance/val_acc,▁▃▅▅▆▆▆▆▇▇▇██
best_performance/val_f1,▁▃▅▆▆▆▆▇▇████
best_performance/val_loss,█▆▄▃▃▂▂▂▂▂▁▁▁
early_stopping/epoch,▁
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
fold,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
fold_5/batch_step,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
fold_5/cutout_applied,█▁▁▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁█▁
fold_5/mixup_applied,▁▁▁▁▁█▁▁▁▁▁▁█▁█▁▁▁█▁▁▁▁▁█▁█▁▁▁▁▁▁▁

0,1
best_performance/epoch,27
best_performance/val_acc,0.92994
best_performance/val_f1,0.92248
best_performance/val_loss,1.16797
early_stopping/epoch,34
epoch,34
fold,5
fold_5/batch_step,1320
fold_5/cutout_applied,0
fold_5/mixup_applied,0


In [12]:
# =============================================================================
# 13. K-Fold Cross Validation Results Summary
# =============================================================================

print(f"\n{'='*60}")
print(" K-FOLD CROSS VALIDATION 최종 결과")
print(f"{'='*60}")

val_f1_scores = [result['best_val_f1'] for result in fold_results]
mean_f1 = np.mean(val_f1_scores)
std_f1 = np.std(val_f1_scores)

try:
    # wandb.run이 현재 활성화된 run을 가리킴
    if wandb.run is None:
        print(" 활성화된 run이 없어 새로운 summary run을 생성합니다.")
        active_run = wandb.init(
            project=PROJECT_NAME,
            name=f"SUMMARY-{EXPERIMENT_NAME}-{datetime.now().strftime('%m%d-%H%M')}",
            config=config,
            tags=["summary", "cv-results", model_name],
            group="k-fold-experiment",
            job_type="summary",
            reinit=True
        )
    else:
        print(" 기존 run을 사용합니다.")
        active_run = wandb.run
        
except Exception as e:
    print(f" Run 상태 확인 중 에러: {e}")
    # 새로운 run 생성
    active_run = wandb.init(
        project=PROJECT_NAME,
        name=f"SUMMARY-{EXPERIMENT_NAME}-{datetime.now().strftime('%m%d-%H%M')}",
        config=config,
        tags=["summary", "cv-results", model_name],
        group="k-fold-experiment",
        job_type="summary",
        reinit=True
    )

# CV 요약 테이블 생성
fold_table = wandb.Table(columns=[
    "Fold", "Best_Val_F1", "Final_Train_F1", "Train_Samples", 
    "Val_Samples", "Epochs_Trained", "Early_Stopped"
])

for result in fold_results:
    fold_table.add_data(
        result['fold'], 
        result['best_val_f1'], 
        result['final_train_f1'],
        result['train_samples'], 
        result['val_samples'],
        result['epochs_trained'],
        result['early_stopped']
    )

# 안전한 로깅
try:
    active_run.log({
        "cv_results/mean_f1": mean_f1,
        "cv_results/std_f1": std_f1,
        "cv_results/best_fold_f1": max(val_f1_scores),
        "cv_results/worst_fold_f1": min(val_f1_scores),
        "cv_results/f1_range": max(val_f1_scores) - min(val_f1_scores),
        "cv_results/fold_results_table": fold_table,
        "cv_results/n_folds": N_FOLDS,
        "cv_results/total_epochs": sum([r['epochs_trained'] for r in fold_results]),
        "cv_results/avg_epochs_per_fold": np.mean([r['epochs_trained'] for r in fold_results]),
        "cv_results/early_stopped_folds": sum([r['early_stopped'] for r in fold_results])
    })
    
    # Fold별 성능 바차트 생성
    fold_performance_data = [[f"Fold {i+1}", score] for i, score in enumerate(val_f1_scores)]
    active_run.log({
        "cv_results/fold_performance_chart": wandb.plot.bar(
            wandb.Table(data=fold_performance_data, columns=["Fold", "F1_Score"]),
            "Fold", "F1_Score", 
            title="K-Fold Cross Validation Performance"
        )
    })
    
    print(" CV 결과 로깅 완료!")
    
except Exception as e:
    print(f" WandB 로깅 중 에러: {e}")
    print(" 결과를 콘솔에 출력합니다:")

# 어떤 경우든 콘솔에는 결과 출력
for result in fold_results:
    status = " Early Stopped" if result['early_stopped'] else " Completed"
    print(f"Fold {result['fold']}: {result['best_val_f1']:.4f} "
          f"({result['epochs_trained']} epochs) {status}")

print(f"\n 평균 CV F1: {mean_f1:.4f} ± {std_f1:.4f}")
print(f" 최고 Fold: {max(val_f1_scores):.4f}")
print(f" 최악 Fold: {min(val_f1_scores):.4f}")
print(f" 성능 범위: {max(val_f1_scores) - min(val_f1_scores):.4f}")



 K-FOLD CROSS VALIDATION 최종 결과
 활성화된 run이 없어 새로운 summary run을 생성합니다.


 CV 결과 로깅 완료!
Fold 1: 0.9271 (27 epochs)  Early Stopped
Fold 2: 0.9412 (48 epochs)  Early Stopped
Fold 3: 0.9322 (34 epochs)  Early Stopped
Fold 4: 0.9186 (27 epochs)  Early Stopped
Fold 5: 0.9225 (34 epochs)  Early Stopped

 평균 CV F1: 0.9283 ± 0.0079
 최고 Fold: 0.9412
 최악 Fold: 0.9186
 성능 범위: 0.0227


In [13]:

# =============================================================================
# 14. Ensemble Models Preparation
# =============================================================================

# 5-Fold 앙상블 모델 준비
ensemble_models = []
print(f"\n🔧 앙상블 모델 준비 중...")

for i, state_dict in enumerate(fold_models):
    fold_model = timm.create_model(model_name, pretrained=True, num_classes=17).to(device)
    fold_model.load_state_dict(state_dict)
    fold_model.eval()
    ensemble_models.append(fold_model)
    print(f"Fold {i+1} 모델 로드 완료")

print(f" 총 {len(ensemble_models)}개 모델로 앙상블 구성")

try:
    if wandb.run is not None:
        wandb.run.log({
            "ensemble/num_models": len(ensemble_models),
            "ensemble/model_architecture": model_name,
            "ensemble/ensemble_type": "simple_average"
        })
    else:
        print("📊 앙상블 정보:")
        print(f"  - 모델 개수: {len(ensemble_models)}")
        print(f"  - 아키텍처: {model_name}")
        print(f"  - 앙상블 타입: simple_average")
except Exception as e:
    print(f"⚠️ 앙상블 정보 로깅 실패: {e}")



🔧 앙상블 모델 준비 중...
Fold 1 모델 로드 완료
Fold 2 모델 로드 완료
Fold 3 모델 로드 완료
Fold 4 모델 로드 완료
Fold 5 모델 로드 완료
 총 5개 모델로 앙상블 구성


In [14]:

# =============================================================================
# 15. TTA (Test Time Augmentation) Setup
# =============================================================================

# Temperature Scaling 클래스 정의
class TemperatureScaling(nn.Module):
    def __init__(self, temperature=1.5):
        super().__init__()
        self.temperature = nn.Parameter(torch.ones(1) * temperature)
    
    def forward(self, logits):
        return logits / self.temperature

print(f"\n TTA (Test Time Augmentation) 설정...")

# Essential TTA transforms
essential_tta_transforms = [
    # 원본
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    # 90도 회전들
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Rotate(limit=[90, 90], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Rotate(limit=[180, 180], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Rotate(limit=[-90, -90], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    # 밝기 개선
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.RandomBrightnessContrast(brightness_limit=[0.3, 0.3], contrast_limit=[0.3, 0.3], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
]

print(f"TTA 변환 {len(essential_tta_transforms)}개 준비 완료")

try:
    if wandb.run is not None:
        wandb.run.log({
            "tta/num_transforms": len(essential_tta_transforms),
            "tta/transforms_used": ["original", "rot_90", "rot_180", "rot_270", "brightness"],
            "tta/batch_size": 64  # TTA용 배치 크기
        })
    else:
        print("📊 TTA 설정 정보:")
        print(f"  - 변형 개수: {len(essential_tta_transforms)}")
        print(f"  - 변형 종류: original, rot_90, rot_180, rot_270, brightness")
        print(f"  - 배치 크기: 64")
except Exception as e:
    print(f"⚠️ TTA 설정 로깅 실패: {e}")
    print("📊 TTA 설정 정보:")
    print(f"  - 변형 개수: {len(essential_tta_transforms)}")
    print(f"  - 배치 크기: 64")


 TTA (Test Time Augmentation) 설정...
TTA 변환 5개 준비 완료


In [15]:
# =============================================================================
# 16. TTA Dataset and DataLoader
# =============================================================================

class TTAImageDataset(Dataset):
    def __init__(self, data, path, transforms):
        if isinstance(data, str):
            self.df = pd.read_csv(data).values
        else:
            self.df = data.values
        self.path = path
        self.transforms = transforms  # 여러 transform을 리스트로 받음

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        
        # 모든 transform을 적용한 결과를 리스트로 반환
        augmented_images = []
        for transform in self.transforms:
            aug_img = transform(image=img)['image']
            augmented_images.append(aug_img)
        
        return augmented_images, target

# TTA Dataset 생성
tta_dataset = TTAImageDataset(
    "../data/sample_submission.csv",
    "../data/test/",
    essential_tta_transforms
)

# TTA DataLoader (배치 크기를 줄여서 메모리 절약)
tta_loader = DataLoader(
    tta_dataset,
    batch_size=64,  # TTA는 메모리를 많이 사용하므로 배치 크기 줄임
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True
)

print(f" TTA Dataset: {len(tta_dataset)}개 테스트 샘플")

 TTA Dataset: 3140개 테스트 샘플


In [16]:

# =============================================================================
# 17. Ensemble + TTA Inference with WandB Logging
# =============================================================================

def ensemble_tta_inference_with_logging(models, loader, transforms, confidence_threshold=0.9):
    """5-Fold 모델 앙상블 + TTA 추론 with WandB 로깅"""
    all_predictions = []
    all_confidences = []
    
    # TTA 진행상황 로깅을 위한 테이블
    tta_progress = wandb.Table(columns=["Batch", "Avg_Confidence", "Low_Conf_Count", "High_Conf_Count"])
    
    # Temperature scaling 초기화
    temp_scaling = TemperatureScaling().to(device)
    
    print(f"앙상블 TTA 추론 시작...")
    print(f"{len(models)}개 모델 × {len(transforms)}개 TTA 변형 = {len(models) * len(transforms)}개 예측 평균")
    
    start_time = time.time()
    
    for batch_idx, (images_list, _) in enumerate(tqdm(loader, desc="Ensemble TTA")):
        batch_size = images_list[0].size(0)
        ensemble_probs = torch.zeros(batch_size, 17).to(device)
        
        # 각 fold 모델별 예측
        for model_idx, model in enumerate(models):
            model.eval()
            with torch.no_grad():
                # 각 TTA 변형별 예측
                for tta_idx, images in enumerate(images_list):
                    images = images.to(device)
                    preds = model(images)
                    
                    # Temperature scaling 적용
                    preds = temp_scaling(preds)
                    probs = torch.softmax(preds, dim=1)
                    
                    # 앙상블 확률에 누적 (평균)
                    ensemble_probs += probs / (len(models) * len(images_list))
        
        # 신뢰도 계산
        max_probs = torch.max(ensemble_probs, dim=1)[0]
        batch_confidences = max_probs.cpu().numpy()
        all_confidences.extend(batch_confidences)
        
        final_preds = torch.argmax(ensemble_probs, dim=1)
        all_predictions.extend(final_preds.cpu().numpy())
        
        # 배치별 신뢰도 분석
        high_conf_count = np.sum(batch_confidences >= confidence_threshold)
        low_conf_count = batch_size - high_conf_count
        avg_confidence = np.mean(batch_confidences)
        
        # 진행상황 테이블에 추가
        tta_progress.add_data(batch_idx, avg_confidence, low_conf_count, high_conf_count)
        
        # 배치별 상세 로깅 (20배치마다)
        if batch_idx % 20 == 0:
            elapsed_time = time.time() - start_time
            estimated_total = elapsed_time * len(loader) / (batch_idx + 1)
            remaining_time = estimated_total - elapsed_time
            
            wandb.log({
                "tta_progress/batch": batch_idx,
                "tta_progress/avg_confidence": avg_confidence,
                "tta_progress/high_confidence_ratio": high_conf_count / batch_size,
                "tta_progress/low_confidence_count": low_conf_count,
                "tta_progress/elapsed_time_min": elapsed_time / 60,
                "tta_progress/estimated_remaining_min": remaining_time / 60,
                "tta_progress/samples_processed": (batch_idx + 1) * batch_size,
            })
    
    total_time = time.time() - start_time
    
    # TTA 최종 결과 로깅
    final_avg_confidence = np.mean(all_confidences)
    confidence_std = np.std(all_confidences)
    high_conf_samples = np.sum(np.array(all_confidences) >= confidence_threshold)
    
    wandb.log({
        "tta_results/total_time_min": total_time / 60,
        "tta_results/samples_per_second": len(all_predictions) / total_time,
        "tta_results/final_avg_confidence": final_avg_confidence,
        "tta_results/confidence_std": confidence_std,
        "tta_results/high_confidence_samples": high_conf_samples,
        "tta_results/high_confidence_ratio": high_conf_samples / len(all_predictions),
        "tta_results/total_predictions": len(all_predictions),
        "tta_results/confidence_histogram": wandb.Histogram(all_confidences),
        "tta_results/progress_table": tta_progress
    })
    
    print(f"\n 앙상블 TTA 추론 완료!")
    print(f"총 소요시간: {total_time/60:.1f}분")
    print(f" 평균 신뢰도: {final_avg_confidence:.4f} ± {confidence_std:.4f}")
    print(f" 고신뢰도 샘플: {high_conf_samples}/{len(all_predictions)} ({high_conf_samples/len(all_predictions)*100:.1f}%)")
    
    return all_predictions, all_confidences

# 앙상블 TTA 실행
print(f"\n{'='*60}")
print(" 최종 추론 - 앙상블 + TTA")
print(f"{'='*60}")

tta_predictions, confidences = ensemble_tta_inference_with_logging(
    models=ensemble_models, 
    loader=tta_loader, 
    transforms=essential_tta_transforms,
    confidence_threshold=0.9
)



 최종 추론 - 앙상블 + TTA
앙상블 TTA 추론 시작...
5개 모델 × 5개 TTA 변형 = 25개 예측 평균


Ensemble TTA: 100%|██████████| 50/50 [02:51<00:00,  3.42s/it]



 앙상블 TTA 추론 완료!
총 소요시간: 2.9분
 평균 신뢰도: 0.4164 ± 0.0929
 고신뢰도 샘플: 0/3140 (0.0%)


In [17]:
# =============================================================================
# 18. Final Results and Submission
# =============================================================================

print(f"\n 최종 결과 정리 중...")

# TTA 결과로 submission 파일 생성
tta_pred_df = pd.DataFrame(tta_dataset.df, columns=['ID', 'target'])
tta_pred_df['target'] = tta_predictions

# 기존 submission과 동일한 순서인지 확인
sample_submission_df = pd.read_csv("../data/sample_submission.csv")
assert (sample_submission_df['ID'] == tta_pred_df['ID']).all(), "ID 순서 불일치!"

# 예측 분포 분석
pred_distribution = tta_pred_df['target'].value_counts().sort_index()
pred_table = wandb.Table(columns=["Class", "Count", "Percentage"])

print(f"\n📊 예측 결과 분포:")
for class_id in range(17):
    count = pred_distribution.get(class_id, 0)
    percentage = count / len(tta_pred_df) * 100
    pred_table.add_data(class_id, count, percentage)
    print(f"Class {class_id:2d}: {count:4d} ({percentage:5.1f}%)")

# 신뢰도 분석
confidence_bins = [0.5, 0.7, 0.8, 0.9, 0.95, 1.0]
confidence_analysis = {}
for i, threshold in enumerate(confidence_bins):
    if i == 0:
        count = np.sum(np.array(confidences) >= threshold)
    else:
        prev_threshold = confidence_bins[i-1]
        count = np.sum((np.array(confidences) >= prev_threshold) & (np.array(confidences) < threshold))
    confidence_analysis[f"conf_{threshold}"] = count

# 최종 결과 로깅
try:
    if wandb.run is not None:
        wandb.run.log({
            "final_results/total_predictions": len(tta_predictions),
            "final_results/unique_classes_predicted": len(np.unique(tta_predictions)),
            "final_results/prediction_distribution_table": pred_table,
            "final_results/avg_confidence": np.mean(confidences),
            "final_results/median_confidence": np.median(confidences),
            "final_results/min_confidence": np.min(confidences),
            "final_results/max_confidence": np.max(confidences),
            "final_results/confidence_distribution": wandb.Histogram(confidences),
            **confidence_analysis
        })
        print("최종 결과 WandB 로깅 완료!")
    else:
        print("활성화된 run이 없어 로깅을 건너뜁니다.")
except Exception as e:
    print(f"WandB 로깅 중 에러: {e}")

# 콘솔 출력은 항상 실행
print(f"총 예측 수: {len(tta_predictions)}")
print(f"예측된 클래스 수: {len(np.unique(tta_predictions))}")
print(f"평균 신뢰도: {np.mean(confidences):.4f}")
print(f"신뢰도 범위: {np.min(confidences):.4f} ~ {np.max(confidences):.4f}")


# 예측 분포 바차트
try:
    if wandb.run is not None:
        pred_dist_data = [[f"Class_{i}", pred_distribution.get(i, 0)] for i in range(17)]
        wandb.run.log({
            "final_results/prediction_distribution_chart": wandb.plot.bar(
                wandb.Table(data=pred_dist_data, columns=["Class", "Count"]),
                "Class", "Count", 
                title="Final Prediction Distribution"
            )
        })
        print("예측 분포 차트 로깅 완료!")
    else:
        print("차트 로깅을 건너뜁니다.")
except Exception as e:
    print(f"차트 로깅 중 에러: {e}")

# 결과 저장
output_path = "../output/choice4.csv"
tta_pred_df.to_csv(output_path, index=False)

# 결과 파일을 WandB 아티팩트로 저장
artifact = wandb.Artifact(
    name="final_predictions",
    type="predictions",
    description=f"Final ensemble predictions with {N_FOLDS}-fold CV + TTA"
)
artifact.add_file(output_path)

try:
    if wandb.run is not None:
        wandb.run.log_artifact(artifact)
        print("실험 요약 로깅 완료!")
    else:
        print("활성화된 run이 없어 실험 요약 로깅을 건너뜁니다.")
except Exception as e:
    print(f"실험 요약 로깅 중 에러: {e}")


print(f"\n 최종 결과 저장 완료!")
print(f" 파일 위치: {output_path}")
print(f" 총 예측 수: {len(tta_predictions)}")


 최종 결과 정리 중...

📊 예측 결과 분포:
Class  0:  200 (  6.4%)
Class  1:   91 (  2.9%)
Class  2:  200 (  6.4%)
Class  3:  258 (  8.2%)
Class  4:  199 (  6.3%)
Class  5:  200 (  6.4%)
Class  6:  203 (  6.5%)
Class  7:  142 (  4.5%)
Class  8:  200 (  6.4%)
Class  9:  200 (  6.4%)
Class 10:  210 (  6.7%)
Class 11:  191 (  6.1%)
Class 12:  199 (  6.3%)
Class 13:  158 (  5.0%)
Class 14:   89 (  2.8%)
Class 15:  200 (  6.4%)
Class 16:  200 (  6.4%)
최종 결과 WandB 로깅 완료!
총 예측 수: 3140
예측된 클래스 수: 17
평균 신뢰도: 0.4164
신뢰도 범위: 0.1129 ~ 0.6469
예측 분포 차트 로깅 완료!
실험 요약 로깅 완료!

 최종 결과 저장 완료!
 파일 위치: ../output/choice4.csv
 총 예측 수: 3140


In [18]:
# =============================================================================
# 19. Experiment Summary and Cleanup
# =============================================================================

# 실험 요약 생성
experiment_summary = {
    "experiment_name": main_run.name,
    "model_architecture": model_name,
    "image_size": img_size,
    "cv_strategy": f"{N_FOLDS}-Fold StratifiedKFold",
    "cv_mean_f1": mean_f1,
    "cv_std_f1": std_f1,
    "cv_best_fold": max(val_f1_scores),
    "ensemble_models": len(ensemble_models),
    "tta_transforms": len(essential_tta_transforms),
    "total_training_time_min": sum([r['epochs_trained'] for r in fold_results]) * 2,  # 추정치
    "avg_prediction_confidence": np.mean(confidences),
    "high_confidence_predictions": np.sum(np.array(confidences) >= 0.9),
    "experiment_tags": ["baseline", "efficientnet-b3", "k-fold-cv", "tta", "ensemble"]
}

# 실험 요약
try:
    if wandb.run is not None:
        wandb.run.log({"experiment_summary": experiment_summary})
        print("실험 요약 로깅 완료!")
    else:
        print("활성화된 run이 없어 실험 요약 로깅을 건너뜁니다.")
except Exception as e:
    print(f"실험 요약 로깅 중 에러: {e}")


# 마지막 상태 업데이트
try:
    if wandb.run is not None:
        wandb.run.log({
            "status": "completed",
            "completion_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "total_runtime_hours": 0  # start_time 속성 문제로 일단 0으로 설정
        })
        print("최종 상태 업데이트 완료!")
    else:
        print("활성화된 run이 없어 상태 업데이트를 건너뜁니다.")
except Exception as e:
    print(f"상태 업데이트 중 에러: {e}")

print(f"\n실험 완료 시간: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

print(f"\n{'='*60}")
print("실험 완료!")
print(f"{'='*60}")

print(f" K-Fold CV 결과: {mean_f1:.4f} ± {std_f1:.4f}")
print(f" 최고 성능 Fold: {max(val_f1_scores):.4f}")
print(f" 앙상블 모델: {len(ensemble_models)}개")
print(f" TTA 변형: {len(essential_tta_transforms)}개")
print(f" 평균 예측 신뢰도: {np.mean(confidences):.4f}")
print(f" WandB 대시보드: {main_run.url}")

# Sample predictions 출력
print(f"\n 예측 결과 샘플:")
print(tta_pred_df.head(10))

# 메인 run 종료
main_run.finish()

print(f"\n 모든 작업 완료!")
print(f" 결과 파일: {output_path}")
print(f" WandB에서 전체 실험 결과를 확인하세요!")

# 메모리 정리
del ensemble_models
torch.cuda.empty_cache()

실험 요약 로깅 완료!
최종 상태 업데이트 완료!

실험 완료 시간: 2025-09-04 14:24:08

실험 완료!
 K-Fold CV 결과: 0.9283 ± 0.0079
 최고 성능 Fold: 0.9412
 앙상블 모델: 5개
 TTA 변형: 5개
 평균 예측 신뢰도: 0.4164
 WandB 대시보드: https://wandb.ai/kimsunmin0227-hufs/document-classification-team/runs/mdvqqydn

 예측 결과 샘플:
                     ID  target
0  0008fdb22ddce0ce.jpg       2
1  00091bffdffd83de.jpg      12
2  00396fbc1f6cc21d.jpg       5
3  00471f8038d9c4b6.jpg      12
4  00901f504008d884.jpg       2
5  009b22decbc7220c.jpg      15
6  00b33e0ee6d59427.jpg       0
7  00bbdcfbbdb3e131.jpg       8
8  00c03047e0fbef40.jpg      15
9  00c0dabb63ca7a16.jpg      11

 모든 작업 완료!
 결과 파일: ../output/choice4.csv
 WandB에서 전체 실험 결과를 확인하세요!
