## 1. Prepare Environments

* 데이터 로드를 위한 구글 드라이브를 마운트합니다.
* 필요한 라이브러리를 설치합니다.

In [1]:
# # 필요한 라이브러리를 설치합니다.
# !pip install timm
# !pip install matplotlib
# !pip install seaborn
# !pip install optuna
# !apt install -y libgl1-mesa-glx
# !pip install albumentations

## 2. Import Library & Define Functions
* 학습 및 추론에 필요한 라이브러리를 로드합니다.
* 학습 및 추론에 필요한 함수와 클래스를 정의합니다.

In [2]:
import os
import time
import random
import copy

import optuna, math
import timm
import torch
import cv2
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import autocast, GradScaler  # Mixed Precision용

from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, StratifiedKFold

from collections import Counter
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import wandb
from datetime import datetime

# 한글 폰트 설정 (시각화용)
plt.rcParams['font.family'] = ['DejaVu Sans']

  warn(


In [3]:
# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

## 3. Hyper-parameters
* 학습 및 추론에 필요한 하이퍼파라미터들을 정의합니다.

In [4]:
# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# data config
data_path = '../data/'

# model config
# model_name = 'tf_efficientnetv2_b3' # 'resnet50' 'efficientnet-b0', ...
# model_name = 'swin_base_patch4_window12_384_in22k'
model_name = 'convnext_base_384_in22ft1k'
# model_name = 'convnextv2_base.fcmae_ft_in22k_in1k_384'
# model_name = 'vit_base_patch16_clip_384.laion2b_ft_in12k_in1k' # openclip
# model_name = 'vit_base_patch16_384.augreg_in1k' # augreg
# model_name = 'eva02_enormous_patch14_plus_clip_224.laion2b_s9b_b144k' # eva-02 멀티모달
# model_name = 'eva02_large_patch14_448.mim_in22k_ft_in1k' #448 테스트용
# model_name = 'vit_base_patch14_reg4_dinov2.lvd142m' # dinov2 reg4

# model_name = 'eva02_large_patch14_448.mim_in22k_ft_in1k' #448 테스트용

# training config
img_size = 384
LR = 2e-4
EPOCHS = 10
BATCH_SIZE = 24
num_workers = 8
EMA = True  # Exponential Moving Average 사용 여부

## 4. Load Data
* 학습, 테스트 데이터셋과 로더를 정의합니다.

# K-Fold 적용

## 5. Train Model
* 모델을 로드하고, 학습을 진행합니다.

In [5]:
# 5-Fold 앙상블 모델 준비
ensemble_models = []
for i in range(5):  # fold 개수만큼
    fold_model = timm.create_model(model_name, pretrained=False, num_classes=17).to(device)  # pretrained=False로 변경
    
    # fold별 저장된 파일 로드
    checkpoint = torch.load(f'models/fold_{i+1}_best.pth')  # fold별 파일 경로
    fold_model.load_state_dict(checkpoint)
    fold_model.eval()
    
    ensemble_models.append(fold_model)
    print(f"✓ Fold {i+1} model loaded from models/fold_{i+1}_best.pth")

print(f"Using ensemble of all {len(ensemble_models)} fold models for inference")

✓ Fold 1 model loaded from models/fold_1_best.pth
✓ Fold 2 model loaded from models/fold_2_best.pth
✓ Fold 3 model loaded from models/fold_3_best.pth
✓ Fold 4 model loaded from models/fold_4_best.pth
✓ Fold 5 model loaded from models/fold_5_best.pth
Using ensemble of all 5 fold models for inference


# 추론 점수용

In [6]:
# ==================== 로컬 F1 검증을 위한 Holdout Validation ====================
# Holdout Validation Set 생성
train_df = pd.read_csv("../data/train.csv")

# 20% holdout validation
from sklearn.model_selection import train_test_split
train_fold, holdout_val = train_test_split(
    train_df, 
    test_size=0.2, 
    stratify=train_df['target'], 
    random_state=42
)

print(f"Train samples: {len(train_fold)}")
print(f"Holdout validation samples: {len(holdout_val)}")

Train samples: 1256
Holdout validation samples: 314


# 6. Inference & Save File
* 테스트 이미지에 대한 추론을 진행하고, 결과 파일을 저장합니다.

In [7]:
# Temperature Scaling 클래스 정의
class TemperatureScaling(nn.Module):
    def __init__(self):
        super().__init__()
        self.temperature = nn.Parameter(torch.ones(1) * 1.5)
    
    def forward(self, logits):
        return logits / self.temperature

In [8]:
essential_tta_transforms = [
    # 원본
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    # 90도 회전들
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Rotate(limit=[90, 90], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Rotate(limit=[180, 180], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.Rotate(limit=[-90, -90], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    # 밝기 개선
    A.Compose([
        A.LongestMaxSize(max_size=img_size),
        A.PadIfNeeded(min_height=img_size, min_width=img_size, border_mode=0, value=0),
        A.RandomBrightnessContrast(brightness_limit=[0.3, 0.3], contrast_limit=[0.3, 0.3], p=1.0),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
]

In [9]:
# # TTA 추론을 위한 Dataset 클래스
# class TTAImageDataset(Dataset):
#     def __init__(self, data, path, transforms):
#         if isinstance(data, str):
#             self.df = pd.read_csv(data).values
#         else:
#             self.df = data.values
#         self.path = path
#         self.transforms = transforms  # 여러 transform을 리스트로 받음

#     def __len__(self):
#         return len(self.df)

#     def __getitem__(self, idx):
#         name, target = self.df[idx]
#         img = np.array(Image.open(os.path.join(self.path, name)).convert('RGB'))
        
#         # 모든 transform을 적용한 결과를 리스트로 반환
#         augmented_images = []
#         for transform in self.transforms:
#             aug_img = transform(image=img)['image']
#             augmented_images.append(aug_img)
        
#         return augmented_images, target

In [10]:
def test_time_preprocessing(img_array):
    """Test-time 이미지 품질 개선 - 완전판"""
    # RGB to Grayscale
    if len(img_array.shape) == 3:
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
    else:
        gray = img_array
    
    # 1. 회전 보정 (문서 경계선 기반)
    try:
        edges = cv2.Canny(gray, 50, 150, apertureSize=3)
        lines = cv2.HoughLines(edges, 1, np.pi/180, threshold=100)
        if lines is not None:
            angles = []
            for rho, theta in lines[:10]:  # 상위 10개 선만 사용
                angle = np.degrees(theta) - 90
                angles.append(angle)
            
            if angles:
                median_angle = np.median(angles)
                if abs(median_angle) > 1:  # 1도 이상 기울어진 경우만 보정
                    h, w = gray.shape
                    center = (w//2, h//2)
                    M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
                    gray = cv2.warpAffine(gray, M, (w, h), borderValue=255)
    except:
        pass  # 회전 보정 실패 시 원본 유지
    
    # 2. CLAHE (대비 개선)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
    enhanced = clahe.apply(gray)
    
    # 3. Gamma 보정 (밝기 개선)
    gamma = 1.3
    enhanced = np.power(enhanced/255.0, gamma) * 255
    enhanced = enhanced.astype(np.uint8)
    
    # 4. Deblur (언샤프 마스킹)
    gaussian = cv2.GaussianBlur(enhanced, (0, 0), 2.0)
    deblurred = cv2.addWeighted(enhanced, 1.5, gaussian, -0.5, 0)
    
    # 5. Denoising
    denoised = cv2.fastNlMeansDenoising(deblurred)
    
    # 6. 최종 선명화
    kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
    sharpened = cv2.filter2D(denoised, -1, kernel)
    
    # RGB로 변환
    result = cv2.cvtColor(sharpened, cv2.COLOR_GRAY2RGB)
    return result

In [11]:
class TTAImageDataset(Dataset):
    def __init__(self, data, path, transforms, preprocessing=False):  # 옵션 추가
        if isinstance(data, str):
            self.df = pd.read_csv(data).values
        else:
            self.df = data.values
        self.path = path
        self.transforms = transforms
        self.preprocessing = preprocessing

    def __len__(self):  # 이 메서드가 누락되어 있었음
        return len(self.df)
    
    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)).convert('RGB'))
        
        # 전처리 적용 여부
        if self.preprocessing:
            img = test_time_preprocessing(img)
        
        augmented_images = []
        for transform in self.transforms:
            aug_img = transform(image=img)['image']
            augmented_images.append(aug_img)
        
        return augmented_images, target

In [12]:
# # TTA Dataset 생성
# tta_dataset = TTAImageDataset(
#     "../data/sample_submission.csv",
#     "../data/test/",
#     essential_tta_transforms
# )

# # TTA DataLoader (배치 크기를 줄여서 메모리 절약)
# tta_loader = DataLoader(
#     tta_dataset,
#     batch_size=64,  # TTA는 메모리를 많이 사용하므로 배치 크기 줄임
#     shuffle=False,
#     num_workers=num_workers,
#     pin_memory=True
# )

# print(f"TTA Dataset size: {len(tta_dataset)}")

In [13]:
# TTA Dataset 생성 - 테스트용과 검증용 모두 생성
test_tta_dataset = TTAImageDataset(
    "../data/sample_submission.csv",
    "../data/test/",
    essential_tta_transforms
)

# Holdout Validation Dataset 생성 (로컬 F1 검증용)
val_tta_dataset = TTAImageDataset(
    holdout_val,
    "../data/train/",  # validation도 train 폴더에 있음
    essential_tta_transforms
)

# DataLoader들
test_tta_loader = DataLoader(test_tta_dataset, batch_size=64, shuffle=False, num_workers=num_workers, pin_memory=True)
val_tta_loader = DataLoader(val_tta_dataset, batch_size=64, shuffle=False, num_workers=num_workers, pin_memory=True)

print(f"Test TTA Dataset size: {len(test_tta_dataset)}")
print(f"Validation TTA Dataset size: {len(val_tta_dataset)}")

# 기존 변수명 호환성 유지
tta_dataset = test_tta_dataset
tta_loader = test_tta_loader

Test TTA Dataset size: 3140
Validation TTA Dataset size: 314


In [14]:
def ensemble_tta_inference(models, loader, transforms, confidence_threshold=0.9):
    """5-Fold 모델 앙상블 + TTA 추론"""
    temp_scaling = TemperatureScaling().to(device)  # Temperature Scaling 추가
    all_predictions = []
    
    for batch_idx, (images_list, _) in enumerate(tqdm(loader, desc="Ensemble TTA")):
        batch_size = images_list[0].size(0)
        ensemble_probs = torch.zeros(batch_size, 17).to(device)
        
        # 각 fold 모델별 예측
        for model in models:
            with torch.no_grad():
                # 각 TTA 변형별 예측
                for images in images_list:
                    images = images.to(device)
                    preds = model(images)
                    
                    # Temperature Scaling 적용
                    scaled_preds = temp_scaling(preds)
                    probs = torch.softmax(scaled_preds, dim=1)
                    
                    ensemble_probs += probs / (len(models) * len(images_list))
        
        final_preds = torch.argmax(ensemble_probs, dim=1)
        all_predictions.extend(final_preds.cpu().numpy())
    
    return all_predictions

In [15]:
# 앙상블 TTA 실행
print("Starting Ensemble TTA inference...")
tta_predictions = ensemble_tta_inference(
    models=ensemble_models, 
    loader=tta_loader, 
    transforms=essential_tta_transforms,
    confidence_threshold=0.9
)

Starting Ensemble TTA inference...


Ensemble TTA: 100%|██████████| 50/50 [09:48<00:00, 11.77s/it]


In [16]:
# ==================== 로컬 F1 검증 ====================
print("로컬 F1 검증 시작...")
val_predictions = ensemble_tta_inference(
    models=ensemble_models, 
    loader=val_tta_loader, 
    transforms=essential_tta_transforms,
    confidence_threshold=0.9
)

# F1 점수 계산
true_labels = holdout_val['target'].values
local_f1 = f1_score(true_labels, val_predictions, average='macro')

print(f"로컬 Holdout F1 점수: {local_f1:.4f}")
print("이 점수가 실제 제출 점수와 유사할 가능성이 높습니다.")

로컬 F1 검증 시작...


Ensemble TTA: 100%|██████████| 5/5 [01:05<00:00, 13.08s/it]

로컬 Holdout F1 점수: 0.9161
이 점수가 실제 제출 점수와 유사할 가능성이 높습니다.





In [17]:
# 전처리 전후 비교
print("전처리 없음으로 검증 중...")
val_dataset_without = TTAImageDataset(holdout_val, "../data/train/", essential_tta_transforms, preprocessing=False)
val_loader_without = DataLoader(val_dataset_without, batch_size=64, shuffle=False)
f1_without = f1_score(true_labels, ensemble_tta_inference(ensemble_models, val_loader_without, essential_tta_transforms), average='macro')

print("전처리 적용으로 검증 중...")
val_dataset_with = TTAImageDataset(holdout_val, "../data/train/", essential_tta_transforms, preprocessing=True)
val_loader_with = DataLoader(val_dataset_with, batch_size=64, shuffle=False)
f1_with = f1_score(true_labels, ensemble_tta_inference(ensemble_models, val_loader_with, essential_tta_transforms), average='macro')

print(f"전처리 전: {f1_without:.4f}")
print(f"전처리 후: {f1_with:.4f}")
print(f"개선 효과: +{(f1_with - f1_without)*100:.1f}%p")

전처리 없음으로 검증 중...


Ensemble TTA: 100%|██████████| 5/5 [01:09<00:00, 13.87s/it]


전처리 적용으로 검증 중...


Ensemble TTA: 100%|██████████| 5/5 [02:40<00:00, 32.19s/it]

전처리 전: 0.9161
전처리 후: 0.7949
개선 효과: +-12.1%p





In [22]:
# TTA 결과로 submission 파일 생성
tta_pred_df = pd.DataFrame(tta_dataset.df, columns=['ID', 'target'])
tta_pred_df['target'] = tta_predictions

In [23]:
# 기존 submission과 동일한 순서인지 확인
sample_submission_df = pd.read_csv("../data/sample_submission.csv")
assert (sample_submission_df['ID'] == tta_pred_df['ID']).all()

In [24]:
# TTA 결과 저장
tta_pred_df.to_csv("../submission/choice.csv", index=False)
print("TTA predictions saved")

print("TTA Prediction sample:")

TTA predictions saved
TTA Prediction sample:


In [25]:
tta_pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,12
4,00901f504008d884.jpg,2
