# 라이브러리 및 설정

In [None]:
import os, random
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch import nn, optim

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device:", device)

# Experiment-wide settings, looked up by key from the cells below.
CFG = {
    "IMG_SIZE": 224,         # images are resized to IMG_SIZE x IMG_SIZE
    "BATCH_SIZE": 64,        # batch size shared by every DataLoader
    "EPOCHS": 100,           # upper bound on epochs; early stopping may end sooner
    "LEARNING_RATE": 1e-4,   # initial learning rate handed to the optimizer
    "SEED": 42,              # RNG seed, also used for the train/val split
    "NUM_CLASSES": 397,      # output size of the replaced classifier layer
    "RGB_WEIGHT": 0.4,       # ensemble weight intended for the RGB model
    "GRAY_WEIGHT": 0.6,      # ensemble weight intended for the grayscale model
}

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED``, and switches cuDNN to deterministic mode with
    auto-tuning disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    # NOTE: changing PYTHONHASHSEED at runtime only influences interpreters
    # started after this point, not the hashing of the running process.
    os.environ['PYTHONHASHSEED'] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Trade some speed for repeatable cuDNN kernel selection.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

# Seed all RNGs once, up front, using the seed stored in the shared config.
seed_everything(CFG['SEED'])

# 데이터셋 및 전처리

In [None]:
class CustomImageDataset(Dataset):
    """Dataset that loads images from disk as 3-channel PIL images.

    In ``'gray'`` mode the image is first reduced to a single luminance
    channel and then expanded back to three channels, so both modes produce
    inputs of the same shape.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``image_paths``. Required
            unless ``is_test`` is true.
        transform: Optional callable applied to the loaded PIL image.
        mode: ``'gray'`` for the grayscale variant; any other value loads
            the image as RGB.
        is_test: When true, items are ``(image, file_name)`` instead of
            ``(image, label)``.

    Raises:
        ValueError: If ``labels`` is omitted while ``is_test`` is false.
    """

    def __init__(self, image_paths, labels=None, transform=None, mode='rgb', is_test=False):
        # Fail at construction time rather than with an opaque TypeError on
        # the first __getitem__ call deep inside a DataLoader.
        if not is_test and labels is None:
            raise ValueError("labels must be provided unless is_test=True")
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        self.mode = mode
        self.is_test = is_test

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, convert, and transform the image at position ``idx``."""
        img_path = self.image_paths[idx]
        # convert() returns a fully decoded copy, so the underlying file can
        # be released as soon as the with-block exits.
        with Image.open(img_path) as raw:
            if self.mode == 'gray':
                image = raw.convert("L").convert("RGB")
            else:
                image = raw.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        if self.is_test:
            return image, os.path.basename(img_path)
        return image, self.labels[idx]

def _make_train_transform():
    """Build one resize -> random flip -> random rotation -> tensor -> normalize pipeline."""
    side = CFG['IMG_SIZE']
    # These look like the usual ImageNet channel statistics; confirm they
    # match the pretrained backbone in use.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    steps = [
        transforms.Resize((side, side)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ]
    return transforms.Compose(steps)


# The RGB and grayscale pipelines are configured identically; each gets its
# own Compose instance.
transform_rgb = _make_train_transform()
transform_gray = _make_train_transform()

# 데이터 로드

In [None]:
train_root = './train'
test_root = './test'

# Validation and test images are preprocessed deterministically. The training
# transforms apply a random flip and rotation, which would make the validation
# loss and the test predictions noisy if reused here.
transform_eval = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'], CFG['IMG_SIZE'])),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Only sub-directories are classes; stray files in train_root (e.g. OS
# metadata files) must not become labels.
class_names = sorted(
    name for name in os.listdir(train_root)
    if os.path.isdir(os.path.join(train_root, name))
)
class_to_idx = {cls_name: i for i, cls_name in enumerate(class_names)}

image_paths, labels = [], []
for cls_name in class_names:
    cls_dir = os.path.join(train_root, cls_name)
    # os.listdir order is filesystem-dependent; sorting keeps the seeded
    # split below reproducible across machines.
    for fname in sorted(os.listdir(cls_dir)):
        if fname.lower().endswith('.jpg'):
            image_paths.append(os.path.join(cls_dir, fname))
            labels.append(class_to_idx[cls_name])

# Stratified 80/20 split so both subsets keep the class distribution.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths, labels, test_size=0.2, stratify=labels, random_state=CFG['SEED']
)

train_loader_rgb = DataLoader(CustomImageDataset(train_paths, train_labels, transform_rgb, 'rgb'), batch_size=CFG['BATCH_SIZE'], shuffle=True)
val_loader_rgb   = DataLoader(CustomImageDataset(val_paths, val_labels, transform_eval, 'rgb'), batch_size=CFG['BATCH_SIZE'], shuffle=False)

train_loader_gray = DataLoader(CustomImageDataset(train_paths, train_labels, transform_gray, 'gray'), batch_size=CFG['BATCH_SIZE'], shuffle=True)
val_loader_gray   = DataLoader(CustomImageDataset(val_paths, val_labels, transform_eval, 'gray'), batch_size=CFG['BATCH_SIZE'], shuffle=False)

# Match the extension case-insensitively, as the training scan does.
test_image_paths = sorted([os.path.join(test_root, f) for f in os.listdir(test_root) if f.lower().endswith('.jpg')])
test_filenames = [os.path.basename(p) for p in test_image_paths]

# shuffle=False keeps predictions aligned with test_filenames.
test_loader_rgb = DataLoader(CustomImageDataset(test_image_paths, transform=transform_eval, mode='rgb', is_test=True), batch_size=CFG['BATCH_SIZE'], shuffle=False)
test_loader_gray = DataLoader(CustomImageDataset(test_image_paths, transform=transform_eval, mode='gray', is_test=True), batch_size=CFG['BATCH_SIZE'], shuffle=False)

sample_submission = pd.read_csv('./sample_submission.csv', encoding='utf-8-sig')

# 모델 및 학습 함수

In [None]:
def get_model():
    """Create a ConvNeXt-Base classifier with pretrained weights.

    The final linear layer of the classifier head is replaced so the network
    outputs ``CFG['NUM_CLASSES']`` logits.

    Returns:
        The model, moved to the module-level ``device``.
    """
    # torchvision >= 0.13 deprecates ``pretrained=True`` in favour of the
    # ``weights`` enum; IMAGENET1K_V1 is the checkpoint the legacy flag maps
    # to. Fall back to the legacy flag on older releases without the enum.
    weights_enum = getattr(models, "ConvNeXt_Base_Weights", None)
    if weights_enum is not None:
        model = models.convnext_base(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.convnext_base(pretrained=True)

    old_head = model.classifier[2]
    model.classifier[2] = nn.Linear(old_head.in_features, CFG['NUM_CLASSES'])
    return model.to(device)

class EarlyStopping:
    """Track validation loss and flag when it has stopped improving.

    Call the instance once per epoch with the latest validation loss. After
    ``patience`` consecutive calls without a strictly lower loss,
    ``early_stop`` becomes ``True``.

    Args:
        patience: Number of consecutive non-improving calls tolerated.
    """

    def __init__(self, patience=5):
        self.early_stop = False
        self.best_loss = float('inf')
        self.counter = 0
        self.patience = patience

    def __call__(self, val_loss):
        """Record one validation loss and update the stopping state."""
        if val_loss < self.best_loss:
            # Strict improvement: remember it and restart the wait.
            self.best_loss, self.counter = val_loss, 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

def train_model(model, train_loader, val_loader, model_path):
    """Train ``model`` and checkpoint the weights with the best validation loss.

    Uses Adam with cross-entropy loss, halves the learning rate when the
    validation loss plateaus, and stops early once the loss has not improved
    for five consecutive epochs.

    Args:
        model: Network to train; updated in place.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        model_path: File path the best ``state_dict`` is written to.
    """
    optimizer = optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)
    criterion = nn.CrossEntropyLoss()
    stopper = EarlyStopping(patience=5)

    for epoch in range(CFG['EPOCHS']):
        model.train()
        for x, y in tqdm(train_loader):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

        model.eval()
        # Accumulate a per-sample sum so a smaller final batch is not
        # over-weighted, as it would be when averaging per-batch means.
        loss_sum, n_samples = 0.0, 0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                batch_n = y.size(0)
                # criterion returns the batch mean; scale back to a sum.
                loss_sum += criterion(model(x), y).item() * batch_n
                n_samples += batch_n
        val_loss = loss_sum / n_samples
        scheduler.step(val_loss)

        print(f"[Epoch {epoch+1}] Val Loss: {val_loss:.4f}")
        # Compare before stopper() is called, since that call updates
        # stopper.best_loss.
        if val_loss < stopper.best_loss:
            torch.save(model.state_dict(), model_path)
            print("Best model saved:", model_path)
        stopper(val_loss)
        if stopper.early_stop:
            print("Early stopping.")
            break

# 추론 및 저장 함수

In [None]:
def predict(model, loader):
    """Run inference and return softmax probabilities for every sample.

    Args:
        model: Network to evaluate; switched to eval mode.
        loader: Iterable yielding ``(inputs, _)`` batches; the second element
            of each batch is ignored.

    Returns:
        A NumPy array of the per-batch probabilities concatenated along
        axis 0, in loader order.
    """
    model.eval()
    with torch.no_grad():
        batch_probs = [
            F.softmax(model(inputs.to(device)), dim=1).cpu().numpy()
            for inputs, _ in loader
        ]
    return np.concatenate(batch_probs, axis=0)

# Merges the predictions of the RGB and grayscale models.
# The two models are trained separately; their softmax outputs are combined
# only at inference time.
def ensemble_predict(rgb_probs, gray_probs, w_rgb=0.4, w_gray=0.6):
    """Return the weighted sum of two probability arrays.

    Args:
        rgb_probs: Probabilities from the RGB model.
        gray_probs: Probabilities from the grayscale model.
        w_rgb: Weight applied to ``rgb_probs`` (default 0.4).
        w_gray: Weight applied to ``gray_probs`` (default 0.6).

    Returns:
        ``w_rgb * rgb_probs + w_gray * gray_probs``.
    """
    rgb_part = rgb_probs * w_rgb
    gray_part = gray_probs * w_gray
    return rgb_part + gray_part

# Writes predicted probabilities to disk in the sample-submission layout.
def save_submission_probs(filenames, probs, sample_df, output_path="baseline_submission_gray.csv"):
    """Save per-class probabilities as a submission CSV.

    The column layout is taken from ``sample_df``: its first column names the
    identifier column and the remaining columns name the classes.

    Args:
        filenames: One identifier per row of ``probs``.
        probs: 2-D array-like of probabilities with one column per class
            column of ``sample_df``.
        sample_df: Sample submission DataFrame providing the column names.
        output_path: Destination CSV path. Defaults to the file name the
            notebook has always written.
    """
    # Take the id column name from the sample rather than assuming "ID", so
    # the output always matches the expected header.
    id_col = sample_df.columns[0]
    class_cols = sample_df.columns[1:]

    df = pd.DataFrame(probs, columns=class_cols)
    # Inserting at position 0 leaves the columns in sample_df order.
    df.insert(0, id_col, filenames)
    df.to_csv(output_path, index=False, encoding="utf-8-sig")

# 실행 셀 (학습 + 추론 + 제출 저장)

In [None]:
# Train the RGB model
model_rgb = get_model()
train_model(model_rgb, train_loader_rgb, val_loader_rgb, "best_model_rgb.pth")

# Train the grayscale model
model_gray = get_model()
train_model(model_gray, train_loader_gray, val_loader_gray, "best_model_gray.pth")

# Final inference and ensembling.
# Restore the best checkpoints: after training, the in-memory weights are
# those of the last epoch, not necessarily the best one. map_location keeps
# loading working when the checkpoint was written on a different device.
model_rgb.load_state_dict(torch.load("best_model_rgb.pth", map_location=device))
model_gray.load_state_dict(torch.load("best_model_gray.pth", map_location=device))

rgb_pred = predict(model_rgb, test_loader_rgb)
gray_pred = predict(model_gray, test_loader_gray)

# Blend the softmax probabilities with the weights declared in CFG
# (RGB 0.4 / grayscale 0.6) instead of repeating the literals here.
final_pred = ensemble_predict(
    rgb_pred,
    gray_pred,
    w_rgb=CFG['RGB_WEIGHT'],
    w_gray=CFG['GRAY_WEIGHT'],
)

save_submission_probs(test_filenames, final_pred, sample_submission)