# In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import timm
import optuna

# ✅ Custom dataset class
class ScrapDataset(Dataset):
    """Image dataset backed by a DataFrame of file names and labels.

    Every row must provide a ``'filename'`` column (joined onto ``img_dir``)
    and a ``'label'`` column.

    Args:
        dataframe: Table with ``'filename'`` and ``'label'`` columns.
        img_dir: Directory that the file names are relative to.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, dataframe, img_dir, transform=None):
        # A subset taken with ``df.iloc[...]`` keeps the parent's index labels;
        # resetting makes ``.loc[idx]`` valid for every idx in range(len(self)).
        self.data = dataframe.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened from ``img_dir/filename``, converted to RGB and,
        when a transform was supplied, passed through it.
        """
        img_path = os.path.join(self.img_dir, self.data.loc[idx, 'filename'])
        # Image.open is lazy and keeps the file open; the context manager
        # closes it deterministically once the RGB copy has been made.
        with Image.open(img_path) as raw:
            image = raw.convert('RGB')
        label = self.data.loc[idx, 'label']
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# ✅ Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=10):
    """Train ``model`` and return the best validation accuracy seen.

    Each epoch runs one optimization pass over ``train_loader`` followed by
    an evaluation pass over ``val_loader``; predictions are the argmax over
    dimension 1 of the model output.

    Args:
        model: Module to train; moved to ``device`` before training.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model's parameters.
        device: Device that the model and batches are moved to.
        epochs: Number of train/validate cycles.

    Returns:
        The highest per-epoch validation accuracy (``0`` if no epoch ran).
    """
    model.to(device)
    top_acc = 0

    for _ in range(epochs):
        # Training pass
        model.train()
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()

        # Validation pass
        model.eval()
        predicted, expected = [], []
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                logits = model(batch_x)
                batch_preds = torch.argmax(logits, dim=1)
                predicted.extend(batch_preds.cpu().numpy())
                expected.extend(batch_y.cpu().numpy())

        epoch_acc = accuracy_score(expected, predicted)
        # Keep the running maximum across epochs.
        if epoch_acc > top_acc:
            top_acc = epoch_acc

    return top_acc

# ✅ Optuna objective function (optimizer is fixed)
def objective(trial):
    """Optuna objective: mean 5-fold validation accuracy of the classifier.

    Samples learning rate, weight decay and dropout rate from ``trial`` (the
    optimizer itself is fixed to AdamW), trains a fresh pretrained
    ``coat_lite_medium`` model on each fold of ``train.csv`` /
    ``train_images`` and averages the accuracy ``train_model`` returns for
    each fold.

    Args:
        trial: The Optuna trial supplying the hyperparameter suggestions.

    Returns:
        Mean over the five folds of the accuracy returned by ``train_model``.

    Raises:
        ValueError: If ``weight_class`` holds a value other than 1, 2 or 3.
    """
    df = pd.read_csv("train.csv")

    # Map weight classes 1..3 to zero-based labels. ``Series.map`` turns any
    # unmapped value into NaN, which would surface later as an obscure loss
    # error, so fail early with a clear message instead.
    mapped = df['weight_class'].map({1: 0, 2: 1, 3: 2})
    unmapped = mapped.isna()
    if unmapped.any():
        unexpected = df.loc[unmapped, 'weight_class'].unique().tolist()
        raise ValueError(
            f"train.csv has weight_class values outside {{1, 2, 3}}: {unexpected}"
        )
    df['label'] = mapped.astype('int64')
    img_dir = "train_images"

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    accuracies = []

    # Tuned hyperparameters (the optimizer is fixed). ``suggest_float``
    # replaces the deprecated ``suggest_loguniform`` / ``suggest_uniform``;
    # parameter names and ranges are unchanged.
    lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True)
    dropout_rate = trial.suggest_float("dropout_rate", 0.0, 0.5)

    for train_idx, val_idx in kf.split(df):
        train_df = df.iloc[train_idx]
        val_df = df.iloc[val_idx]

        train_dataset = ScrapDataset(train_df, img_dir, transform)
        val_dataset = ScrapDataset(val_df, img_dir, transform)
        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

        # A fresh pretrained model per fold so folds do not share weights.
        model = timm.create_model('coat_lite_medium', pretrained=True, num_classes=3, drop_rate=dropout_rate)
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

        acc = train_model(model, train_loader, val_loader, criterion, optimizer, device)
        accuracies.append(acc)

        # Release this fold's model before the next one is built so two
        # copies never have to coexist on the device.
        del model, optimizer
        if device.type == 'cuda':
            torch.cuda.empty_cache()

    return np.mean(accuracies)

# ✅ Run the Optuna optimization
if __name__ == "__main__":
    # Search for the hyperparameters that maximize the objective's return
    # value, using 10 trials.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=10)

    # Report the parameters of the best-scoring trial.
    print("✅ Best Hyperparameters:", study.best_params)
