In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import transforms
from PIL import Image
import pandas as pd
import os
import timm
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np
import random
import warnings
warnings.filterwarnings("ignore")

# Custom Dataset
class ScrapClassificationDataset(Dataset):
    """Image-classification dataset backed by a DataFrame and an image folder.

    Every row must provide a ``filename`` column (joined onto ``img_dir``) and
    a ``weight_class`` column (the raw class label). Labels are converted to
    integer indices and stored in a ``class_idx`` column of an internal,
    re-indexed copy of the frame.

    Args:
        dataframe: Rows describing the samples.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the RGB PIL image.
        label_encoder: Optional encoder exposing ``fit_transform`` and
            ``transform``. An encoder that is already fitted (it has a
            ``classes_`` attribute) is reused unchanged, so the label indices
            match the ones produced where it was fitted (e.g. the training
            split). An unfitted or missing encoder is fitted on this
            dataset's labels.

    Raises:
        ValueError: Propagated from a fitted scikit-learn ``LabelEncoder``
            when the frame contains a label it was not fitted on.
    """

    def __init__(self, dataframe, img_dir, transform=None, label_encoder=None):
        self.data = dataframe.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform
        if label_encoder is None:
            label_encoder = LabelEncoder()
        self.label_encoder = label_encoder

        labels = self.data['weight_class']
        if hasattr(label_encoder, 'classes_'):
            # Already fitted: re-fitting here would overwrite the caller's
            # mapping and could shift indices when this split lacks a class.
            self.data['class_idx'] = label_encoder.transform(labels)
        else:
            self.data['class_idx'] = label_encoder.fit_transform(labels)

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one
        was given; the label is a scalar ``torch.long`` tensor.
        """
        row = self.data.iloc[idx]
        img_path = os.path.join(self.img_dir, row['filename'])
        # The context manager closes the file handle right after decoding.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = torch.tensor(row['class_idx'], dtype=torch.long)
        if self.transform:
            image = self.transform(image)
        return image, label

# Model definition
class CoaTMediumClassifier(nn.Module):
    """Thin wrapper around timm's ``coat_lite_medium`` classification model.

    Args:
        num_classes: Size of the classification head. Defaults to 3, the
            value that used to be hard-coded.
        pretrained: Whether timm should load pretrained weights. Defaults to
            True, matching the previous behaviour.
    """

    def __init__(self, num_classes=3, pretrained=True):
        super().__init__()
        self.backbone = timm.create_model(
            'coat_lite_medium',
            pretrained=pretrained,
            num_classes=num_classes,
        )

    def forward(self, x):
        """Return the backbone's output for the input batch ``x``."""
        return self.backbone(x)

# Evaluation function
def evaluate(model, dataloader, device):
    """Compute accuracy and macro-averaged precision/recall/F1 for ``model``.

    The model is switched to eval mode and run without gradient tracking over
    every ``(images, labels)`` batch yielded by ``dataloader``; the predicted
    class is the arg-max over dimension 1 of the model output.

    Args:
        model: Module called as ``model(images)``.
        dataloader: Iterable of ``(images, labels)`` tensor batches.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        ``(accuracy_percent, precision, recall, f1)``. When the dataloader
        yields no samples, all four values are ``0.0`` instead of failing
        with a division by zero.
    """
    model.eval()
    pred_batches, label_batches = [], []
    with torch.no_grad():
        for images, labels in dataloader:
            outputs = model(images.to(device))
            # Collect on the CPU so the metrics below work on plain arrays.
            pred_batches.append(outputs.argmax(dim=1).cpu())
            label_batches.append(labels.cpu())

    total = sum(batch.size(0) for batch in label_batches)
    if total == 0:
        # Nothing to score; avoids ZeroDivisionError on an empty loader.
        return 0.0, 0.0, 0.0, 0.0

    all_preds = torch.cat(pred_batches).numpy()
    all_labels = torch.cat(label_batches).numpy()
    correct = int((all_preds == all_labels).sum())
    acc = correct / total * 100

    # zero_division=0 yields the same value scikit-learn uses by default for
    # undefined scores (a class that is never predicted or never present),
    # but without emitting a warning.
    precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='macro', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    return acc, precision, recall, f1

# Path configuration
csv_path = r"C:\Users\pyw20\OneDrive\바탕 화면\work\train.csv"
img_dir = r"C:\Users\pyw20\OneDrive\바탕 화면\work\train_images"
df = pd.read_csv(csv_path)

# Base transform (resize -> tensor -> normalise) and the pool of augmentation
# operations that the training loop samples from.
# NOTE(review): every channel is normalised with mean 0.5 / std 0.5; confirm
# this matches the statistics the pretrained backbone expects.
base_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
augmentation_transforms = [
    transforms.RandomHorizontalFlip(p=0.6),
    transforms.RandomVerticalFlip(p=0.6),
    transforms.ColorJitter(brightness=0.3, contrast=0.3),
]

# Start 5-fold training
kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_metrics = []  # one (accuracy, precision, recall, f1) tuple per finished fold

# Loop-invariant objects are built once instead of once per fold / per sample.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
# One ready-made pipeline per augmentation op; picking a pipeline at random is
# equivalent to picking the op at random and wrapping it in a fresh Compose.
augmented_pipelines = [
    transforms.Compose([
        transforms.Resize((224, 224)),
        aug_op,
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3),
    ])
    for aug_op in augmentation_transforms
]

for fold, (train_idx, test_idx) in enumerate(kf.split(df)):
    print(f"\n📁 Fold {fold+1}")
    # Copy the training slice so that adding a column below modifies an
    # independent frame rather than a slice of df (chained assignment).
    train_df = df.iloc[train_idx].copy()
    test_df = df.iloc[test_idx]

    # Fit on every label in df so the label -> index mapping is identical in
    # all folds, even if one training split happens to miss a class.
    label_encoder = LabelEncoder().fit(df['weight_class'])
    train_df['class_idx'] = label_encoder.transform(train_df['weight_class'])

    aug_images, aug_labels = [], []
    for filename, class_idx in zip(train_df['filename'], train_df['class_idx']):
        img_path = os.path.join(img_dir, filename)
        # Close the file handle as soon as the pixels are decoded.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = torch.tensor(class_idx, dtype=torch.long)
        aug_images.append(base_transform(image))
        aug_labels.append(label)

        # 9 extra copies per image, each through one randomly chosen
        # augmentation (the ops themselves are applied probabilistically).
        for _ in range(9):
            transform = random.choice(augmented_pipelines)
            aug_images.append(transform(image))
            aug_labels.append(label)

    train_dataset = TensorDataset(torch.stack(aug_images), torch.stack(aug_labels))
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

    test_dataset = ScrapClassificationDataset(test_df, img_dir, transform=base_transform, label_encoder=label_encoder)
    test_loader = DataLoader(test_dataset, batch_size=8)

    # A fresh model and optimizer per fold so no weights leak between folds.
    model = CoaTMediumClassifier().to(device)
    optimizer = optim.AdamW(model.parameters(), lr=1e-4)

    for epoch in range(10):
        model.train()
        total_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"[Fold {fold+1}][Epoch {epoch+1}] Loss: {total_loss / len(train_loader):.4f}")

    acc, precision, recall, f1 = evaluate(model, test_loader, device)
    print(f"✅ Fold {fold+1} - Accuracy: {acc:.2f}% | Precision: {precision:.4f} | Recall: {recall:.4f} | F1: {f1:.4f}")
    fold_metrics.append((acc, precision, recall, f1))

# Print the metrics averaged over all folds.
# Columns of fold_metrics: accuracy (%), precision, recall, F1.
mean_metrics = np.asarray(fold_metrics).mean(axis=0)
print("\n📊 Cross-Validation 평균 결과:")
print(f"✅ Accuracy: {mean_metrics[0]:.2f}%")
print(f"📍 Precision: {mean_metrics[1]:.4f} | Recall: {mean_metrics[2]:.4f} | F1 Score: {mean_metrics[3]:.4f}")

# (Optional) audible notification when the run finishes (Windows only)
try:
    import winsound
    winsound.Beep(1000, 700)
except (ImportError, RuntimeError):
    # winsound exists only on Windows (ImportError elsewhere) and Beep raises
    # RuntimeError when the speaker cannot be used; fall back to the terminal
    # bell. A bare ``except`` would also swallow KeyboardInterrupt/SystemExit.
    print("\a")



📁 Fold 1
[Fold 1][Epoch 1] Loss: 0.2741
[Fold 1][Epoch 2] Loss: 0.0032
[Fold 1][Epoch 3] Loss: 0.0008
[Fold 1][Epoch 4] Loss: 0.0005
[Fold 1][Epoch 5] Loss: 0.0003
[Fold 1][Epoch 6] Loss: 0.0003
[Fold 1][Epoch 7] Loss: 0.0002
[Fold 1][Epoch 8] Loss: 0.0002
[Fold 1][Epoch 9] Loss: 0.0001
[Fold 1][Epoch 10] Loss: 0.0001
✅ Fold 1 - Accuracy: 47.62% | Precision: 0.4901 | Recall: 0.4917 | F1: 0.4777

📁 Fold 2
[Fold 2][Epoch 1] Loss: 0.2195
[Fold 2][Epoch 2] Loss: 0.0044
[Fold 2][Epoch 3] Loss: 0.0293
[Fold 2][Epoch 4] Loss: 0.0091
[Fold 2][Epoch 5] Loss: 0.0003
[Fold 2][Epoch 6] Loss: 0.0002
[Fold 2][Epoch 7] Loss: 0.0002
[Fold 2][Epoch 8] Loss: 0.0002
[Fold 2][Epoch 9] Loss: 0.0001
[Fold 2][Epoch 10] Loss: 0.0001
✅ Fold 2 - Accuracy: 60.00% | Precision: 0.6556 | Recall: 0.6566 | F1: 0.5613

📁 Fold 3
[Fold 3][Epoch 1] Loss: 0.2632
[Fold 3][Epoch 2] Loss: 0.0061
[Fold 3][Epoch 3] Loss: 0.0008
[Fold 3][Epoch 4] Loss: 0.0005
[Fold 3][Epoch 5] Loss: 0.0004
[Fold 3][Epoch 6] Loss: 0.0003
[Fold 

KeyboardInterrupt: 