In [1]:
# 필요한 라이브러리 불러오기
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import pandas as pd
import os
import timm
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder

# Custom Dataset class
class ScrapClassificationDataset(Dataset):
    """Dataset yielding ``(image, label, filename)`` triples from a DataFrame.

    The frame must provide a ``filename`` column (joined onto ``img_dir`` to
    locate the image) and a ``weight_class`` column (the class label).

    Args:
        dataframe: Rows to serve. They are re-indexed from 0 into a new frame,
            and the encoded labels are stored there in a ``class_idx`` column.
        img_dir: Directory that the ``filename`` values are joined onto.
        transform: Optional callable applied to each RGB PIL image.
        label_encoder: Optional label encoder. If it is already fitted (it
            exposes ``classes_``), it is used as-is via ``transform`` so the
            class-to-index mapping stays the one the caller fitted. If it is
            missing or unfitted, it is fitted on this dataset's labels.

    Raises:
        ValueError: Propagated from the encoder's ``transform`` when a fitted
            encoder meets a label it was not fitted on.
    """

    def __init__(self, dataframe, img_dir, transform=None, label_encoder=None):
        self.data = dataframe.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform
        self.label_encoder = (
            LabelEncoder() if label_encoder is None else label_encoder
        )

        labels = self.data['weight_class']
        if hasattr(self.label_encoder, 'classes_'):
            # Already fitted by the caller: refitting here would overwrite the
            # shared mapping with one derived from this subset only.
            self.data['class_idx'] = self.label_encoder.transform(labels)
        else:
            self.data['class_idx'] = self.label_encoder.fit_transform(labels)

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label, filename)``.

        ``label`` is a scalar ``torch.long`` tensor holding ``class_idx``;
        ``image`` is the RGB image after ``transform`` (if one was given).
        """
        row = self.data.iloc[idx]
        filename = row['filename']
        # Use a context manager so the underlying file handle is released;
        # convert() returns an independent, fully loaded image.
        with Image.open(os.path.join(self.img_dir, filename)) as img:
            image = img.convert('RGB')
        label = torch.tensor(int(row['class_idx']), dtype=torch.long)
        if self.transform is not None:
            image = self.transform(image)
        return image, label, filename

# Swin-Tiny classification model
class SwinTinyClassifier(nn.Module):
    """Thin ``nn.Module`` wrapper around timm's Swin-Tiny backbone.

    Args:
        num_classes: Value forwarded to ``timm.create_model`` as
            ``num_classes``. Defaults to 3, the value that was previously
            hard-coded.
        pretrained: Value forwarded to ``timm.create_model`` as
            ``pretrained``. Defaults to True, as before.
    """

    def __init__(self, num_classes=3, pretrained=True):
        super().__init__()
        self.backbone = timm.create_model(
            'swin_tiny_patch4_window7_224',
            pretrained=pretrained,
            num_classes=num_classes,
        )

    def forward(self, x):
        """Return the backbone's output for ``x``.

        NOTE(review): presumably ``x`` is a (batch, 3, 224, 224) float tensor
        and the result is per-class logits -- confirm against the timm model.
        """
        return self.backbone(x)

# Prediction collection helper
def get_predictions(model, dataloader, device, label_encoder):
    """Run ``model`` over ``dataloader`` and return decoded predictions.

    The model is switched to eval mode and gradients are disabled. Each batch
    from ``dataloader`` must unpack as ``(images, labels, filenames)``; the
    labels are ignored. The index of the largest output along dim 1 is mapped
    back to a class name with ``label_encoder.inverse_transform``.

    Returns:
        A list of ``{"filename": ..., "predicted_label": ...}`` dicts, in
        dataloader order.
    """
    model.eval()
    collected = []
    with torch.no_grad():
        for batch_images, _ignored_labels, batch_names in dataloader:
            logits = model(batch_images.to(device))
            class_indices = logits.max(dim=1).indices
            decoded = label_encoder.inverse_transform(
                class_indices.cpu().numpy()
            )
            collected.extend(
                {"filename": name, "predicted_label": decoded_label}
                for name, decoded_label in zip(batch_names, decoded)
            )
    return collected

# Path settings (adjust to your own environment)
csv_path = r"C:\Users\pyw20\OneDrive\바탕 화면\work\train.csv"
img_dir = r"C:\Users\pyw20\OneDrive\바탕 화면\work\train_images"

# Load the data and define the preprocessing pipeline
df = pd.read_csv(csv_path)
# Resize to 224x224, convert to a tensor, then normalize each of the three
# channels with mean 0.5 / std 0.5.
# NOTE(review): pretrained backbones are often trained with ImageNet
# mean/std -- confirm that 0.5/0.5 is intentional here.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# K-Fold setup: 5 shuffled splits with a fixed seed for the split itself
kf = KFold(n_splits=5, shuffle=True, random_state=42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Accumulates the held-out predictions of every fold
all_predictions = []

# Iterate over the K folds
for fold, (train_idx, test_idx) in enumerate(kf.split(df)):
    print(f"🔁 Fold {fold+1}")
    train_df, test_df = df.iloc[train_idx], df.iloc[test_idx]

    # Encode class labels: fitted on this fold's training labels. The same
    # encoder instance is then shared by both datasets and by get_predictions.
    label_encoder = LabelEncoder()
    label_encoder.fit(train_df['weight_class'])

    # Build the datasets and loaders (only the training loader is shuffled)
    train_dataset = ScrapClassificationDataset(train_df, img_dir, transform, label_encoder)
    test_dataset = ScrapClassificationDataset(test_df, img_dir, transform, label_encoder)
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=8)

    # Initialize a fresh model, loss and optimizer for every fold
    model = SwinTinyClassifier().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=1e-4)

    # Training loop (5 epochs); the loss is not logged or validated
    for epoch in range(5):
        model.train()
        for images, labels, _ in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Predict on this fold's held-out rows and store the results
    predictions = get_predictions(model, test_loader, device, label_encoder)
    all_predictions.extend(predictions)

# Save the final results: one row per held-out prediction across all folds,
# written to the current working directory.
result_df = pd.DataFrame(all_predictions)
result_df.to_csv("predictions_only.csv", index=False)
print("✅ 예측 결과가 predictions_only.csv 파일로 저장되었습니다.")


🔁 Fold 1
🔁 Fold 2
🔁 Fold 3
🔁 Fold 4
🔁 Fold 5
✅ 예측 결과가 predictions_only.csv 파일로 저장되었습니다.
