## PyTorch + ConvNeXt 첫 시도

In [1]:
import os
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from tqdm import tqdm
import timm

# Configuration: dataset locations, output directory, RNG seed and compute device.
train_dir = 'open/train'
test_csv_path = 'open/test.csv'
output_dir = 'pytorch_timm_output2'
os.makedirs(output_dir, exist_ok=True)  # directory the best checkpoint is saved into

# Seed PyTorch's global RNG so that shuffled DataLoader order and any random
# weight initialisation are repeatable from a fresh kernel.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [2]:
# Build the training table: one row per .jpg file, labelled by its parent folder name.
df = pd.DataFrame({'image': list(Path(train_dir).rglob("*/*.jpg"))})
if df.empty:
    # Fail early with a clear message instead of an obscure error further down.
    raise FileNotFoundError(f"No .jpg images found under {train_dir!r}")
df['label'] = df['image'].apply(lambda x: x.parent.name)
df['image'] = df['image'].astype(str)

# Label encoding: class names sorted alphabetically -> contiguous integer ids (and back).
label2idx = {label: idx for idx, label in enumerate(sorted(df['label'].unique()))}
idx2label = {idx: label for label, idx in label2idx.items()}
df['label_idx'] = df['label'].map(label2idx)

# Per-class sampling (up to 10,000 images per folder).
# Sampling each group explicitly keeps the 'label' column without relying on
# GroupBy.apply operating on the grouping column, and capping n at the group
# size avoids a ValueError for classes with fewer images than the target.
SAMPLES_PER_CLASS = 10000
df_balanced = pd.concat(
    [
        group.sample(n=min(len(group), SAMPLES_PER_CLASS), random_state=42)
        for _, group in df.groupby('label')
    ],
    ignore_index=True,
)

# Stratified 70/30 train/validation split on the encoded label.
train_df, val_df = train_test_split(df_balanced, test_size=0.3, stratify=df_balanced['label_idx'], random_state=42)

# Custom Dataset
class ImageDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs described by a DataFrame.

    The frame must provide an ``image`` column (a path accepted by
    ``Image.open``) and a ``label_idx`` column. Rows are addressed by
    position (``iloc``), so the frame's index does not need to be contiguous.
    """

    def __init__(self, df, transform=None):
        """Keep a reference to ``df`` and an optional callable applied to each image."""
        self.df = df
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at positional index ``idx`` and return ``(image, label)``."""
        row = self.df.iloc[idx]  # one positional lookup serves both fields
        # Open inside a context manager so the file handle is released
        # deterministically; convert() yields a separate, fully loaded image
        # that remains usable after the file is closed.
        with Image.open(row['image']) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, row['label_idx']

# Image preprocessing shared by the training and validation datasets:
# resize to 224x224, convert to a tensor, then normalise every channel with
# mean 0.5 / std 0.5.
# NOTE(review): pretrained timm weights are usually trained with ImageNet
# mean/std — confirm that 0.5/0.5 is intentional.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
)

# Datasets wrap the two splits with the same preprocessing.
train_dataset, val_dataset = (
    ImageDataset(split, transform=transform) for split in (train_df, val_df)
)

# Data loaders: only the training loader shuffles; both load in the main process.
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=0,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=0,
)


  df_balanced = df.groupby('label').apply(lambda x: x.sample(n=10000, random_state=42)).reset_index(drop=True)


In [3]:
# Model: pretrained ConvNeXt-Base from timm, with the classifier sized to the
# number of labels found in the training data.
model = timm.create_model(
    'convnext_base',
    pretrained=True,
    num_classes=len(label2idx),
)
model = model.to(device)

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-4,
)

# Training-loop settings: epoch budget and best validation accuracy seen so far.
num_epochs, best_acc = 7, 0.0


model.safetensors:   0%|          | 0.00/354M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [4]:
# Sanity check: pull a single batch from the training loader and show its tensor shapes.
batch_iter = iter(train_loader)
images, labels = next(batch_iter)
print(images.shape, labels.shape)

torch.Size([16, 3, 224, 224]) torch.Size([16])


In [5]:
for epoch in range(num_epochs):
    # ---- Training pass over the whole training loader ----
    model.train()
    running_loss = 0.0
    progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
    for images, labels in progress:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    mean_loss = running_loss / len(train_loader)
    print(f"Train Loss: {mean_loss:.4f}")

    # ---- Validation pass (no gradients, eval mode) ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            predicted = model(images).argmax(dim=1)  # index of the largest logit per sample
            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

    acc = correct / total
    print(f"Val Accuracy: {acc*100:.2f}%")

    # Checkpoint only when validation accuracy strictly improves.
    if acc > best_acc:
        best_acc = acc
        checkpoint_file = os.path.join(output_dir, 'best_model.pth')
        torch.save(model.state_dict(), checkpoint_file)
        print("✅ 모델 저장 완료")


Epoch 1/7: 100%|███████████████████████████████████████████████████████████████████| 3063/3063 [51:08<00:00,  1.00s/it]


Train Loss: 0.7296
Val Accuracy: 78.43%
✅ 모델 저장 완료


Epoch 2/7: 100%|███████████████████████████████████████████████████████████████████| 3063/3063 [51:17<00:00,  1.00s/it]


Train Loss: 0.4555
Val Accuracy: 79.76%
✅ 모델 저장 완료


Epoch 3/7: 100%|███████████████████████████████████████████████████████████████████| 3063/3063 [51:11<00:00,  1.00s/it]


Train Loss: 0.2326
Val Accuracy: 80.41%
✅ 모델 저장 완료


Epoch 4/7: 100%|███████████████████████████████████████████████████████████████████| 3063/3063 [51:44<00:00,  1.01s/it]


Train Loss: 0.1037
Val Accuracy: 77.52%


Epoch 5/7: 100%|███████████████████████████████████████████████████████████████████| 3063/3063 [49:18<00:00,  1.04it/s]


Train Loss: 0.0753
Val Accuracy: 80.22%


Epoch 6/7: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3063/3063 [49:07<00:00,  1.04it/s]


Train Loss: 0.0611
Val Accuracy: 79.53%


Epoch 7/7: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3063/3063 [49:04<00:00,  1.04it/s]


Train Loss: 0.0538
Val Accuracy: 80.10%


In [6]:
# Test-data prediction
class TestDataset(Dataset):
    """Map-style dataset yielding unlabeled images described by a DataFrame.

    The frame must provide an ``img_path`` column (a path accepted by
    ``Image.open``). Rows are addressed by position (``iloc``).
    """

    def __init__(self, df, transform=None):
        """Keep a reference to ``df`` and an optional callable applied to each image."""
        self.df = df
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load and return the image at positional index ``idx``."""
        img_path = self.df.iloc[idx]['img_path']
        # Open inside a context manager so the file handle is released
        # deterministically; convert() yields a separate, fully loaded image
        # that remains usable after the file is closed.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image

# Read the test manifest and prefix every listed path with the dataset root folder.
test_df = pd.read_csv(test_csv_path)
test_df['img_path'] = [
    os.path.join('open', rel_path) for rel_path in test_df['img_path']
]

# Same preprocessing as training; order is kept (no shuffling) so predictions
# line up with the rows of the manifest.
test_dataset = TestDataset(test_df, transform=transform)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
)


In [7]:
# Inference: reload the best checkpoint saved during training and predict the test set.
checkpoint_path = os.path.join(output_dir, 'best_model.pth')
# map_location remaps stored tensors onto the current device, so a checkpoint
# written on a GPU machine can still be loaded on a CPU-only one.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.eval()
preds = []
with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)  # index of the largest logit per sample
        preds.extend(predicted.cpu().tolist())  # plain Python ints, usable as idx2label keys

# Restore label names and write the submission file.
# NOTE(review): assumes sample_submission.csv lists rows in the same order as
# test.csv — confirm against the competition data description.
submission = pd.read_csv('open/sample_submission.csv')
if len(preds) != len(submission):
    raise ValueError(
        f"Prediction count ({len(preds)}) does not match submission rows ({len(submission)})"
    )
submission['rock_type'] = [idx2label[p] for p in preds]
submission.to_csv('submission_timm2.csv', index=False)
print("🎉 제출 파일 저장 완료!")


🎉 제출 파일 저장 완료!


##

### 전체 코드

In [None]:
import os

# Display the CPU count reported by the operating system.
# NOTE(review): presumably checked to choose a DataLoader `num_workers` value — confirm.
os.cpu_count()