In [76]:
import random
import os
import numpy as np
import torch
import pandas as pd
import copy
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from transformers import AutoModelForImageClassification, AutoImageProcessor
from tqdm import tqdm


In [77]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, exports ``PYTHONHASHSEED``, and seeds
    NumPy and PyTorch so that repeated runs draw the same random numbers.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so exporting it here
    # affects processes launched afterwards rather than the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed the MPS generator only where that backend exists and is usable, so
    # the helper also runs on CPU-only machines (the notebook falls back to
    # 'cpu') and on PyTorch builds that do not ship `torch.mps`.
    if hasattr(torch, 'mps') and torch.backends.mps.is_available():
        torch.mps.manual_seed(seed)

# Fix every RNG seed up front so that runs are repeatable.
seed_everything(42)

# Use the MPS backend when PyTorch reports it as available; otherwise use the CPU.
if torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

Using device: mps


In [78]:
# 1. Load the training metadata and hold out 10% of the rows for validation.
df = pd.read_csv('./train.csv')
train_df, valid_df = train_test_split(
    df,
    test_size=0.1,
    random_state=42,  # fixed so the split is the same on every run
)

In [79]:
# 2. Label encoding: number the distinct labels in order of first appearance
#    and keep lookup tables for both directions.
labels = df['label'].unique()
label_to_idx = dict(zip(labels, range(len(labels))))
idx_to_label = dict(enumerate(labels))

In [80]:
class BirdDataset(Dataset):
    """Dataset that loads the images listed in a dataframe.

    Each row supplies an image path in the ``upscale_img_path`` or
    ``img_path`` column and, unless ``is_test`` is set, a ``label`` value
    that is converted to a class index through the notebook-level
    ``label_to_idx`` mapping.

    Args:
        dataframe: Table with one row per image.
        transform: Optional callable applied to the loaded RGB image.
        use_upscaled: Read the path from ``upscale_img_path`` when true,
            otherwise from ``img_path``.
        is_test: When true, items are returned without a label.
    """

    def __init__(self, dataframe, transform=None, use_upscaled=True, is_test=False):
        self.dataframe = dataframe
        self.transform = transform
        self.use_upscaled = use_upscaled
        self.is_test = is_test

    def __len__(self):
        """Return the number of rows (images) in the dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the image at position ``idx``.

        Returns:
            The (optionally transformed) image when ``is_test`` is true,
            otherwise an ``(image, label_idx)`` tuple.
        """
        row = self.dataframe.iloc[idx]
        path_column = 'upscale_img_path' if self.use_upscaled else 'img_path'

        # Open inside a context manager so the underlying file is closed as
        # soon as the pixels have been converted, instead of leaking one open
        # handle per sample until garbage collection.
        with Image.open(row[path_column]) as source:
            image = source.convert('RGB')

        if self.transform:
            image = self.transform(image)

        if self.is_test:
            return image

        return image, label_to_idx[row['label']]

In [81]:
# 4. Define the image transforms.
# Training: resize, random horizontal flip, tensor conversion, normalization.
train_transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation: the same pipeline without the random flip.
valid_transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [82]:
# Build the datasets; both splits read the upscaled image paths.
train_dataset = BirdDataset(
    dataframe=train_df,
    transform=train_transform,
    use_upscaled=True,
)
valid_dataset = BirdDataset(
    dataframe=valid_df,
    transform=valid_transform,
    use_upscaled=True,
)

# Build the data loaders; only the training batches are shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=32, shuffle=False)

In [83]:
from transformers import AutoModelForImageClassification, AutoConfig

# 6. Model definition: fine-tune a pretrained SwinV2 image classifier on this
#    dataset's label set.
model_name = "microsoft/swinv2-base-patch4-window8-256"

# Load the checkpoint's configuration and point it at our labels.
# NOTE(review): keep `num_labels` assigned before `id2label` / `label2id`;
# the config may rebuild its label maps when `num_labels` changes — confirm
# against the transformers documentation before reordering.
config = AutoConfig.from_pretrained(model_name)
config.num_labels = len(labels)
config.id2label = idx_to_label
config.label2id = label_to_idx

# Load the pretrained weights. `ignore_mismatched_sizes=True` lets the load
# proceed even though the checkpoint's classifier head has a different shape
# than the one this config asks for.
model = AutoModelForImageClassification.from_pretrained(
    model_name,
    config=config,
    ignore_mismatched_sizes=True
)

# Replace the classification head with a fresh linear layer sized to our labels.
# NOTE(review): the load above already reports a newly initialized classifier
# of the requested size, so this replacement looks redundant — confirm whether
# the extra re-initialization is intended.
model.classifier = torch.nn.Linear(model.classifier.in_features, len(labels))

Some weights of Swinv2ForImageClassification were not initialized from the model checkpoint at microsoft/swinv2-base-patch4-window8-256 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([25, 1024]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([25]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [84]:
def train(model, optimizer, train_loader, val_loader, scheduler, device, num_epochs=5):
    """Fine-tune ``model`` and return the copy that scored best on validation.

    Each epoch runs one pass over ``train_loader`` with cross-entropy loss,
    then evaluates on ``val_loader`` without gradients. The mean validation
    loss is handed to ``scheduler.step`` and a deep copy of the model is kept
    whenever the validation accuracy improves.

    Args:
        model: Module whose forward output exposes ``.logits``.
        optimizer: Optimizer updating ``model``'s parameters.
        train_loader: Sized iterable of ``(images, targets)`` training batches.
        val_loader: Sized iterable of ``(images, targets)`` validation batches.
        scheduler: Scheduler whose ``step`` accepts the validation loss.
        device: Device every batch is moved to.
        num_epochs: Number of epochs to run.

    Returns:
        A deep copy of the model taken at the best-scoring epoch. If no epoch
        ran (``num_epochs`` is 0), ``model`` itself is returned so callers
        never receive ``None``.
    """
    best_model = None
    best_accuracy = 0.0
    criterion = torch.nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')
        # `targets` rather than `labels`, to avoid shadowing the notebook-level
        # `labels` array.
        for images, targets in train_pbar:
            images, targets = images.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(images).logits
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_pbar.set_postfix({'loss': f'{loss.item():.4f}'})

        # Guard the averages so an empty loader yields 0.0 instead of raising
        # ZeroDivisionError.
        train_loss /= max(len(train_loader), 1)

        # Validation phase
        model.eval()
        valid_loss = 0.0
        correct = 0
        total = 0
        val_pbar = tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Valid]')
        with torch.no_grad():
            for images, targets in val_pbar:
                images, targets = images.to(device), targets.to(device)
                outputs = model(images).logits
                loss = criterion(outputs, targets)
                valid_loss += loss.item()

                predicted = outputs.argmax(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

                val_pbar.set_postfix({'loss': f'{loss.item():.4f}'})

        valid_loss /= max(len(val_loader), 1)
        accuracy = correct / total if total else 0.0

        # Learning-rate adjustment: the scheduler is driven by the validation loss.
        scheduler.step(valid_loss)

        # Keep the best model. The first completed epoch is always recorded,
        # so a run whose accuracy never rises above 0.0 still returns a model.
        if best_model is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = copy.deepcopy(model)

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}')
        print(f'Valid Loss: {valid_loss:.4f}')
        print(f'Valid Accuracy: {accuracy:.4f}')
        print(f'Best Accuracy: {best_accuracy:.4f}')
        print('-' * 50)

    return best_model if best_model is not None else model

In [85]:
num_epochs = 5
model.to(device)
# 7. Define the loss function, optimizer and learning-rate scheduler.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
# The scheduler multiplies the learning rate by `factor` once the monitored
# value has stopped decreasing for more than `patience` epochs.
# NOTE(review): `verbose` is flagged as deprecated in recent PyTorch
# releases — confirm before upgrading.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=2,
    verbose=True,
)

trained_model = train(
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=valid_loader,
    scheduler=scheduler,
    device=device,
    num_epochs=num_epochs,
)

Epoch 1/5 [Train]: 100%|██████████| 446/446 [25:02<00:00,  3.37s/it, loss=0.0150]
Epoch 1/5 [Valid]: 100%|██████████| 50/50 [00:51<00:00,  1.04s/it, loss=0.0038]


Epoch 1/5:
Train Loss: 0.5312
Valid Loss: 0.0609
Valid Accuracy: 0.9861
Best Accuracy: 0.9861
--------------------------------------------------


Epoch 2/5 [Train]:  45%|████▍     | 200/446 [11:17<13:53,  3.39s/it, loss=0.0268]


KeyboardInterrupt: 

In [None]:
# 9. Save the model: only the weights (state dict) of the model returned by
#    training are written to disk.
best_weights = trained_model.state_dict()
torch.save(best_weights, 'bird_classifier_upscale.pth')
print("Model saved.")

Model saved.


In [None]:
test = pd.read_csv('./test.csv')

# Test items carry no label, so the dataset returns images only.
# NOTE(review): the training and validation datasets read `upscale_img_path`
# while this one reads `img_path` (use_upscaled=False) — confirm that the
# mismatch is intentional.
test_dataset = BirdDataset(
    dataframe=test,
    transform=valid_transform,
    use_upscaled=False,
    is_test=True,
)

# Keep the row order (no shuffling) so predictions line up with `test`.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=0,
)

In [None]:
def inference(model, test_loader, device):
    """Predict a class index for every image yielded by ``test_loader``.

    The model is switched to eval mode and run without gradient tracking.

    Args:
        model: Module whose forward output exposes ``.logits``.
        test_loader: Iterable of image batches (no labels).
        device: Device each batch is moved to before the forward pass.

    Returns:
        A flat list with one predicted class index per image, in loader order.
    """
    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in tqdm(test_loader):
            logits = model(batch.to(device)).logits
            # Highest-scoring class per row, brought back to host memory.
            batch_classes = logits.argmax(1).cpu().numpy()
            predictions.extend(batch_classes)

    return predictions

In [None]:
# Load the saved weights.
# `map_location=device` remaps the stored tensors onto the current device, so
# a checkpoint written from an MPS model still loads on a machine without MPS.
# `weights_only=True` restricts unpickling to tensors and plain containers,
# which is all a state dict holds.
state_dict = torch.load(
    'bird_classifier_upscale.pth',
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)
model.to(device)

# Run inference on the test set.
preds = inference(model, test_loader, device)

100%|██████████| 1697/1697 [03:43<00:00,  7.58it/s]


In [None]:
# Convert the predicted class indices back into the original label values.
preds = list(map(idx_to_label.__getitem__, preds))

submit = pd.read_csv('./sample_submission.csv')

In [None]:
# Create the submission file: the test ids paired with their predicted labels.
submission = test[['id']].assign(label=preds)
submission.to_csv('swinv2_upscale.csv', index=False)