In [1]:
import os
from tqdm import tqdm

from typing import Tuple, Sequence, Callable
import csv
import cv2
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.optim as optim
from torch import nn, Tensor
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler

import random
from torchvision import transforms
from cvtorchvision import cvtransforms
from torchvision.models import resnet50

In [2]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    
    if torch.cuda.device_count() > 0:
        torch.cuda.manual_seed_all(seed)

In [3]:
set_seed(999)

In [4]:
os.getcwd()

'/repo/dacon/alphabet_classification/code'

In [5]:
os.chdir('../data')

In [6]:
os.getcwd()

'/repo/dacon/alphabet_classification/data'

In [7]:
class MnistDataset(Dataset):
    def __init__(
        self,
        dir: os.PathLike,
        image_ids: os.PathLike,
        transforms: Sequence[Callable]
    ) -> None:
        self.dir = dir
        self.transforms = transforms

        self.labels = {}
        with open(image_ids, 'r') as f:
            reader = csv.reader(f)
            next(reader)
            for row in reader:
                self.labels[int(row[0])] = list(map(int, row[1:]))

        self.image_ids = list(self.labels.keys())

    def __len__(self) -> int:
        return len(self.image_ids)

    def __getitem__(self, index: int) -> Tuple[Tensor]:
        image_id = self.image_ids[index]
        # openCV로 변환 작업중 : BGR
        image = cv2.imread(
            os.path.join(
                self.dir, f'{str(image_id).zfill(5)}.png'))

        target = np.array(self.labels.get(image_id)).astype(np.float32)

        if self.transforms is not None:
            image = self.transforms(image)

        return image, target

In [8]:
# 외부 패키지 사용 : cvtorchvision
transforms_train = cvtransforms.Compose([
    cvtransforms.RandomHorizontalFlip(p=0.5),
    cvtransforms.RandomVerticalFlip(p=0.5),
    cvtransforms.RandomApply([cvtransforms.RandomRotation([-15, 15])], p=0.3), #add(02.02)
    cvtransforms.RandomApply([cvtransforms.RandomRotation([-30, 30])], p=0.3), #add(02.02)
    cvtransforms.RandomApply([cvtransforms.RandomPerspective()], p=0.3), #add(02.02)
    cvtransforms.ToTensor(),
    cvtransforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
   )
])

transforms_test = transforms.Compose([
    cvtransforms.ToTensor(),
    cvtransforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

In [9]:
trainset = MnistDataset('dirty_mnist_2nd','dirty_mnist_2nd_answer.csv', transforms_train)
testset = MnistDataset('test_dirty_mnist_2nd','sample_submission.csv', transforms_test)

train_loader = DataLoader(trainset, batch_size=32, num_workers=8)
test_loader = DataLoader(testset, batch_size=32, num_workers=4)

In [10]:
class MnistModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.resnet = resnet50(pretrained=True)
        self.classifier = nn.Linear(1000, 26)

    def forward(self, x):
        x = self.resnet(x)
        x = self.classifier(x)

        return x

In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
scaler = GradScaler()

model = MnistModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MultiLabelSoftMarginLoss()

num_epochs = 100
model.train()

for epoch in range(num_epochs):
    for i, (images, targets) in enumerate(train_loader):
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)

        outputs = model(images)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        if (i+1) % 10 == 0:
            outputs = outputs > 0.5
            acc = (outputs == targets).float().mean()
            print(f'{epoch}: {loss.item():.5f}, {acc.item():.5f}')

0.96635
93: 0.10287, 0.95673
93: 0.09668, 0.96394
93: 0.11013, 0.95433
93: 0.16030, 0.92308
93: 0.09021, 0.96635
93: 0.09148, 0.97236
93: 0.11188, 0.96154
93: 0.11729, 0.95553
93: 0.08871, 0.95913
93: 0.11155, 0.95673
93: 0.13185, 0.94832
93: 0.09415, 0.96394
93: 0.13610, 0.94351
93: 0.08386, 0.96875
93: 0.13464, 0.94231
93: 0.13009, 0.94471
94: 0.13260, 0.95192
94: 0.08979, 0.96034
94: 0.10232, 0.96514
94: 0.11436, 0.96034
94: 0.11130, 0.95793
94: 0.09676, 0.96394
94: 0.10943, 0.95433
94: 0.10558, 0.95313
94: 0.10970, 0.96274
94: 0.13328, 0.94712
94: 0.10814, 0.96635
94: 0.13872, 0.94712
94: 0.12294, 0.95313
94: 0.09898, 0.96154
94: 0.11351, 0.95913
94: 0.09194, 0.95793
94: 0.14471, 0.95192
94: 0.10623, 0.96034
94: 0.10676, 0.95192
94: 0.12795, 0.95072
94: 0.10351, 0.95072
94: 0.12578, 0.95433
94: 0.12065, 0.95433
94: 0.10163, 0.95433
94: 0.11522, 0.95553
94: 0.11432, 0.95673
94: 0.14567, 0.95673
94: 0.11719, 0.95313
94: 0.10978, 0.95793
94: 0.17304, 0.93510
94: 0.10539, 0.96154
94: 0

In [12]:
submit = pd.read_csv('sample_submission.csv')

model.eval()
batch_size = test_loader.batch_size
batch_index = 0
for i, (images, targets) in enumerate(test_loader):
    images = images.to(device)
    targets = targets.to(device)
    outputs = model(images)
    outputs = outputs > 0.5
    batch_index = i * batch_size
    submit.iloc[batch_index:batch_index+batch_size, 1:] = \
        outputs.long().squeeze(0).detach().cpu().numpy()
    
submit.to_csv('../submission/0202_bgr_res50_submit.csv', index=False)