In [1]:
import os
from tqdm import tqdm

from typing import Tuple, Sequence, Callable
import csv
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.optim as optim
from torch import nn, Tensor
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler

from torchvision import transforms
from cvtorchvision import cvtransforms
from torchvision.models import resnet50

In [2]:
os.getcwd()

'/repo/dacon/alphabet_classification/code'

In [3]:
os.chdir('../data')

In [4]:
os.getcwd()

'/repo/dacon/alphabet_classification/data'

In [5]:
class MnistDataset(Dataset):
    def __init__(
        self,
        dir: os.PathLike,
        image_ids: os.PathLike,
        transforms: Sequence[Callable]
    ) -> None:
        self.dir = dir
        self.transforms = transforms

        self.labels = {}
        with open(image_ids, 'r') as f:
            reader = csv.reader(f)
            next(reader)
            for row in reader:
                self.labels[int(row[0])] = list(map(int, row[1:]))

        self.image_ids = list(self.labels.keys())

    def __len__(self) -> int:
        return len(self.image_ids)

    def __getitem__(self, index: int) -> Tuple[Tensor]:
        image_id = self.image_ids[index]
        # openCV로 변환 작업중 : BGR -> HSV -> V
        image = cv2.split(cv2.cvtColor(
                            cv2.imread(
                                os.path.join(
                                    self.dir, f'{str(image_id).zfill(5)}.png'
                                            )
                                        ), cv2.COLOR_BGR2HSV)
                            )[2]
        target = np.array(self.labels.get(image_id)).astype(np.float32)

        if self.transforms is not None:
            image = self.transforms(image)

        return image, target

In [6]:
# 외부 패키지 사용 : cvtorchvision
transforms_train = cvtransforms.Compose([
    cvtransforms.RandomHorizontalFlip(p=0.5),
    cvtransforms.RandomVerticalFlip(p=0.5),
    cvtransforms.ToTensor(),
    cvtransforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
   )
])

transforms_test = transforms.Compose([
    cvtransforms.ToTensor(),
    cvtransforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

In [7]:
trainset = MnistDataset('dirty_mnist','dirty_mnist_answer.csv', transforms_train)
testset = MnistDataset('test_dirty_mnist','sample_submission.csv', transforms_test)

train_loader = DataLoader(trainset, batch_size=32, num_workers=8)
test_loader = DataLoader(testset, batch_size=32, num_workers=4)

In [8]:
class MnistModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.resnet = resnet50(pretrained=True)
        self.classifier = nn.Linear(1000, 26)

    def forward(self, x):
        x = self.resnet(x)
        x = self.classifier(x)

        return x

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
scaler = GradScaler()

model = MnistModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MultiLabelSoftMarginLoss()

num_epochs = 10
model.train()

for epoch in range(num_epochs):
    for i, (images, targets) in enumerate(train_loader):
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)

        outputs = model(images)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        if (i+1) % 10 == 0:
            outputs = outputs > 0.5
            acc = (outputs == targets).float().mean()
            print(f'{epoch}: {loss.item():.5f}, {acc.item():.5f}')

3: 0.52347, 0.72716
3: 0.50850, 0.71755
3: 0.53056, 0.69471
3: 0.53490, 0.68990
3: 0.53445, 0.70072
3: 0.52073, 0.72356
3: 0.52947, 0.70913
3: 0.56163, 0.69832
3: 0.50457, 0.72957
3: 0.49403, 0.72356
3: 0.53102, 0.70072
3: 0.53207, 0.70433
3: 0.48462, 0.73317
3: 0.55688, 0.68630
3: 0.50254, 0.71755
3: 0.53139, 0.71995
3: 0.53465, 0.71394
3: 0.52261, 0.71514
3: 0.52507, 0.70673
3: 0.48956, 0.72957
3: 0.51573, 0.69952
3: 0.53086, 0.70793
3: 0.52792, 0.70553
3: 0.49034, 0.73317
3: 0.50598, 0.72115
3: 0.51087, 0.71154
3: 0.51546, 0.70913
3: 0.50412, 0.71635
3: 0.49083, 0.73317
3: 0.50618, 0.71154
3: 0.51177, 0.72236
3: 0.49316, 0.72476
3: 0.47510, 0.75120
3: 0.51388, 0.71514
3: 0.51155, 0.71755
3: 0.47364, 0.72596
3: 0.51301, 0.71274
3: 0.47582, 0.74639
3: 0.49513, 0.72957
3: 0.46560, 0.76202
3: 0.50661, 0.73558
3: 0.47166, 0.75481
3: 0.49489, 0.72476
3: 0.50101, 0.71635
3: 0.52127, 0.72957
3: 0.50670, 0.71154
3: 0.49255, 0.72837
3: 0.49918, 0.73197
3: 0.50508, 0.72596
3: 0.50157, 0.73438


In [10]:
submit = pd.read_csv('sample_submission.csv')

model.eval()
batch_size = test_loader.batch_size
batch_index = 0
for i, (images, targets) in enumerate(test_loader):
    images = images.to(device)
    targets = targets.to(device)
    outputs = model(images)
    outputs = outputs > 0.5
    batch_index = i * batch_size
    submit.iloc[batch_index:batch_index+batch_size, 1:] = \
        outputs.long().squeeze(0).detach().cpu().numpy()
    
submit.to_csv('../submission/opencv_v_res50_submit.csv', index=False)