In [1]:
import os
from tqdm import tqdm

from typing import Tuple, Sequence, Callable
import csv
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.optim as optim
from torch import nn, Tensor
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler

from torchvision import transforms
from torchvision.models import resnet18 

In [2]:
os.getcwd()

'/repo/dacon/alphabet_classification/code'

In [3]:
os.chdir('../data')

In [4]:
os.getcwd()

'/repo/dacon/alphabet_classification/data'

In [5]:
class MnistDataset(Dataset):
    def __init__(
        self,
        dir: os.PathLike,
        image_ids: os.PathLike,
        transforms: Sequence[Callable]
    ) -> None:
        self.dir = dir
        self.transforms = transforms

        self.labels = {}
        with open(image_ids, 'r') as f:
            reader = csv.reader(f)
            next(reader)
            for row in reader:
                self.labels[int(row[0])] = list(map(int, row[1:]))

        self.image_ids = list(self.labels.keys())

    def __len__(self) -> int:
        return len(self.image_ids)

    def __getitem__(self, index: int) -> Tuple[Tensor]:
        image_id = self.image_ids[index]
        image = Image.open(
            os.path.join(
                #self.dir, f'{str(image_id).zfill(5)}.png')).convert('RGB')
                self.dir, f'{str(image_id).zfill(5)}.png')).convert('HSV')

        target = np.array(self.labels.get(image_id)).astype(np.float32)

        if self.transforms is not None:
            image = self.transforms(image)

        return image, target

In [6]:
transforms_train = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
   )
])

transforms_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

In [7]:
trainset = MnistDataset('dirty_mnist','dirty_mnist_answer.csv', transforms_train)
testset = MnistDataset('test_dirty_mnist','sample_submission.csv', transforms_test)

train_loader = DataLoader(trainset, batch_size=256, num_workers=8)
test_loader = DataLoader(testset, batch_size=32, num_workers=4)

In [8]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x7febe4725278>

In [9]:
test_loader

<torch.utils.data.dataloader.DataLoader at 0x7febe4725320>

In [10]:
class MnistModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.resnet = resnet18(pretrained=True)
        self.classifier = nn.Linear(1000, 26)

    def forward(self, x):
        x = self.resnet(x)
        x = self.classifier(x)

        return x

In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
scaler = GradScaler()

model = MnistModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MultiLabelSoftMarginLoss()

num_epochs = 10
model.train()

for epoch in range(num_epochs):
    for i, (images, targets) in enumerate(train_loader):
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)

        outputs = model(images)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        if (i+1) % 10 == 0:
            outputs = outputs > 0.5
            acc = (outputs == targets).float().mean()
            print(f'{epoch}: {loss.item():.5f}, {acc.item():.5f}')

0: 0.69256, 0.53831
0: 0.68875, 0.54447
0: 0.68518, 0.53816
0: 0.68003, 0.55634
0: 0.67733, 0.55048
0: 0.67974, 0.54763
0: 0.67442, 0.55123
0: 0.66878, 0.57136
0: 0.66400, 0.57151
0: 0.66270, 0.57828
0: 0.65252, 0.57121
0: 0.65101, 0.57978
0: 0.64486, 0.59044
0: 0.64214, 0.59240
0: 0.64295, 0.59195
0: 0.62721, 0.60742
0: 0.62856, 0.60472
0: 0.62389, 0.61944
0: 0.62590, 0.61313
1: 0.61241, 0.63056
1: 0.61680, 0.62800
1: 0.60922, 0.63717
1: 0.59358, 0.64603
1: 0.58953, 0.64663
1: 0.58996, 0.64829
1: 0.58401, 0.65685
1: 0.58242, 0.66737
1: 0.58225, 0.66106
1: 0.56903, 0.67653
1: 0.56365, 0.67233
1: 0.55047, 0.67834
1: 0.54934, 0.69171
1: 0.54477, 0.68825
1: 0.55820, 0.68735
1: 0.54171, 0.69591
1: 0.53649, 0.69892
1: 0.53335, 0.70463
1: 0.53527, 0.70162
2: 0.53311, 0.70328
2: 0.53146, 0.70553
2: 0.52347, 0.71034
2: 0.50414, 0.72731
2: 0.51055, 0.72912
2: 0.51255, 0.72791
2: 0.51382, 0.72641
2: 0.50424, 0.72957
2: 0.49947, 0.72912
2: 0.49716, 0.73663
2: 0.49687, 0.72927
2: 0.47378, 0.75150


In [12]:
submit = pd.read_csv('sample_submission.csv')

model.eval()
batch_size = test_loader.batch_size
batch_index = 0
for i, (images, targets) in enumerate(test_loader):
    images = images.to(device)
    targets = targets.to(device)
    outputs = model(images)
    outputs = outputs > 0.5
    batch_index = i * batch_size
    submit.iloc[batch_index:batch_index+batch_size, 1:] = \
        outputs.long().squeeze(0).detach().cpu().numpy()
    
#submit.to_csv('../submission/rgb_submit.csv', index=False)
submit.to_csv('../submission/hsv_submit.csv', index=False)