In [22]:
import os
from tqdm import tqdm

from typing import Tuple, Sequence, Callable
import csv
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import torch
import torch.optim as optim
from torch import nn, Tensor
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler

from torchvision import transforms
from torchvision.models import resnet50

In [14]:
os.getcwd()

'/repo/dacon/alphabet_classification/data'

In [15]:
os.chdir('../data')

In [16]:
os.getcwd()

'/repo/dacon/alphabet_classification/data'

In [17]:
class MnistDataset(Dataset):
    def __init__(
        self,
        dir: os.PathLike,
        image_ids: os.PathLike,
        transforms: Sequence[Callable]
    ) -> None:
        self.dir = dir
        self.transforms = transforms

        self.labels = {}
        with open(image_ids, 'r') as f:
            reader = csv.reader(f)
            next(reader)
            for row in reader:
                self.labels[int(row[0])] = list(map(int, row[1:]))

        self.image_ids = list(self.labels.keys())

    def __len__(self) -> int:
        return len(self.image_ids)

    def __getitem__(self, index: int) -> Tuple[Tensor]:
        image_id = self.image_ids[index]
        image = Image.open(
            os.path.join(
                self.dir, f'{str(image_id).zfill(5)}.png')).convert('RGB')
                #self.dir, f'{str(image_id).zfill(5)}.png')).convert('HSV')

        target = np.array(self.labels.get(image_id)).astype(np.float32)

        if self.transforms is not None:
            image = self.transforms(image)

        return image, target

In [18]:
transforms_train = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
   )
])

transforms_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
])

In [19]:
trainset = MnistDataset('dirty_mnist','dirty_mnist_answer.csv', transforms_train)
testset = MnistDataset('test_dirty_mnist','sample_submission.csv', transforms_test)

train_loader = DataLoader(trainset, batch_size=32, num_workers=8)
test_loader = DataLoader(testset, batch_size=32, num_workers=4)

In [20]:
class MnistModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.resnet = resnet50(pretrained=True)
        self.classifier = nn.Linear(1000, 26)

    def forward(self, x):
        x = self.resnet(x)
        x = self.classifier(x)

        return x

In [23]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
scaler = GradScaler()

model = MnistModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MultiLabelSoftMarginLoss()

num_epochs = 10
model.train()

for epoch in range(num_epochs):
    for i, (images, targets) in enumerate(train_loader):
        optimizer.zero_grad()

        images = images.to(device)
        targets = targets.to(device)

        outputs = model(images)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        if (i+1) % 10 == 0:
            outputs = outputs > 0.5
            acc = (outputs == targets).float().mean()
            print(f'{epoch}: {loss.item():.5f}, {acc.item():.5f}')

3: 0.53816, 0.70312
3: 0.54151, 0.68990
3: 0.54391, 0.68990
3: 0.54927, 0.67188
3: 0.57604, 0.67308
3: 0.54729, 0.68750
3: 0.53920, 0.69591
3: 0.56553, 0.68149
3: 0.52132, 0.71514
3: 0.52492, 0.70192
3: 0.53949, 0.67788
3: 0.55243, 0.66947
3: 0.50856, 0.71394
3: 0.56048, 0.67548
3: 0.53302, 0.69952
3: 0.54228, 0.69231
3: 0.53933, 0.70793
3: 0.55018, 0.69591
3: 0.56772, 0.67188
3: 0.51116, 0.71034
3: 0.56537, 0.67909
3: 0.53422, 0.70072
3: 0.54120, 0.67909
3: 0.50893, 0.70793
3: 0.52428, 0.69832
3: 0.50851, 0.69952
3: 0.54884, 0.68149
3: 0.53952, 0.70312
3: 0.50503, 0.71875
3: 0.52255, 0.71154
3: 0.53443, 0.69712
3: 0.51863, 0.70312
3: 0.49951, 0.71394
3: 0.54630, 0.67909
3: 0.53180, 0.69591
3: 0.50039, 0.70913
3: 0.53985, 0.69591
3: 0.52585, 0.71635
3: 0.51178, 0.71875
3: 0.51317, 0.72476
3: 0.51581, 0.72115
3: 0.51277, 0.71755
3: 0.51476, 0.72356
3: 0.51039, 0.70192
3: 0.53947, 0.69712
3: 0.51333, 0.71394
3: 0.51901, 0.70072
3: 0.54267, 0.69712
3: 0.53113, 0.68750
3: 0.53622, 0.70192


In [24]:
submit = pd.read_csv('sample_submission.csv')

model.eval()
batch_size = test_loader.batch_size
batch_index = 0
for i, (images, targets) in enumerate(test_loader):
    images = images.to(device)
    targets = targets.to(device)
    outputs = model(images)
    outputs = outputs > 0.5
    batch_index = i * batch_size
    submit.iloc[batch_index:batch_index+batch_size, 1:] = \
        outputs.long().squeeze(0).detach().cpu().numpy()
    
submit.to_csv('../submission/rgb_res50_submit.csv', index=False)
#submit.to_csv('../submission/hsv_submit.csv', index=False)