In [None]:
# !chmod 600 /root/.kaggle/kaggle.json
# !kaggle competitions download -c captcha-hacker
# !unzip captcha-hacker.zip

In [None]:
import os
import random

import numpy as np

import torch as t
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torchvision as tv
from torchvision.models import vgg19, VGG19_Weights

from skimage.io import imread

In [None]:
class Config():
    """Run-wide settings shared by the dataset classes and the training cells.

    Every option is a plain instance attribute, so it can be changed after
    construction (``cfg.lr = 0.01``) or supplied as a keyword override
    (``Config(lr=0.01)``).

    Attributes:
        device: Device string that the model and the batches are moved to.
        seed: Seed passed to ``random``, ``numpy`` and ``torch``.
        datasets_dir: Root directory of the split being read. The dataset
            classes look for the substring ``'train'`` or ``'test'`` in it to
            decide whether labels are loaded.
        n_epoch: Number of passes over the training data.
        batch_size: Batch size given to the ``DataLoader``.
        n_worker: Number of ``DataLoader`` worker processes.
        lr: Learning rate given to the optimizer.
        ckpts_dir: Directory the per-epoch checkpoints are written to.

    Args:
        **overrides: Optional ``name=value`` pairs replacing the defaults
            above. Only existing option names are accepted.

    Raises:
        TypeError: If an override names an option that does not exist, so a
            misspelt keyword cannot silently create a new, unused attribute.
    """

    def __init__(self, **overrides):

        self.device = 'cuda'
        self.seed = 0

        self.datasets_dir = ''

        self.n_epoch = 20
        self.batch_size = 128
        self.n_worker = 2
        self.lr = 0.001

        self.ckpts_dir = ''

        # Apply the overrides last so that they win over the defaults.
        for name, value in overrides.items():
            if not hasattr(self, name):
                raise TypeError(f'Config() got an unknown option {name!r}')
            setattr(self, name, value)

In [None]:
class Task1(Dataset):
    """Dataset over the images stored in ``<cfg.datasets_dir>/task1/``.

    The split is chosen by substring. When ``cfg.datasets_dir`` contains
    ``'train'``, labels are read from ``<cfg.datasets_dir>/annotations.csv``
    and a sample is ``(image, y0)``, where ``y0`` is the position of the
    label's first character in ``'0123456789'``. Otherwise, when it contains
    ``'test'``, a sample is ``(image, img_name)``.

    Each line of ``annotations.csv`` is split on ``','``: the first field is
    the image path relative to ``cfg.datasets_dir`` (``task1/<img_name>``) and
    the second field is the label.

    Args:
        cfg: Object exposing a ``datasets_dir`` attribute.

    Raises:
        ValueError: If ``cfg.datasets_dir`` contains neither ``'train'`` nor
            ``'test'``, or (when a sample is fetched) a label character is not
            a digit.
        KeyError: If a training image has no entry in ``annotations.csv``.
    """

    _SUBDIR = 'task1/'
    _CHARSET = '0123456789'

    def __init__(self, cfg):

        self.cfg = cfg
        self.item_ls = self._build_item_ls()

        # imread() yields an array; it is turned into a PIL image, then into a
        # float tensor, then normalized per channel with the mean/std values
        # customarily paired with ImageNet-pretrained torchvision models.
        self.transforms = tv.transforms.Compose([
            tv.transforms.ToPILImage(),
            tv.transforms.ToTensor(),
            tv.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

    def _is_train(self):
        """Return True for the labelled split and False for the test split."""

        datasets_dir = self.cfg.datasets_dir
        if 'train' in datasets_dir:
            return True
        if 'test' in datasets_dir:
            return False

        # Fail loudly instead of building an empty dataset whose samples are None.
        raise ValueError(
            f"datasets_dir must contain 'train' or 'test', got {datasets_dir!r}")

    def _build_item_ls(self):
        """Return one dict per image, with its label when in the train split."""

        # os.listdir() gives names in arbitrary order; sorting keeps the
        # index -> image mapping (and hence seeded shuffling) reproducible.
        img_name_ls = sorted(os.listdir(os.path.join(self.cfg.datasets_dir, self._SUBDIR)))

        if not self._is_train():
            return [{'img_name': img_name} for img_name in img_name_ls]

        img_path2label = {}
        with open(os.path.join(self.cfg.datasets_dir, 'annotations.csv'), 'r') as f:
            for line in f:

                value_ls = line.strip('\n').split(',')
                if len(value_ls) < 2:
                    # Blank or malformed line (e.g. a trailing newline): skip it.
                    continue
                img_path2label[value_ls[0]] = value_ls[1]

        return [{'img_name': img_name,
                 'label': img_path2label[os.path.join(self._SUBDIR, img_name)]}
                for img_name in img_name_ls]

    @classmethod
    def _char_to_index(cls, char):
        """Map one label character to its class index in the charset."""

        index = cls._CHARSET.find(char)
        if len(char) != 1 or index < 0:
            # str.find() would return -1 here, which is not a valid class target.
            raise ValueError(f'label character {char!r} is not in {cls._CHARSET!r}')

        return index

    def __getitem__(self, idx):
        """Load and transform image ``idx``; pair it with its target or its name."""

        item = self.item_ls[idx]
        img_name = item['img_name']
        x = imread(os.path.join(self.cfg.datasets_dir, self._SUBDIR, img_name))
        x = self.transforms(x)

        if self._is_train():

            y0 = self._char_to_index(item['label'][0])

            return x, y0

        return x, img_name

    def __len__(self):
        """Return the number of images found in the task directory."""
        return len(self.item_ls)


class Task2(Dataset):
    """Dataset over the images stored in ``<cfg.datasets_dir>/task2/``.

    The split is chosen by substring. When ``cfg.datasets_dir`` contains
    ``'train'``, labels are read from ``<cfg.datasets_dir>/annotations.csv``
    and a sample is ``(image, y0, y1)``, where ``yk`` is the position of the
    label's k-th character in ``'0123456789abcdefghijklmnopqrstuvwxyz'``.
    Otherwise, when it contains ``'test'``, a sample is ``(image, img_name)``.

    Each line of ``annotations.csv`` is split on ``','``: the first field is
    the image path relative to ``cfg.datasets_dir`` (``task2/<img_name>``) and
    the second field is the label.

    Args:
        cfg: Object exposing a ``datasets_dir`` attribute.

    Raises:
        ValueError: If ``cfg.datasets_dir`` contains neither ``'train'`` nor
            ``'test'``, or (when a sample is fetched) a label character is not
            in the charset.
        KeyError: If a training image has no entry in ``annotations.csv``.
    """

    _SUBDIR = 'task2/'
    _CHARSET = '0123456789abcdefghijklmnopqrstuvwxyz'

    def __init__(self, cfg):

        self.cfg = cfg
        self.item_ls = self._build_item_ls()

        # imread() yields an array; it is turned into a PIL image, then into a
        # float tensor, then normalized per channel with the mean/std values
        # customarily paired with ImageNet-pretrained torchvision models.
        self.transforms = tv.transforms.Compose([
            tv.transforms.ToPILImage(),
            tv.transforms.ToTensor(),
            tv.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

    def _is_train(self):
        """Return True for the labelled split and False for the test split."""

        datasets_dir = self.cfg.datasets_dir
        if 'train' in datasets_dir:
            return True
        if 'test' in datasets_dir:
            return False

        # Fail loudly instead of building an empty dataset whose samples are None.
        raise ValueError(
            f"datasets_dir must contain 'train' or 'test', got {datasets_dir!r}")

    def _build_item_ls(self):
        """Return one dict per image, with its label when in the train split."""

        # os.listdir() gives names in arbitrary order; sorting keeps the
        # index -> image mapping (and hence seeded shuffling) reproducible.
        img_name_ls = sorted(os.listdir(os.path.join(self.cfg.datasets_dir, self._SUBDIR)))

        if not self._is_train():
            return [{'img_name': img_name} for img_name in img_name_ls]

        img_path2label = {}
        with open(os.path.join(self.cfg.datasets_dir, 'annotations.csv'), 'r') as f:
            for line in f:

                value_ls = line.strip('\n').split(',')
                if len(value_ls) < 2:
                    # Blank or malformed line (e.g. a trailing newline): skip it.
                    continue
                img_path2label[value_ls[0]] = value_ls[1]

        return [{'img_name': img_name,
                 'label': img_path2label[os.path.join(self._SUBDIR, img_name)]}
                for img_name in img_name_ls]

    @classmethod
    def _char_to_index(cls, char):
        """Map one label character to its class index in the charset."""

        index = cls._CHARSET.find(char)
        if len(char) != 1 or index < 0:
            # str.find() would return -1 here, which is not a valid class target.
            raise ValueError(f'label character {char!r} is not in {cls._CHARSET!r}')

        return index

    def __getitem__(self, idx):
        """Load and transform image ``idx``; pair it with its targets or its name."""

        item = self.item_ls[idx]
        img_name = item['img_name']
        x = imread(os.path.join(self.cfg.datasets_dir, self._SUBDIR, img_name))
        x = self.transforms(x)

        if self._is_train():

            label = item['label']
            y0 = self._char_to_index(label[0])
            y1 = self._char_to_index(label[1])

            return x, y0, y1

        return x, img_name

    def __len__(self):
        """Return the number of images found in the task directory."""
        return len(self.item_ls)


class Task3(Dataset):
    """Dataset over the images stored in ``<cfg.datasets_dir>/task3/``.

    The split is chosen by substring. When ``cfg.datasets_dir`` contains
    ``'train'``, labels are read from ``<cfg.datasets_dir>/annotations.csv``
    and a sample is ``(image, y0, y1, y2, y3)``, where ``yk`` is the position
    of the label's k-th character in
    ``'0123456789abcdefghijklmnopqrstuvwxyz'``. Otherwise, when it contains
    ``'test'``, a sample is ``(image, img_name)``.

    Each line of ``annotations.csv`` is split on ``','``: the first field is
    the image path relative to ``cfg.datasets_dir`` (``task3/<img_name>``) and
    the second field is the label.

    Args:
        cfg: Object exposing a ``datasets_dir`` attribute.

    Raises:
        ValueError: If ``cfg.datasets_dir`` contains neither ``'train'`` nor
            ``'test'``, or (when a sample is fetched) a label character is not
            in the charset.
        KeyError: If a training image has no entry in ``annotations.csv``.
    """

    _SUBDIR = 'task3/'
    _CHARSET = '0123456789abcdefghijklmnopqrstuvwxyz'
    _N_CHARS = 4

    def __init__(self, cfg):

        self.cfg = cfg
        self.item_ls = self._build_item_ls()

        # imread() yields an array; it is turned into a PIL image, then into a
        # float tensor, then normalized per channel with the mean/std values
        # customarily paired with ImageNet-pretrained torchvision models.
        self.transforms = tv.transforms.Compose([
            tv.transforms.ToPILImage(),
            tv.transforms.ToTensor(),
            tv.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])

    def _is_train(self):
        """Return True for the labelled split and False for the test split."""

        datasets_dir = self.cfg.datasets_dir
        if 'train' in datasets_dir:
            return True
        if 'test' in datasets_dir:
            return False

        # Fail loudly instead of building an empty dataset whose samples are None.
        raise ValueError(
            f"datasets_dir must contain 'train' or 'test', got {datasets_dir!r}")

    def _build_item_ls(self):
        """Return one dict per image, with its label when in the train split."""

        # os.listdir() gives names in arbitrary order; sorting keeps the
        # index -> image mapping (and hence seeded shuffling) reproducible.
        img_name_ls = sorted(os.listdir(os.path.join(self.cfg.datasets_dir, self._SUBDIR)))

        if not self._is_train():
            return [{'img_name': img_name} for img_name in img_name_ls]

        img_path2label = {}
        with open(os.path.join(self.cfg.datasets_dir, 'annotations.csv'), 'r') as f:
            for line in f:

                value_ls = line.strip('\n').split(',')
                if len(value_ls) < 2:
                    # Blank or malformed line (e.g. a trailing newline): skip it.
                    continue
                img_path2label[value_ls[0]] = value_ls[1]

        return [{'img_name': img_name,
                 'label': img_path2label[os.path.join(self._SUBDIR, img_name)]}
                for img_name in img_name_ls]

    @classmethod
    def _char_to_index(cls, char):
        """Map one label character to its class index in the charset."""

        index = cls._CHARSET.find(char)
        if len(char) != 1 or index < 0:
            # str.find() would return -1 here, which is not a valid class target.
            raise ValueError(f'label character {char!r} is not in {cls._CHARSET!r}')

        return index

    def __getitem__(self, idx):
        """Load and transform image ``idx``; pair it with its targets or its name."""

        item = self.item_ls[idx]
        img_name = item['img_name']
        x = imread(os.path.join(self.cfg.datasets_dir, self._SUBDIR, img_name))
        x = self.transforms(x)

        if self._is_train():

            label = item['label']
            # Index (rather than slice) so a too-short label still raises IndexError.
            ys = [self._char_to_index(label[k]) for k in range(self._N_CHARS)]

            return (x, *ys)

        return x, img_name

    def __len__(self):
        """Return the number of images found in the task directory."""
        return len(self.item_ls)

In [None]:
class Model1(nn.Module):
    """Pretrained-VGG19 feature extractor followed by one 10-way linear head.

    ``F`` wraps every child module of ``vgg19`` except its last two. Its
    output is average-pooled to 1x1, reshaped into rows of 512 values and
    passed through ``C0``.
    """

    def __init__(self):
        super().__init__()

        backbone = vgg19(weights=VGG19_Weights.IMAGENET1K_V1)
        kept_children = list(backbone.children())[:-2]

        self.F = nn.Sequential(*kept_children)
        self.AvgPool = nn.AdaptiveAvgPool2d((1, 1))
        self.C0 = nn.Sequential(nn.Linear(512, 10))

    def forward(self, x):
        """Return the ``C0`` scores (10 per row) computed from image batch ``x``."""

        feature_map = self.F(x)
        pooled = self.AvgPool(feature_map)
        flat = pooled.view(-1, 512)

        return self.C0(flat)


class Model2(nn.Module):
    """Pretrained-VGG19 feature extractor followed by two 36-way linear heads.

    ``F`` wraps every child module of ``vgg19`` except its last two. Its
    output is average-pooled to 1x1, reshaped into rows of 512 values and
    passed through ``C0`` and ``C1`` independently.
    """

    def __init__(self):
        super().__init__()

        backbone = vgg19(weights=VGG19_Weights.IMAGENET1K_V1)
        kept_children = list(backbone.children())[:-2]

        self.F = nn.Sequential(*kept_children)
        self.AvgPool = nn.AdaptiveAvgPool2d((1, 1))
        self.C0 = nn.Sequential(nn.Linear(512, 36))
        self.C1 = nn.Sequential(nn.Linear(512, 36))

    def forward(self, x):
        """Return the ``(C0, C1)`` scores (36 per row each) for image batch ``x``."""

        feature_map = self.F(x)
        pooled = self.AvgPool(feature_map)
        flat = pooled.view(-1, 512)

        first_scores = self.C0(flat)
        second_scores = self.C1(flat)

        return first_scores, second_scores


class Model3(nn.Module):
    """Pretrained-VGG19 feature extractor followed by four 36-way linear heads.

    ``F`` wraps every child module of ``vgg19`` except its last two. Its
    output is average-pooled to 1x1, reshaped into rows of 512 values and
    passed through ``C0`` .. ``C3`` independently.
    """

    def __init__(self):
        super().__init__()

        backbone = vgg19(weights=VGG19_Weights.IMAGENET1K_V1)
        kept_children = list(backbone.children())[:-2]

        self.F = nn.Sequential(*kept_children)
        self.AvgPool = nn.AdaptiveAvgPool2d((1, 1))

        # Registers C0, C1, C2, C3 in that order, one head per output position.
        for head_idx in range(4):
            setattr(self, f'C{head_idx}', nn.Sequential(nn.Linear(512, 36)))

    def forward(self, x):
        """Return the ``(C0, C1, C2, C3)`` scores (36 per row each) for batch ``x``."""

        feature_map = self.F(x)
        pooled = self.AvgPool(feature_map)
        flat = pooled.view(-1, 512)

        return tuple(getattr(self, f'C{head_idx}')(flat) for head_idx in range(4))

In [None]:
# Train Model1 on the task-1 images and save one checkpoint per epoch.
cfg = Config()
cfg.datasets_dir = 'train/'
cfg.ckpts_dir = 'ckpts/task1/'


# Seed every RNG in use and ask cuDNN for deterministic, non-autotuned kernels
# so that a re-run follows the same trajectory.
random.seed(cfg.seed)
np.random.seed(cfg.seed)
t.manual_seed(cfg.seed)
t.backends.cudnn.deterministic = True
t.backends.cudnn.benchmark = False

train_dataset = Task1(cfg)
train_dataloader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=cfg.n_worker, pin_memory=True)

model = Model1()
model = model.to(cfg.device)

criterion = nn.CrossEntropyLoss()

optimizer = t.optim.SGD(model.parameters(), lr=cfg.lr, weight_decay=0.0001, momentum=0.9)

# exist_ok=True makes this a no-op when the directory is already there,
# without a separate check that could race with another process.
os.makedirs(cfg.ckpts_dir, exist_ok=True)


for epoch in range(cfg.n_epoch):

    for i, (xs, y0s) in enumerate(train_dataloader):

        xs = xs.to(cfg.device)
        y0s = y0s.to(cfg.device)

        y0s_ = model(xs)
        loss = criterion(y0s_, y0s)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f'Epoch: {epoch}, Iter: {i}, Loss: {loss.item():.5f}')

    # Keep the weights of every epoch so any of them can be reloaded later.
    t.save(model.state_dict(), os.path.join(cfg.ckpts_dir, f'model1_e{epoch}.ckpt'))

Epoch: 0, Iter: 0, Loss: 2.30880
Epoch: 0, Iter: 1, Loss: 2.36705
Epoch: 0, Iter: 2, Loss: 2.38234
Epoch: 0, Iter: 3, Loss: 2.32719
Epoch: 0, Iter: 4, Loss: 2.35513
Epoch: 0, Iter: 5, Loss: 2.26168
Epoch: 0, Iter: 6, Loss: 2.28435
Epoch: 0, Iter: 7, Loss: 2.25618
Epoch: 0, Iter: 8, Loss: 2.27289
Epoch: 0, Iter: 9, Loss: 2.27736
Epoch: 0, Iter: 10, Loss: 2.26060
Epoch: 0, Iter: 11, Loss: 2.27623
Epoch: 0, Iter: 12, Loss: 2.24533
Epoch: 0, Iter: 13, Loss: 2.25414
Epoch: 0, Iter: 14, Loss: 2.23818
Epoch: 0, Iter: 15, Loss: 2.22784
Epoch: 1, Iter: 0, Loss: 2.19214
Epoch: 1, Iter: 1, Loss: 2.15797
Epoch: 1, Iter: 2, Loss: 2.19104
Epoch: 1, Iter: 3, Loss: 2.14261
Epoch: 1, Iter: 4, Loss: 2.09349
Epoch: 1, Iter: 5, Loss: 2.07215
Epoch: 1, Iter: 6, Loss: 2.06429
Epoch: 1, Iter: 7, Loss: 2.06669
Epoch: 1, Iter: 8, Loss: 1.95282
Epoch: 1, Iter: 9, Loss: 1.94114
Epoch: 1, Iter: 10, Loss: 1.91740
Epoch: 1, Iter: 11, Loss: 1.78502
Epoch: 1, Iter: 12, Loss: 1.74825
Epoch: 1, Iter: 13, Loss: 1.63397


In [None]:
# Train Model2 on the task-2 images and save one checkpoint per epoch.
cfg = Config()
cfg.lr = 0.01
cfg.datasets_dir = 'train/'
cfg.ckpts_dir = 'ckpts/task2/'


# Seed every RNG in use and ask cuDNN for deterministic, non-autotuned kernels
# so that a re-run follows the same trajectory.
random.seed(cfg.seed)
np.random.seed(cfg.seed)
t.manual_seed(cfg.seed)
t.backends.cudnn.deterministic = True
t.backends.cudnn.benchmark = False

train_dataset = Task2(cfg)
train_dataloader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=cfg.n_worker, pin_memory=True)

model = Model2()
model = model.to(cfg.device)

criterion = nn.CrossEntropyLoss()

optimizer = t.optim.SGD(model.parameters(), lr=cfg.lr, weight_decay=0.0001, momentum=0.9)

# exist_ok=True makes this a no-op when the directory is already there,
# without a separate check that could race with another process.
os.makedirs(cfg.ckpts_dir, exist_ok=True)


for epoch in range(cfg.n_epoch):

    for i, (xs, y0s, y1s) in enumerate(train_dataloader):

        xs = xs.to(cfg.device)
        y0s = y0s.to(cfg.device)
        y1s = y1s.to(cfg.device)

        y0s_, y1s_ = model(xs)
        # Mean of the per-character losses, so both heads weigh equally.
        loss = (criterion(y0s_, y0s) + criterion(y1s_, y1s)) / 2.0
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f'Epoch: {epoch}, Iter: {i}, Loss: {loss.item():.5f}')

    # Keep the weights of every epoch so any of them can be reloaded later.
    t.save(model.state_dict(), os.path.join(cfg.ckpts_dir, f'model2_e{epoch}.ckpt'))

Epoch: 0, Iter: 0, Loss: 3.72082
Epoch: 0, Iter: 1, Loss: 3.58403
Epoch: 0, Iter: 2, Loss: 3.53094
Epoch: 0, Iter: 3, Loss: 3.54517
Epoch: 0, Iter: 4, Loss: 3.53466
Epoch: 0, Iter: 5, Loss: 3.54363
Epoch: 0, Iter: 6, Loss: 3.51031
Epoch: 0, Iter: 7, Loss: 3.47848
Epoch: 0, Iter: 8, Loss: 3.48132
Epoch: 0, Iter: 9, Loss: 3.44130
Epoch: 0, Iter: 10, Loss: 3.37935
Epoch: 0, Iter: 11, Loss: 3.35971
Epoch: 0, Iter: 12, Loss: 3.38626
Epoch: 0, Iter: 13, Loss: 3.33608
Epoch: 0, Iter: 14, Loss: 3.27854
Epoch: 0, Iter: 15, Loss: 3.34108
Epoch: 0, Iter: 16, Loss: 3.32048
Epoch: 0, Iter: 17, Loss: 3.29981
Epoch: 0, Iter: 18, Loss: 3.24907
Epoch: 0, Iter: 19, Loss: 3.21783
Epoch: 1, Iter: 0, Loss: 3.22940
Epoch: 1, Iter: 1, Loss: 3.10821
Epoch: 1, Iter: 2, Loss: 3.16836
Epoch: 1, Iter: 3, Loss: 3.07909
Epoch: 1, Iter: 4, Loss: 3.06752
Epoch: 1, Iter: 5, Loss: 3.20266
Epoch: 1, Iter: 6, Loss: 3.14074
Epoch: 1, Iter: 7, Loss: 2.96380
Epoch: 1, Iter: 8, Loss: 2.98566
Epoch: 1, Iter: 9, Loss: 2.89948


In [None]:
# Train Model3 on the task-3 images and save one checkpoint per epoch.
cfg = Config()
cfg.lr = 0.01
cfg.datasets_dir = 'train/'
cfg.ckpts_dir = 'ckpts/task3/'


# Seed every RNG in use and ask cuDNN for deterministic, non-autotuned kernels
# so that a re-run follows the same trajectory.
random.seed(cfg.seed)
np.random.seed(cfg.seed)
t.manual_seed(cfg.seed)
t.backends.cudnn.deterministic = True
t.backends.cudnn.benchmark = False

train_dataset = Task3(cfg)
train_dataloader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True, num_workers=cfg.n_worker, pin_memory=True)

model = Model3()
model = model.to(cfg.device)

criterion = nn.CrossEntropyLoss()

optimizer = t.optim.SGD(model.parameters(), lr=cfg.lr, weight_decay=0.0001, momentum=0.9)

# exist_ok=True makes this a no-op when the directory is already there,
# without a separate check that could race with another process.
os.makedirs(cfg.ckpts_dir, exist_ok=True)


for epoch in range(cfg.n_epoch):

    for i, (xs, y0s, y1s, y2s, y3s) in enumerate(train_dataloader):

        xs = xs.to(cfg.device)
        y0s = y0s.to(cfg.device)
        y1s = y1s.to(cfg.device)
        y2s = y2s.to(cfg.device)
        y3s = y3s.to(cfg.device)

        y0s_, y1s_, y2s_, y3s_ = model(xs)
        # Mean of the per-character losses, so all four heads weigh equally.
        loss = (criterion(y0s_, y0s) + criterion(y1s_, y1s) + criterion(y2s_, y2s) + criterion(y3s_, y3s)) / 4.0
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f'Epoch: {epoch}, Iter: {i}, Loss: {loss.item():.5f}')

    # Keep the weights of every epoch so any of them can be reloaded later.
    t.save(model.state_dict(), os.path.join(cfg.ckpts_dir, f'model3_e{epoch}.ckpt'))

Epoch: 0, Iter: 0, Loss: 3.71435
Epoch: 0, Iter: 1, Loss: 3.62017
Epoch: 0, Iter: 2, Loss: 3.59258
Epoch: 0, Iter: 3, Loss: 3.57511
Epoch: 0, Iter: 4, Loss: 3.57761
Epoch: 0, Iter: 5, Loss: 3.56114
Epoch: 0, Iter: 6, Loss: 3.56523
Epoch: 0, Iter: 7, Loss: 3.55053
Epoch: 0, Iter: 8, Loss: 3.56306
Epoch: 0, Iter: 9, Loss: 3.58385
Epoch: 0, Iter: 10, Loss: 3.54147
Epoch: 0, Iter: 11, Loss: 3.56138
Epoch: 0, Iter: 12, Loss: 3.54065
Epoch: 0, Iter: 13, Loss: 3.52470
Epoch: 0, Iter: 14, Loss: 3.51828
Epoch: 0, Iter: 15, Loss: 3.50891
Epoch: 0, Iter: 16, Loss: 3.49438
Epoch: 0, Iter: 17, Loss: 3.48300
Epoch: 0, Iter: 18, Loss: 3.45625
Epoch: 0, Iter: 19, Loss: 3.45850
Epoch: 0, Iter: 20, Loss: 3.43316
Epoch: 0, Iter: 21, Loss: 3.42931
Epoch: 0, Iter: 22, Loss: 3.38815
Epoch: 0, Iter: 23, Loss: 3.42953
Epoch: 1, Iter: 0, Loss: 3.38981
Epoch: 1, Iter: 1, Loss: 3.36573
Epoch: 1, Iter: 2, Loss: 3.38708
Epoch: 1, Iter: 3, Loss: 3.37623
Epoch: 1, Iter: 4, Loss: 3.40612
Epoch: 1, Iter: 5, Loss: 3.37