# Environment setup

In [None]:
# Colab only: mount Google Drive at /content/drive so the cells below can
# copy the dataset archive and model weights from it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### Copy dataset and model weights

In [None]:
# Copy the dataset archive and the three per-task weight files from the
# mounted Drive folder into the current working directory.
!cp /content/drive/MyDrive/tmp/captcha-hacker.zip .
!cp /content/drive/MyDrive/tmp/task1.pth .
!cp /content/drive/MyDrive/tmp/task2.pth .
!cp /content/drive/MyDrive/tmp/task3.pth .

In [None]:
# Extract the archive into ./captcha-hacker: -q is quiet, -o overwrites
# existing files without prompting (so the cell can be re-run).
!unzip -q -o captcha-hacker.zip -d captcha-hacker

In [None]:
# Sanity check: the archive, the extracted folder and the three .pth files
# should all be listed in the working directory.
!ls -al

total 568872
drwxr-xr-x 1 root root      4096 Dec 11 11:11 .
drwxr-xr-x 1 root root      4096 Dec 11 11:08 ..
drwxr-xr-x 4 root root      4096 Dec 11 11:11 captcha-hacker
-rw------- 1 root root  66230128 Dec 11 11:11 captcha-hacker.zip
drwxr-xr-x 4 root root      4096 Dec  8 14:35 .config
drwx------ 5 root root      4096 Dec 11 11:11 drive
drwxr-xr-x 1 root root      4096 Dec  8 14:36 sample_data
-rw------- 1 root root 171232525 Dec 11 11:11 task1.pth
-rw------- 1 root root 172511053 Dec 11 11:11 task2.pth
-rw------- 1 root root 172511053 Dec 11 11:11 task3.pth


In [None]:
# Dataset directories inside the extracted archive (relative to the working
# directory). TEST_PATH is joined with the filenames read from
# sample_submission.csv when the test images are loaded.
# NOTE(review): TRAIN_PATH is not referenced by the inference cells visible
# in this notebook - presumably kept for the training code; confirm.
TRAIN_PATH = "captcha-hacker/train"
TEST_PATH = "captcha-hacker/test"

# Model

In [None]:
import glob
import os

import torch
import torch.jit
import torch.nn as nn
from torchsummary import summary

In [None]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [None]:
class Model_task1(torch.nn.Module):
    """CNN with four parallel 10-way classification heads.

    Eight convolutional stages (5x5 conv -> batch norm -> ReLU -> 2x2
    max-pool -> dropout) feed two 3072-unit fully connected layers, followed
    by four independent ``Linear(3072, 10)`` heads (``digit1`` .. ``digit4``).

    The flatten step hard-codes a 192 x 7 x 7 feature map, which is what a
    3 x 64 x 64 input produces after the eight stages.

    The attribute names and the layer order inside every ``nn.Sequential``
    determine the ``state_dict`` keys, so they are kept exactly as the saved
    weights expect them.
    """

    # Number of features entering the first fully connected layer.
    _FLAT_FEATURES = 192 * 7 * 7

    @staticmethod
    def _conv_stage(in_channels, out_channels, pool_stride):
        """Build one conv -> BN -> ReLU -> max-pool -> dropout stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=pool_stride, padding=1),
            nn.Dropout(0.2)
        )

    @staticmethod
    def _dense_stage(in_features, out_features):
        """Build one fully connected layer followed by ReLU."""
        return nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.ReLU()
        )

    def __init__(self):
        super().__init__()

        # Odd-numbered stages pool with stride 2, even-numbered with stride 1.
        self._hidden1 = self._conv_stage(3, 48, pool_stride=2)
        self._hidden2 = self._conv_stage(48, 64, pool_stride=1)
        self._hidden3 = self._conv_stage(64, 128, pool_stride=2)
        self._hidden4 = self._conv_stage(128, 160, pool_stride=1)
        self._hidden5 = self._conv_stage(160, 192, pool_stride=2)
        self._hidden6 = self._conv_stage(192, 192, pool_stride=1)
        self._hidden7 = self._conv_stage(192, 192, pool_stride=2)
        self._hidden8 = self._conv_stage(192, 192, pool_stride=1)

        self._hidden9 = self._dense_stage(self._FLAT_FEATURES, 3072)
        self._hidden10 = self._dense_stage(3072, 3072)

        # One classification head per character position.
        self.digit1 = nn.Sequential(nn.Linear(3072, 10))
        self.digit2 = nn.Sequential(nn.Linear(3072, 10))
        self.digit3 = nn.Sequential(nn.Linear(3072, 10))
        self.digit4 = nn.Sequential(nn.Linear(3072, 10))

    def forward(self, x):
        """Return a 4-tuple of logits tensors, one ``(batch, 10)`` per head."""
        features = self._hidden1(x)
        features = self._hidden2(features)
        features = self._hidden3(features)
        features = self._hidden4(features)
        features = self._hidden5(features)
        features = self._hidden6(features)
        features = self._hidden7(features)
        features = self._hidden8(features)

        # Flatten to (batch, 192*7*7); fails if the feature map has another size.
        flat = features.view(features.size(0), self._FLAT_FEATURES)
        shared = self._hidden10(self._hidden9(flat))

        return (
            self.digit1(shared),
            self.digit2(shared),
            self.digit3(shared),
            self.digit4(shared),
        )

In [None]:
class Model_main_task(torch.nn.Module):
    """CNN with four parallel 36-way classification heads.

    Eight convolutional stages (5x5 conv -> batch norm -> ReLU -> 2x2
    max-pool -> dropout) feed two 3072-unit fully connected layers, followed
    by four independent ``Linear(3072, 36)`` heads (``a_digit1`` ..
    ``a_digit4``).

    The flatten step hard-codes a 192 x 7 x 7 feature map, which is what a
    3 x 64 x 64 input produces after the eight stages.

    The attribute names and the layer order inside every ``nn.Sequential``
    determine the ``state_dict`` keys, so they are kept exactly as the saved
    weights expect them.
    """

    # Number of features entering the first fully connected layer.
    _FLAT_FEATURES = 192 * 7 * 7

    @staticmethod
    def _conv_stage(in_channels, out_channels, pool_stride):
        """Build one conv -> BN -> ReLU -> max-pool -> dropout stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=pool_stride, padding=1),
            nn.Dropout(0.2)
        )

    @staticmethod
    def _dense_stage(in_features, out_features):
        """Build one fully connected layer followed by ReLU."""
        return nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.ReLU()
        )

    def __init__(self):
        super().__init__()

        # Odd-numbered stages pool with stride 2, even-numbered with stride 1.
        self._hidden1 = self._conv_stage(3, 48, pool_stride=2)
        self._hidden2 = self._conv_stage(48, 64, pool_stride=1)
        self._hidden3 = self._conv_stage(64, 128, pool_stride=2)
        self._hidden4 = self._conv_stage(128, 160, pool_stride=1)
        self._hidden5 = self._conv_stage(160, 192, pool_stride=2)
        self._hidden6 = self._conv_stage(192, 192, pool_stride=1)
        self._hidden7 = self._conv_stage(192, 192, pool_stride=2)
        self._hidden8 = self._conv_stage(192, 192, pool_stride=1)

        self._hidden9 = self._dense_stage(self._FLAT_FEATURES, 3072)
        self._hidden10 = self._dense_stage(3072, 3072)

        # One classification head per character position.
        self.a_digit1 = nn.Sequential(nn.Linear(3072, 36))
        self.a_digit2 = nn.Sequential(nn.Linear(3072, 36))
        self.a_digit3 = nn.Sequential(nn.Linear(3072, 36))
        self.a_digit4 = nn.Sequential(nn.Linear(3072, 36))

    def forward(self, x):
        """Return a 4-tuple of logits tensors, one ``(batch, 36)`` per head."""
        features = self._hidden1(x)
        features = self._hidden2(features)
        features = self._hidden3(features)
        features = self._hidden4(features)
        features = self._hidden5(features)
        features = self._hidden6(features)
        features = self._hidden7(features)
        features = self._hidden8(features)

        # Flatten to (batch, 192*7*7); fails if the feature map has another size.
        flat = features.view(features.size(0), self._FLAT_FEATURES)
        shared = self._hidden10(self._hidden9(flat))

        return (
            self.a_digit1(shared),
            self.a_digit2(shared),
            self.a_digit3(shared),
            self.a_digit4(shared),
        )

# Dataset

In [None]:
import cv2
from PIL import Image
import numpy as np
import random
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import csv
from matplotlib import pyplot as plt

In [None]:
class CustomDataset(Dataset):
    """Dataset over pre-loaded image arrays and their ``(filename, label)`` rows.

    ``X[index]`` is converted to a PIL image and passed through ``transform``
    when one is given. The label string is encoded character by character:
    decimal digits map to 0-9, any other character ``ch`` maps to
    ``10 + ord(ch) - ord('a')`` (so 'a'-'z' become 10-35).
    """

    def __init__(self, X, metadata, transform=None):
        self.X = X
        self.metadata = metadata
        self._transform = transform

    def __len__(self):
        # The dataset length follows the metadata rows, not the image list.
        return len(self.metadata)

    @staticmethod
    def _encode_char(ch):
        """Return the class index for one label character."""
        return int(ch) if ch.isdigit() else 10 + ord(ch) - ord('a')

    def __getitem__(self, index):
        """Return ``(image, class_indices)`` for the sample at ``index``."""
        raw = self.X[index]
        _, label = self.metadata[index]

        picture = Image.fromarray(raw)
        if self._transform:
            picture = self._transform(picture)

        return picture, [self._encode_char(ch) for ch in label]

In [None]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [None]:
PATH_sample_submission = os.path.join(
    "captcha-hacker", "sample_submission.csv")

test_task1_metadata = []
test_task2_metadata = []
test_task3_metadata = []

# Filename prefix -> list collecting that task's rows, checked in this order.
_metadata_buckets = (
    ("task1", test_task1_metadata),
    ("task2", test_task2_metadata),
    ("task3", test_task3_metadata),
)

with open(PATH_sample_submission, newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    next(reader, None)  # skip header

    for row in reader:
        # Every row must be exactly (filename, label).
        filename, label = row

        # Rows whose filename matches no known prefix are ignored.
        for prefix, bucket in _metadata_buckets:
            if filename.startswith(prefix):
                bucket.append(row)
                break

In [None]:
def _load_resized_images(metadata, size=(64, 64)):
    """Read and resize the image of every ``(filename, label)`` row.

    Args:
        metadata: iterable of ``(filename, label)`` rows; ``filename`` is
            relative to ``TEST_PATH``.
        size: target size passed to ``cv2.resize``.

    Returns:
        List of resized image arrays, in the same order as ``metadata``.

    Raises:
        FileNotFoundError: if ``cv2.imread`` cannot read a file (it returns
            ``None`` instead of raising, which would otherwise surface as an
            opaque error inside ``cv2.resize``).
    """
    images = []
    for filename, _label in metadata:
        path = os.path.join('.', TEST_PATH, filename)
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(
                "Could not read image: {}".format(path))
        # NOTE(review): cv2.imread yields BGR channel order and the arrays are
        # used as loaded; presumably this matches how the weights were
        # trained, so no colour conversion is applied here - confirm.
        images.append(cv2.resize(image, size))
    return images


test_task1_data = _load_resized_images(test_task1_metadata)
test_task2_data = _load_resized_images(test_task2_metadata)
test_task3_data = _load_resized_images(test_task3_metadata)

In [None]:
# Report the row counts first, then the image counts; each task's two
# numbers should match.
for _collection in (
    test_task1_metadata, test_task2_metadata, test_task3_metadata,
    test_task1_data, test_task2_data, test_task3_data,
):
    print(len(_collection))

6500
2500
1000
6500
2500
1000


In [None]:
batch_size = 500

# One dataset per task, wrapping the pre-loaded images and their metadata rows.
test_task1_dataset, test_task2_dataset, test_task3_dataset = (
    CustomDataset(images, rows, transform=transform)
    for images, rows in (
        (test_task1_data, test_task1_metadata),
        (test_task2_data, test_task2_metadata),
        (test_task3_data, test_task3_metadata),
    )
)

# No shuffling and no dropped tail batch, so the batches follow dataset order
# and cover every sample.
test_task1_loader, test_task2_loader, test_task3_loader = (
    DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=False)
    for dataset in (test_task1_dataset, test_task2_dataset, test_task3_dataset)
)

In [None]:
# Per-task weight files, relative to the working directory (the setup cell
# copies task1.pth / task2.pth / task3.pth there).
PATH_WEIGHT_task1 = "task1.pth"
PATH_WEIGHT_task2 = "task2.pth"
PATH_WEIGHT_task3 = "task3.pth"

In [None]:
def _load_model(model_cls, weight_path):
    """Instantiate ``model_cls``, load its weights and move it to ``device``.

    ``map_location=device`` remaps the stored tensors while loading, so a
    file saved from CUDA tensors can still be loaded when ``device`` has
    fallen back to the CPU.

    Args:
        model_cls: model class, called with no arguments.
        weight_path: path of the state-dict file to load.

    Returns:
        The model with loaded weights, placed on ``device``.
    """
    # NOTE(review): torch.load unpickles the file - only load weight files
    # that come from a trusted source.
    state_dict = torch.load(weight_path, map_location=device)
    model = model_cls()
    model.load_state_dict(state_dict)
    return model.to(device)


t_model_task1 = _load_model(Model_task1, PATH_WEIGHT_task1)
# Tasks 2 and 3 use the same architecture with separate weight files.
t_model_task2 = _load_model(Model_main_task, PATH_WEIGHT_task2)
t_model_task3 = _load_model(Model_main_task, PATH_WEIGHT_task3)

# Predict

In [None]:
def _index_to_char(index):
    """Map a class index to its character: 0-9 -> '0'-'9', 10-35 -> 'a'-'z'."""
    return str(index) if index < 10 else chr(ord('a') + index - 10)


def _model_device(model):
    """Return the device of the model's parameters (CPU if it has none)."""
    try:
        return next(model.parameters()).device
    except StopIteration:
        return torch.device("cpu")


def _predict_strings(model, dataloader, num_chars, to_char):
    """Run ``model`` over ``dataloader`` and decode the first ``num_chars`` heads.

    Args:
        model: module returning a sequence of logits tensors, one
            ``(batch, num_classes)`` tensor per character position.
        dataloader: iterable of ``(images, labels)`` batches; the labels are
            ignored (inference only), so they are never moved to the device.
        num_chars: how many leading heads make up the predicted string.
        to_char: callable turning one predicted class index into a string.

    Returns:
        List with one predicted string per image, in loader order.
    """
    # Switch to inference mode once (disables dropout, uses BN running stats).
    model.eval()
    # Send inputs to wherever the model lives instead of relying on a global.
    target_device = _model_device(model)

    pred = []
    with torch.no_grad():
        for images, _labels in dataloader:
            logits = model(images.to(target_device))[:num_chars]
            # One list of predicted indices per character position.
            columns = [head.argmax(dim=1).tolist() for head in logits]
            # Transpose to per-image tuples and join the decoded characters.
            pred.extend(
                "".join(to_char(index) for index in indices)
                for indices in zip(*columns)
            )
    return pred


def predict_task1(model, dataloader):
    """Predict task 1 labels: the first head's class index as a string."""
    return _predict_strings(model, dataloader, num_chars=1, to_char=str)


def predict_task2(model, dataloader):
    """Predict task 2 labels: two characters from the first two heads."""
    return _predict_strings(
        model, dataloader, num_chars=2, to_char=_index_to_char)


def predict_task3(model, dataloader):
    """Predict task 3 labels: four characters, one from each head."""
    return _predict_strings(
        model, dataloader, num_chars=4, to_char=_index_to_char)

In [None]:
# Run inference per task. Each call returns one predicted string per test
# image; the loaders are not shuffled, so the order follows the metadata rows.
a_list = predict_task1(t_model_task1, test_task1_loader)
b_list = predict_task2(t_model_task2, test_task2_loader)
c_list = predict_task3(t_model_task3, test_task3_loader)

In [None]:
# Number of predictions per task; should equal the number of test rows.
for _predictions in (a_list, b_list, c_list):
    print(len(_predictions))

6500
2500
1000


In [None]:
# Peek at the first task 3 rows as read from sample_submission.csv:
# [filename, label]. The labels here are presumably placeholders - confirm.
test_task3_metadata[:9]

[['task3/J230s6yyYxS9D17w.png', '0'],
 ['task3/8FbEKDFjOts0Zdzh.png', '0'],
 ['task3/lUXo5RCSYLKm3Tf9.png', '0'],
 ['task3/aNBgLD8dGPrX7qA0.png', '0'],
 ['task3/7VKSsZTl7ctVCBHV.png', '0'],
 ['task3/c2mxpacCc7SdD9Tk.png', '0'],
 ['task3/4P2Wq0971XJ2w26v.png', '0'],
 ['task3/1tFHZ1wB8xsOJjtV.png', '0'],
 ['task3/NcSub9WjkQfOWqhi.png', '0']]

In [None]:
# Metadata rows paired with their predictions, in submission order.
_submission_parts = (
    ("task1", test_task1_metadata, a_list),
    ("task2", test_task2_metadata, b_list),
    ("task3", test_task3_metadata, c_list),
)

# zip() stops at the shorter input, so a length mismatch would silently drop
# rows from the submission. Fail loudly before anything is written.
for _task, _rows, _preds in _submission_parts:
    if len(_rows) != len(_preds):
        raise ValueError(
            "{}: {} metadata rows but {} predictions".format(
                _task, len(_rows), len(_preds)))

with open('submission.csv', 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)

    csv_writer.writerow(["filename", "label"])

    # Keep the filename from the sample submission, replace its label with
    # the predicted string.
    for _task, _rows, _preds in _submission_parts:
        for (filename, _label), prediction in zip(_rows, _preds):
            csv_writer.writerow([filename, prediction])

In [None]:
# Copy the generated submission back to the mounted Drive folder so it
# outlives the Colab session.
!cp submission.csv /content/drive/MyDrive/tmp