In [2]:
import csv
import cv2
import numpy as np
import pandas as pd
import random
import os

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader


In [3]:
# Directory that holds the test-set images.
TEST_PATH = "./dataset/test"

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [4]:
# Character classes, digits first and lowercase letters after them.
NUMBER = list("0123456789")
ALPHABET = list("abcdefghijklmnopqrstuvwxyz")

# Position in this list is the class index of a character.
ALL_CHAR_SET = [*NUMBER, *ALPHABET]
ALL_CHAR_SET_LEN = len(ALL_CHAR_SET)
print(ALL_CHAR_SET_LEN)

36


In [5]:
class TaskDataset(Dataset):
    """Captcha image dataset backed by ``(filename, label)`` rows.

    Args:
        data: Indexable collection of two-item rows ``(filename, label)``.
        root: Directory that every ``filename`` is relative to.
        captcha_len: Number of character slots in the one-hot target.
        return_filename: When true, items are ``(image, filename)`` and the
            label is ignored; otherwise items are ``(image, one_hot_label)``.
    """

    def __init__(self, data, root, captcha_len, return_filename=False):
        self.data = data
        self.return_filename = return_filename
        self.root = root
        self.captcha_len = captcha_len

    def __getitem__(self, index):
        """Load one grayscale image as a float32 tensor of shape (H, W).

        Returns:
            ``(image, filename)`` when ``return_filename`` is set, otherwise
            ``(image, one_hot_label)``.

        Raises:
            FileNotFoundError: If the image is missing or cannot be decoded.
        """
        filename, label = self.data[index]
        path = f"{self.root}/{filename}"
        # Pixels are used raw: no thresholding, erosion or filtering is applied.
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the path rather than deep inside tensor creation.
            raise FileNotFoundError(f"cannot read image: {path}")
        image = torch.as_tensor(img, dtype=torch.float32)
        if self.return_filename:
            return image, filename
        return image, self.one_hot_encode(label)

    def __len__(self):
        """Number of rows in ``data``."""
        return len(self.data)

    def one_hot_encode(self, label):
        """Encode ``label`` as ``captcha_len`` concatenated one-hot blocks.

        Character ``i`` of the label sets index
        ``i * ALL_CHAR_SET_LEN + ALL_CHAR_SET.index(char)``.

        Raises:
            ValueError: If a character is not in ``ALL_CHAR_SET``.
            IndexError: If the label is longer than ``captcha_len``.
        """
        onehot = np.zeros(ALL_CHAR_SET_LEN * self.captcha_len, dtype=int)
        for position, char in enumerate(label):
            onehot[position * ALL_CHAR_SET_LEN + ALL_CHAR_SET.index(char)] = 1
        return onehot

In [6]:
class Model(nn.Module):
    """CNN mapping a batch of grayscale images to ``output_len`` logits.

    The convolutional trunk has four stages of Conv-BN-Conv-MaxPool-BN-ReLU.
    The 3x3 convolutions use padding 1 and keep the spatial size; every stage
    halves it, so height and width are floor-divided by 16 overall.

    Two alternative first dense layers exist. ``fc_task3`` (1024x4x6 feature
    map, e.g. a 72x96 input) is used when ``output_len > 100``; ``fc``
    (1024x4x4 feature map) is used otherwise. Both are always created, so the
    set of state-dict keys does not depend on ``output_len``.

    Args:
        output_len: Number of output logits.
    """

    def __init__(self, output_len):
        super(Model, self).__init__()
        self.output_len = output_len
        # The position of each layer determines its ``conv.<idx>`` key in the
        # state dict, so the order below must not change.
        self.conv = nn.Sequential(
                # Shapes in the comments assume a batch*1*72*96 input.
                nn.Conv2d(1, 4, 3, padding=(1, 1)),
                nn.BatchNorm2d(4),
                nn.Conv2d(4, 16, 3, padding=(1, 1)),
                nn.MaxPool2d(2, 2),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                # batch*16*36*48
                nn.Conv2d(16, 32, 3, padding=(1, 1)),
                nn.BatchNorm2d(32),
                nn.Conv2d(32, 64, 3, padding=(1, 1)),
                nn.MaxPool2d(2, 2),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                # batch*64*18*24
                nn.Conv2d(64, 128, 3, padding=(1, 1)),
                nn.BatchNorm2d(128),
                nn.Conv2d(128, 256, 3, padding=(1, 1)),
                nn.MaxPool2d(2, 2),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                # batch*256*9*12
                nn.Conv2d(256, 512, 3, padding=(1, 1)),
                nn.BatchNorm2d(512),
                nn.Conv2d(512, 1024, 3, padding=(1, 1)),
                nn.MaxPool2d(2, 2),
                nn.BatchNorm2d(1024),
                nn.ReLU(),
                # batch*1024*4*6
                )
        self.fc = nn.Linear(1024*4*4, 1024)
        self.fc_task3 = nn.Linear(1024*4*6, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.out = nn.Linear(256, self.output_len)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Compute logits for a batch of single-channel images.

        Args:
            x: Tensor of shape (batch, height, width); the channel axis is
                added here.

        Returns:
            Tensor of shape (batch, output_len).

        Raises:
            ValueError: If ``x`` is not 3-D, or if the image size does not
                produce the feature count expected by the selected dense layer.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, height, width), got {tuple(x.shape)}"
            )
        x = self.conv(x.unsqueeze(1))

        head = self.fc_task3 if self.output_len > 100 else self.fc
        # Flatten per sample. A fixed-width ``view(-1, N)`` would silently
        # merge or split samples when the image size is wrong.
        x = x.flatten(1)
        if x.shape[1] != head.in_features:
            raise ValueError(
                f"input size yields {x.shape[1]} features per sample, "
                f"but the dense layer expects {head.in_features}"
            )
        x = head(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.dropout(x)
        x = self.out(x)
        return x

In [7]:
# Split the rows of the submission template by task prefix. Rows whose first
# column matches none of the prefixes (such as a header row) are ignored.
test_data_1 = []
test_data_2 = []
test_data_3 = []
with open(f'{TEST_PATH}/../sample_submission.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if not row:
            # csv.reader yields [] for a blank line; row[0] would raise IndexError.
            continue
        if row[0].startswith("task1"):
            test_data_1.append(row)
        elif row[0].startswith("task2"):
            test_data_2.append(row)
        elif row[0].startswith("task3"):
            test_data_3.append(row)

# Inference loaders: the datasets return filenames instead of labels, and
# drop_last=False keeps every sample of the final partial batch.
test_ds_1 = TaskDataset(test_data_1, root=TEST_PATH, captcha_len=1, return_filename=True)
test_dl_1 = DataLoader(test_ds_1, batch_size=100, num_workers=0, drop_last=False, shuffle=False)

test_ds_2 = TaskDataset(test_data_2, root=TEST_PATH, captcha_len=2, return_filename=True)
test_dl_2 = DataLoader(test_ds_2, batch_size=100, num_workers=0, drop_last=False, shuffle=False)

test_ds_3 = TaskDataset(test_data_3, root=TEST_PATH, captcha_len=4, return_filename=True)
test_dl_3 = DataLoader(test_ds_3, batch_size=100, num_workers=0, drop_last=False, shuffle=False)

In [8]:
# Checkpoint file for each of the three tasks, relative to the working directory.
PATH_1, PATH_2, PATH_3 = (
    "task_1_model.pt",
    "task_2_model.pt",
    "task_3_model.pt",
)

In [9]:
# Start a fresh submission file (mode 'w' truncates any previous one) that
# contains only the header row.
with open('submission.csv', 'w', newline='') as header_file:
    csv.writer(header_file).writerow(["filename", "label"])

In [10]:
# Task 1: one character per image.
model = Model(ALL_CHAR_SET_LEN*1).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU also loads when `device` is the CPU.
checkpoint = torch.load(PATH_1, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
# no_grad: inference only, so skip building the autograd graph.
with open('submission.csv', 'a', newline='') as csvfile, torch.no_grad():
    csv_writer = csv.writer(csvfile)
    for image, filenames in test_dl_1:
        pred = model(image.to(device))
        # Only the first 10 logits (the digit classes of ALL_CHAR_SET) compete.
        # Presumably task 1 labels are digits only -- TODO confirm.
        # One .tolist() per batch replaces a device sync per sample.
        digit_idx = pred[:, 0:10].argmax(dim=1).tolist()
        csv_writer.writerows(
            [name, ALL_CHAR_SET[idx]] for name, idx in zip(filenames, digit_idx)
        )

In [11]:
# Task 2: two characters per image.
model = Model(ALL_CHAR_SET_LEN*2).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU also loads when `device` is the CPU.
checkpoint = torch.load(PATH_2, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
# no_grad: inference only, so skip building the autograd graph.
with open('submission.csv', 'a', newline='') as csvfile, torch.no_grad():
    csv_writer = csv.writer(csvfile)
    for image, filenames in test_dl_2:
        pred = model(image.to(device))
        # Logits are 2 consecutive blocks of ALL_CHAR_SET_LEN classes; take the
        # argmax inside each block -> (batch, 2) class indices.
        # One .tolist() per batch replaces a device sync per character.
        char_idx = pred.reshape(-1, 2, ALL_CHAR_SET_LEN).argmax(dim=2).tolist()
        for name, idxs in zip(filenames, char_idx):
            csv_writer.writerow([name, "".join(ALL_CHAR_SET[i] for i in idxs)])

In [12]:
# Task 3: four characters per image.
model = Model(ALL_CHAR_SET_LEN*4).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU also loads when `device` is the CPU.
checkpoint = torch.load(PATH_3, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
# no_grad: inference only, so skip building the autograd graph.
with open('submission.csv', 'a', newline='') as csvfile, torch.no_grad():
    csv_writer = csv.writer(csvfile)
    for image, filenames in test_dl_3:
        pred = model(image.to(device))
        # Logits are 4 consecutive blocks of ALL_CHAR_SET_LEN classes; take the
        # argmax inside each block -> (batch, 4) class indices.
        # One .tolist() per batch replaces a device sync per character.
        char_idx = pred.reshape(-1, 4, ALL_CHAR_SET_LEN).argmax(dim=2).tolist()
        for name, idxs in zip(filenames, char_idx):
            csv_writer.writerow([name, "".join(ALL_CHAR_SET[i] for i in idxs)])