In [2]:
import csv
import cv2
import numpy as np
import pandas as pd
import random
import os

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [3]:
TRAIN_PATH = "./dataset/train"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
NUMBER = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
ALPHABET = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
ALL_CHAR_SET = NUMBER + ALPHABET
ALL_CHAR_SET_LEN = len(ALL_CHAR_SET)
print(ALL_CHAR_SET_LEN)

36


In [5]:
class TaskDataset(Dataset):
    def __init__(self, data, root, captcha_len, return_filename=False):
        self.data = data
        self.return_filename = return_filename
        self.root = root
        self.captcha_len = captcha_len
    
    def __getitem__(self, index):
        filename, label = self.data[index]
        img = cv2.imread(f"{self.root}/{filename}", cv2.IMREAD_GRAYSCALE)
        #_,img = cv2.threshold(img,180,255,cv2.THRESH_BINARY)
        #img = ~img
        #img = cv2.erode(img, np.ones((2, 2), np.uint8), iterations = 1)
        #img = ~img
        #img = scipy.ndimage.median_filter(img, (5, 1))
        img = np.array(img)
        if self.return_filename:
            return torch.FloatTensor(img), filename
        else:
            return torch.FloatTensor(img), self.one_hot_encode(label)

    def __len__(self):
        return len(self.data)
    
    def one_hot_encode(self, label):
        onehot = [0] * (ALL_CHAR_SET_LEN * self.captcha_len)
        for i, l in enumerate(label):
            idx = ALL_CHAR_SET.index(l) + i * ALL_CHAR_SET_LEN
            onehot[idx] = 1
        return np.array(onehot)

In [6]:
data_1 = []
data_2 = []
data_3 = []

with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if row[0].startswith("task1"):
            data_1.append(row)
        elif row[0].startswith("task2"):
            data_2.append(row)
        elif row[0].startswith("task3"):
            data_3.append(row)

random.shuffle(data_1)
random.shuffle(data_2)
random.shuffle(data_3)

train_data_1 = data_1[0:int(len(data_1)*0.8)]
train_ds_1 = TaskDataset(train_data_1, root=TRAIN_PATH, captcha_len=1)
train_dl_1 = DataLoader(train_ds_1, batch_size=100, num_workers=0, drop_last=True, shuffle=True)

val_data_1 = data_1[int(len(data_1)*0.8):]
val_ds_1 = TaskDataset(val_data_1, root=TRAIN_PATH, captcha_len=1)
val_dl_1 = DataLoader(val_ds_1, batch_size=100, num_workers=0, drop_last=False, shuffle=False)

train_data_2 = data_2[0:int(len(data_2)*0.8)]
train_ds_2 = TaskDataset(train_data_2, root=TRAIN_PATH, captcha_len=2)
train_dl_2 = DataLoader(train_ds_2, batch_size=100, num_workers=0, drop_last=True, shuffle=True)

val_data_2 = data_2[int(len(data_2)*0.8):]
val_ds_2 = TaskDataset(val_data_2, root=TRAIN_PATH, captcha_len=2)
val_dl_2 = DataLoader(val_ds_2, batch_size=100, num_workers=0, drop_last=False, shuffle=False)

train_data_3 = data_3[0:int(len(data_3)*0.8)]
train_ds_3 = TaskDataset(train_data_3, root=TRAIN_PATH, captcha_len=4)
train_dl_3 = DataLoader(train_ds_3, batch_size=100, num_workers=0, drop_last=True, shuffle=True)

val_data_3 = data_3[int(len(data_3)*0.8):]
val_ds_3 = TaskDataset(val_data_3, root=TRAIN_PATH, captcha_len=4)
val_dl_3 = DataLoader(val_ds_3, batch_size=100, num_workers=0, drop_last=False, shuffle=False)

In [7]:
class Model(nn.Module):
    def __init__(self, output_len):
        super(Model, self).__init__()
        self.output_len = output_len
        self.conv = nn.Sequential(
                # batch*1*72*96 / batch*1*72*96
                nn.Conv2d(1, 4, 3, padding=(1, 1)),
                nn.BatchNorm2d(4),
                nn.Conv2d(4, 16, 3, padding=(1, 1)),
                nn.MaxPool2d(2, 2),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                # batch*16*36*48
                nn.Conv2d(16, 32, 3, padding=(1, 1)),
                nn.BatchNorm2d(32),
                nn.Conv2d(32, 64, 3, padding=(1, 1)),
                nn.MaxPool2d(2, 2),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                # batch*64*18*24
                nn.Conv2d(64, 128, 3, padding=(1, 1)),
                nn.BatchNorm2d(128),
                nn.Conv2d(128, 256, 3, padding=(1, 1)),
                nn.MaxPool2d(2, 2),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                # batch*128*9*12
                nn.Conv2d(256, 512, 3, padding=(1, 1)),
                nn.BatchNorm2d(512),
                nn.Conv2d(512, 1024, 3, padding=(1, 1)),
                nn.MaxPool2d(2, 2),
                nn.BatchNorm2d(1024),
                nn.ReLU(),
                #batch*512*4*6
                )
        self.fc = nn.Linear(1024*4*4, 1024)
        self.fc_task3 = nn.Linear(1024*4*6, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.out = nn.Linear(256, self.output_len)
        self.dropout = nn.Dropout(0.1)
        
    def forward(self, x):
        b, h, w = x.shape 
        x = x.view(b,1,h,w)
        x = self.conv(x)
        if self.output_len > 100:
            x = x.view(-1, 1024*4*6)
            x = self.fc_task3(x)
        else:
            x = x.view(-1, 1024*4*4)
            x = self.fc(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.dropout(x)
        x = self.out(x)
        return x

In [8]:
modelfortask1 = Model(ALL_CHAR_SET_LEN * 1).to(device)
optimizer = torch.optim.Adam(modelfortask1.parameters(), lr=1e-5)
loss_fn = nn.CrossEntropyLoss()
best_acc = 0

In [8]:
for epoch in range(10):
    print(f"Epoch [{epoch+1}]")
    modelfortask1.train()
    for image, label in train_dl_1:
        image = image.to(device)
        label = label.to(device, dtype=torch.float)
        
        pred = modelfortask1(image)
        loss = loss_fn(pred, label)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    sample_count = 0
    correct_count = 0
    modelfortask1.eval()
    for image, label in val_dl_1:
        image = image.to(device)
        label = label.to(device, dtype=torch.float)
        
        pred = modelfortask1(image)
        loss = loss_fn(pred, label)
        
        pred = torch.argmax(pred.T[0:10], dim=0)
        label = torch.argmax(label.T[0:10], dim=0)

        sample_count += len(image)
        correct_count += (label == pred).sum()

    acc = correct_count / sample_count
    print("accuracy (validation):", acc)
    if acc > best_acc:
        best_acc = acc
        PATH_1 = "task_1_model.pt"
        torch.save({
            'model_state_dict': modelfortask1.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, PATH_1)



Epoch [1]
accuracy (validation): tensor(0.9975, device='cuda:0')
Epoch [2]
accuracy (validation): tensor(0.9992, device='cuda:0')
Epoch [3]
accuracy (validation): tensor(0.9997, device='cuda:0')
Epoch [4]
accuracy (validation): tensor(0.9983, device='cuda:0')
Epoch [5]
accuracy (validation): tensor(0.9976, device='cuda:0')
Epoch [6]
accuracy (validation): tensor(0.9994, device='cuda:0')
Epoch [7]
accuracy (validation): tensor(0.9994, device='cuda:0')
Epoch [8]
accuracy (validation): tensor(0.9971, device='cuda:0')
Epoch [9]
accuracy (validation): tensor(0.9992, device='cuda:0')
Epoch [10]
accuracy (validation): tensor(0.9996, device='cuda:0')


In [9]:
modelfortask2 = Model(ALL_CHAR_SET_LEN * 2).to(device)
optimizer = torch.optim.Adam(modelfortask2.parameters(), lr=1e-5)
loss_fn = nn.CrossEntropyLoss()
best_acc = 0

In [12]:
for epoch in range(5):
    print(f"Epoch [{epoch+1}]")
    modelfortask2.train()
    for image, label in train_dl_2:
        image = image.to(device)
        label = label.to(device, dtype=torch.float)
        
        pred = modelfortask2(image)
        loss = loss_fn(pred, label)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    sample_count = 0
    correct_count = 0
    modelfortask2.eval()
    for image, label in val_dl_2:
        image = image.to(device)
        label = label.to(device, dtype=torch.float)
        
        pred = modelfortask2(image)
        loss = loss_fn(pred, label)
            
        pred_1 = torch.argmax(pred.T[0:ALL_CHAR_SET_LEN], dim=0)
        label_1 = torch.argmax(label.T[0:ALL_CHAR_SET_LEN], dim=0)
        pred_2 = torch.argmax(pred.T[ALL_CHAR_SET_LEN:ALL_CHAR_SET_LEN*2], dim=0)
        label_2 = torch.argmax(label.T[ALL_CHAR_SET_LEN:ALL_CHAR_SET_LEN*2], dim=0)
        flag = torch.logical_and((label_1 == pred_1), (label_2 == pred_2))
        
        sample_count += len(image)
        correct_count += (flag).sum()

    acc = correct_count / sample_count
    print("accuracy (validation):", acc)
    if acc > best_acc:
        best_acc = acc
        PATH_2 = "task_2_model.pt"
        torch.save({
            'model_state_dict': modelfortask2.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, PATH_2)


Epoch [1]
accuracy (validation): tensor(0.9965, device='cuda:0')
Epoch [2]
accuracy (validation): tensor(0.9980, device='cuda:0')
Epoch [3]
accuracy (validation): tensor(0.9973, device='cuda:0')
Epoch [4]
accuracy (validation): tensor(0.9958, device='cuda:0')
Epoch [5]
accuracy (validation): tensor(0.9980, device='cuda:0')


In [9]:
modelfortask3 = Model(ALL_CHAR_SET_LEN * 4).to(device)
optimizer = torch.optim.Adam(modelfortask3.parameters(), lr=1e-5)
loss_fn = nn.CrossEntropyLoss()
best_acc = 0

In [11]:
for epoch in range(50):
    print(f"Epoch [{epoch+1}]")
    modelfortask3.train()
    for image, label in train_dl_3:
        image = image.to(device)
        label = label.to(device, dtype=torch.float)
        
        pred = modelfortask3(image)
        loss = loss_fn(pred, label)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    sample_count = 0
    correct_count = 0
    modelfortask3.eval()
    for image, label in val_dl_3:
        image = image.to(device)
        label = label.to(device, dtype=torch.float)
        
        pred = modelfortask3(image)
        loss = loss_fn(pred, label)
            
        pred_1 = torch.argmax(pred.T[0:ALL_CHAR_SET_LEN], dim=0)
        label_1 = torch.argmax(label.T[0:ALL_CHAR_SET_LEN], dim=0)
        pred_2 = torch.argmax(pred.T[ALL_CHAR_SET_LEN:ALL_CHAR_SET_LEN*2], dim=0)
        label_2 = torch.argmax(label.T[ALL_CHAR_SET_LEN:ALL_CHAR_SET_LEN*2], dim=0)
        pred_3 = torch.argmax(pred.T[ALL_CHAR_SET_LEN*2:ALL_CHAR_SET_LEN*3], dim=0)
        label_3 = torch.argmax(label.T[ALL_CHAR_SET_LEN*2:ALL_CHAR_SET_LEN*3], dim=0)
        pred_4 = torch.argmax(pred.T[ALL_CHAR_SET_LEN*3:ALL_CHAR_SET_LEN*4], dim=0)
        label_4 = torch.argmax(label.T[ALL_CHAR_SET_LEN*3:ALL_CHAR_SET_LEN*4], dim=0)
        
        flag_1 = torch.logical_and((label_1 == pred_1), (label_2 == pred_2))
        flag_2 = torch.logical_and((label_3 == pred_3), (label_4 == pred_4))
        flag = torch.logical_and(flag_1, flag_2)
        
        sample_count += len(image)
        correct_count += (flag).sum()
    
    acc = correct_count / sample_count
    print("accuracy (validation):", acc)
    if acc > best_acc:
        best_acc = acc
        PATH_3 = "task_3_model.pt"
        torch.save({
            'model_state_dict': modelfortask3.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, PATH_3)


Epoch [1]
accuracy (validation): tensor(0.6194, device='cuda:0')
Epoch [2]
accuracy (validation): tensor(0.8296, device='cuda:0')
Epoch [3]
accuracy (validation): tensor(0.8847, device='cuda:0')
Epoch [4]
accuracy (validation): tensor(0.9125, device='cuda:0')
Epoch [5]
accuracy (validation): tensor(0.9271, device='cuda:0')
Epoch [6]
accuracy (validation): tensor(0.9395, device='cuda:0')
Epoch [7]
accuracy (validation): tensor(0.9467, device='cuda:0')
Epoch [8]
accuracy (validation): tensor(0.9532, device='cuda:0')
Epoch [9]
accuracy (validation): tensor(0.9560, device='cuda:0')
Epoch [10]
accuracy (validation): tensor(0.9587, device='cuda:0')
Epoch [11]
accuracy (validation): tensor(0.9623, device='cuda:0')
Epoch [12]
accuracy (validation): tensor(0.9641, device='cuda:0')
Epoch [13]
accuracy (validation): tensor(0.9663, device='cuda:0')
Epoch [14]
accuracy (validation): tensor(0.9671, device='cuda:0')
Epoch [15]
accuracy (validation): tensor(0.9684, device='cuda:0')
Epoch [16]
accuracy