The captcha dataset used in this notebook (for both training and testing) is taken from Kaggle: https://www.kaggle.com/fournierp/captcha-version-2-images


In [2]:
import os

In [3]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
import torchvision.transforms as transforms
from torchvision import models


In [4]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

In [5]:
# Alphabet the labels are encoded over: the ten digits first, then the
# lowercase ASCII letters 'a'..'z' (code points 97..122), in that order.
CHAR_SET = [str(digit) for digit in range(10)]
CHAR_SET += [chr(code_point) for code_point in range(97, 97 + 26)]

# Size of the alphabet (10 digits + 26 letters) and the number of character
# positions encoded per captcha label.
TOTAL_CHAR_COUNT = 10 + 26
TOTAL_CAPTCHA_LEN = 5

In [6]:
def getOneHotEncoding(s):
    """Encode a captcha string as a flat one-hot vector.

    The result is laid out as TOTAL_CAPTCHA_LEN consecutive groups of
    TOTAL_CHAR_COUNT entries; group ``k`` holds a single 1 at the index of
    ``s[k]`` within CHAR_SET.

    Args:
        s: Label text. Only the first TOTAL_CAPTCHA_LEN characters are read.

    Returns:
        A 1-D float32 tensor of length TOTAL_CAPTCHA_LEN * TOTAL_CHAR_COUNT.

    Raises:
        IndexError: If ``s`` has fewer than TOTAL_CAPTCHA_LEN characters.
        ValueError: If one of the read characters is not in CHAR_SET.
    """
    # Flat index of the "hot" entry for every character position.
    hot_positions = [
        slot * TOTAL_CHAR_COUNT + CHAR_SET.index(s[slot])
        for slot in range(TOTAL_CAPTCHA_LEN)
    ]
    encoding = torch.zeros(TOTAL_CAPTCHA_LEN * TOTAL_CHAR_COUNT, dtype=torch.float32)
    encoding[torch.tensor(hot_positions, dtype=torch.long)] = 1
    return encoding

In [7]:
class CaptchaDataset(Dataset):
    """Captcha images whose file names encode the ground-truth text.

    Every entry of ``img_path`` is treated as one sample. The file name with
    its extension removed is used as the label (``abc12.png`` -> ``"abc12"``).

    Args:
        img_path: Directory that contains the captcha image files.
        transform: Optional callable applied to the grayscale PIL image
            before it is returned.
    """

    def __init__(self, img_path, transform=None):
        self.img_path = img_path
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible from run to run.
        self.img_list = sorted(os.listdir(self.img_path))
        self.transform = transform

    def __getitem__(self, idx):
        """Return ``(image, one_hot_label, label_text)`` for sample ``idx``.

        Raises whatever ``getOneHotEncoding`` raises when the file name does
        not form a valid label.
        """
        img_name = self.img_list[idx]
        # Scope the file handle explicitly; convert() returns an independent
        # in-memory copy, so the image stays usable after the file is closed.
        with Image.open(os.path.join(self.img_path, img_name)) as raw_img:
            img = raw_img.convert('L')
        # splitext copes with extensions of any length (".png", ".jpeg", ...)
        # instead of assuming exactly three characters after the dot.
        label = os.path.splitext(img_name)[0]
        ohev = getOneHotEncoding(label)
        if self.transform is not None:
            img = self.transform(img)
        return img, ohev, label

    def __len__(self):
        """Number of files found in ``img_path``."""
        return len(self.img_list)

In [8]:
# Target height and width (in pixels) used when resizing the input images.
IMG_H = 224
IMG_W = 224

In [9]:
# Preprocessing pipeline: resize to IMG_H x IMG_W, then convert the PIL image
# to a tensor.
transform = transforms.Compose(
    [transforms.Resize([IMG_H, IMG_W]), transforms.ToTensor()]
)

In [10]:
# NOTE(review): both datasets read the same directory, so `test_ds` is not a
# held-out set; anything measured on it reflects fit to the training data.
# TODO: point it at a separate folder or split the file list.
train_ds = CaptchaDataset('./Captcha_Dataset', transform=transform)
test_ds = CaptchaDataset('./Captcha_Dataset', transform=transform)

# Reshuffle each epoch so the network does not see identical batches in the
# same order every time.
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=4)
# One captcha per batch for evaluation; must wrap test_ds, not train_ds.
test_dl = DataLoader(test_ds, batch_size=1, num_workers=4)

In [11]:
# Disabled alternative (DenseNet backbone), kept for reference:
# #model = models.DenseNet(growth_rate=32, block_config=(2, 4, 12, 8), num_classes = TOTAL_CAPTCHA_LEN*TOTAL_CHAR_COUNT)
# torchvision ResNet-18 instantiated without pretrained weights.
model = models.resnet18(pretrained=False)

In [12]:
class ResNet(nn.Module):
    """ResNet-18 adapted to single-channel captcha images.

    The stock input convolution is replaced by ``conv0`` (1 input channel
    instead of 3) and the stock classifier by ``fcl``, which emits
    TOTAL_CAPTCHA_LEN * TOTAL_CHAR_COUNT scores (one per entry of the flat
    one-hot label vector).
    """

    def __init__(self):
        super().__init__()
        self.conv0 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=True)
        self.fcl = nn.Linear(in_features=512, out_features=TOTAL_CAPTCHA_LEN * TOTAL_CHAR_COUNT, bias=True)
        # Build a private backbone for this instance. Reading the layers from
        # a notebook-level `model` variable would make the class depend on
        # hidden kernel state and make every instance share the same weights.
        backbone = models.resnet18(pretrained=False)
        # Keep everything except the first child (the stock input convolution,
        # replaced by conv0) and the last child (the stock classifier head,
        # replaced by fcl).
        self.model = nn.Sequential(*list(backbone.children())[1:-1])

    def forward(self, x):
        """Map a batch of 1-channel images to one row of raw scores per image."""
        out = self.conv0(x)
        out = self.model(out)
        # Collapse all non-batch dimensions so the linear layer gets 2-D input.
        out = out.reshape(out.size(0), -1)
        return self.fcl(out)

In [13]:
# Disabled draft: the same network assembled as a flat nn.Sequential. Its
# linear layer expects 32768 input features and there is no flatten step
# between the backbone layers and `fcl`.
# conv0 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=True)
# fcl = nn.Linear(in_features = 32768,out_features=TOTAL_CAPTCHA_LEN*TOTAL_CHAR_COUNT, bias=True)
# new_model = nn.Sequential(conv0, *list(model.children())[1:-1], fcl)

In [14]:
# Place the model on its device at construction time, i.e. before any
# optimizer is created from its parameters (the order recommended by the
# torch.optim documentation). Falls back to the CPU when CUDA is unavailable.
new_model = ResNet().to('cuda' if torch.cuda.is_available() else 'cpu')

In [15]:
# Objective: multi-label soft-margin loss between the raw scores and the flat
# one-hot target vector.
loss_func = nn.MultiLabelSoftMarginLoss()
# Adam over every parameter of the network with a learning rate of 1e-4.
optm = torch.optim.Adam(params=new_model.parameters(), lr=1e-4)

In [16]:
# Use the GPU when one is available, otherwise stay on the CPU instead of
# raising. Module.to() moves the parameters in place and returns the module,
# so the cell still displays the model summary.
new_model.to('cuda' if torch.cuda.is_available() else 'cpu')

ResNet(
  (conv0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (fcl): Linear(in_features=512, out_features=180, bias=True)
  (model): Sequential(
    (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (3): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1,

In [18]:
# Train on whichever device the model currently lives on rather than
# hard-coding CUDA.
device = next(new_model.parameters()).device
# Ensure training-mode behaviour (e.g. BatchNorm batch statistics) even if an
# earlier cell left the model in eval mode.
new_model.train()

for epoch in range(200):
    for step, (img, ohev, label) in enumerate(train_dl):
        # Plain tensors take part in autograd directly; the deprecated
        # Variable wrapper is not needed.
        img = img.to(device)
        ohev = ohev.float().to(device)

        pred = new_model(img)
        loss = loss_func(pred, ohev)

        # Clear gradients left from the previous step before backpropagating.
        optm.zero_grad()
        loss.backward()
        optm.step()
        print('eopch:', epoch+1, 'step:', step+1, 'loss:', loss.item())

eopch: 1 step: 1 loss: 0.06129993870854378
eopch: 1 step: 2 loss: 0.058805570006370544
eopch: 1 step: 3 loss: 0.059360913932323456
eopch: 1 step: 4 loss: 0.060044631361961365
eopch: 1 step: 5 loss: 0.058337483555078506
eopch: 1 step: 6 loss: 0.059389628469944
eopch: 1 step: 7 loss: 0.05897968262434006
eopch: 1 step: 8 loss: 0.06003958359360695
eopch: 1 step: 9 loss: 0.058112483471632004
eopch: 1 step: 10 loss: 0.058236800134181976
eopch: 1 step: 11 loss: 0.058174893260002136
eopch: 1 step: 12 loss: 0.056974686682224274
eopch: 1 step: 13 loss: 0.056501761078834534
eopch: 1 step: 14 loss: 0.0566464439034462
eopch: 1 step: 15 loss: 0.05844882130622864
eopch: 1 step: 16 loss: 0.057101741433143616
eopch: 1 step: 17 loss: 0.051686763763427734
eopch: 2 step: 1 loss: 0.05727478861808777
eopch: 2 step: 2 loss: 0.05470728129148483
eopch: 2 step: 3 loss: 0.05536648631095886
eopch: 2 step: 4 loss: 0.05612003430724144
eopch: 2 step: 5 loss: 0.054416071623563766
eopch: 2 step: 6 loss: 0.055717520415

eopch: 12 step: 1 loss: 0.03029671311378479
eopch: 12 step: 2 loss: 0.028888089582324028
eopch: 12 step: 3 loss: 0.029419856145977974
eopch: 12 step: 4 loss: 0.02961622178554535
eopch: 12 step: 5 loss: 0.0288187637925148
eopch: 12 step: 6 loss: 0.02958245947957039
eopch: 12 step: 7 loss: 0.029216736555099487
eopch: 12 step: 8 loss: 0.029756512492895126
eopch: 12 step: 9 loss: 0.028863176703453064
eopch: 12 step: 10 loss: 0.02882460504770279
eopch: 12 step: 11 loss: 0.02905050292611122
eopch: 12 step: 12 loss: 0.028542863205075264
eopch: 12 step: 13 loss: 0.027846915647387505
eopch: 12 step: 14 loss: 0.028147924691438675
eopch: 12 step: 15 loss: 0.028914161026477814
eopch: 12 step: 16 loss: 0.028389740735292435
eopch: 12 step: 17 loss: 0.025019405409693718
eopch: 13 step: 1 loss: 0.02824549376964569
eopch: 13 step: 2 loss: 0.027083449065685272
eopch: 13 step: 3 loss: 0.0276944637298584
eopch: 13 step: 4 loss: 0.027939872816205025
eopch: 13 step: 5 loss: 0.027219638228416443
eopch: 13 st

eopch: 22 step: 13 loss: 0.01600104570388794
eopch: 22 step: 14 loss: 0.01627293974161148
eopch: 22 step: 15 loss: 0.016650507226586342
eopch: 22 step: 16 loss: 0.016516100615262985
eopch: 22 step: 17 loss: 0.014548659324645996
eopch: 23 step: 1 loss: 0.016415216028690338
eopch: 23 step: 2 loss: 0.015735764056444168
eopch: 23 step: 3 loss: 0.016095707193017006
eopch: 23 step: 4 loss: 0.016118768602609634
eopch: 23 step: 5 loss: 0.01577705517411232
eopch: 23 step: 6 loss: 0.01619456708431244
eopch: 23 step: 7 loss: 0.016138318926095963
eopch: 23 step: 8 loss: 0.01644483581185341
eopch: 23 step: 9 loss: 0.016000233590602875
eopch: 23 step: 10 loss: 0.016005244106054306
eopch: 23 step: 11 loss: 0.015946175903081894
eopch: 23 step: 12 loss: 0.01566373184323311
eopch: 23 step: 13 loss: 0.015247335657477379
eopch: 23 step: 14 loss: 0.015554388053715229
eopch: 23 step: 15 loss: 0.01593976840376854
eopch: 23 step: 16 loss: 0.015814755111932755
eopch: 23 step: 17 loss: 0.01389466691762209
eopch

eopch: 33 step: 7 loss: 0.01063862070441246
eopch: 33 step: 8 loss: 0.010886786505579948
eopch: 33 step: 9 loss: 0.010524651035666466
eopch: 33 step: 10 loss: 0.01054549403488636
eopch: 33 step: 11 loss: 0.010448509827256203
eopch: 33 step: 12 loss: 0.010243739001452923
eopch: 33 step: 13 loss: 0.010031133890151978
eopch: 33 step: 14 loss: 0.010245384648442268
eopch: 33 step: 15 loss: 0.010464038699865341
eopch: 33 step: 16 loss: 0.010408978909254074
eopch: 33 step: 17 loss: 0.009169411845505238
eopch: 34 step: 1 loss: 0.010276935063302517
eopch: 34 step: 2 loss: 0.009868258610367775
eopch: 34 step: 3 loss: 0.010115448385477066
eopch: 34 step: 4 loss: 0.01009758748114109
eopch: 34 step: 5 loss: 0.00993285421282053
eopch: 34 step: 6 loss: 0.010174131020903587
eopch: 34 step: 7 loss: 0.010226339101791382
eopch: 34 step: 8 loss: 0.010468566790223122
eopch: 34 step: 9 loss: 0.010130532085895538
eopch: 34 step: 10 loss: 0.010167911648750305
eopch: 34 step: 11 loss: 0.010064241476356983
eopc

eopch: 44 step: 1 loss: 0.007300559431314468
eopch: 44 step: 2 loss: 0.007006799802184105
eopch: 44 step: 3 loss: 0.007196848746389151
eopch: 44 step: 4 loss: 0.007173876278102398
eopch: 44 step: 5 loss: 0.007059003692120314
eopch: 44 step: 6 loss: 0.007244831882417202
eopch: 44 step: 7 loss: 0.007319769822061062
eopch: 44 step: 8 loss: 0.007497848477214575
eopch: 44 step: 9 loss: 0.007233960088342428
eopch: 44 step: 10 loss: 0.007284587249159813
eopch: 44 step: 11 loss: 0.007186760660260916
eopch: 44 step: 12 loss: 0.007046157028526068
eopch: 44 step: 13 loss: 0.006897257175296545
eopch: 44 step: 14 loss: 0.007063171826303005
eopch: 44 step: 15 loss: 0.007202118635177612
eopch: 44 step: 16 loss: 0.007175489328801632
eopch: 44 step: 17 loss: 0.00631604203954339
eopch: 45 step: 1 loss: 0.0070723360404372215
eopch: 45 step: 2 loss: 0.006791436579078436
eopch: 45 step: 3 loss: 0.006976485252380371
eopch: 45 step: 4 loss: 0.0069471923634409904
eopch: 45 step: 5 loss: 0.006837249267846346
e

eopch: 54 step: 13 loss: 0.005179220344871283
eopch: 54 step: 14 loss: 0.0052983760833740234
eopch: 54 step: 15 loss: 0.0053858207538723946
eopch: 54 step: 16 loss: 0.005360795184969902
eopch: 54 step: 17 loss: 0.004717495758086443
eopch: 55 step: 1 loss: 0.005277046002447605
eopch: 55 step: 2 loss: 0.005084042437374592
eopch: 55 step: 3 loss: 0.005242917221039534
eopch: 55 step: 4 loss: 0.005205722525715828
eopch: 55 step: 5 loss: 0.0051289829425513744
eopch: 55 step: 6 loss: 0.005265918094664812
eopch: 55 step: 7 loss: 0.005320873111486435
eopch: 55 step: 8 loss: 0.005446747876703739
eopch: 55 step: 9 loss: 0.005235888063907623
eopch: 55 step: 10 loss: 0.0052989693358540535
eopch: 55 step: 11 loss: 0.0052332826890051365
eopch: 55 step: 12 loss: 0.005130033940076828
eopch: 55 step: 13 loss: 0.005032469052821398
eopch: 55 step: 14 loss: 0.005151201970875263
eopch: 55 step: 15 loss: 0.005237950012087822
eopch: 55 step: 16 loss: 0.005219852551817894
eopch: 55 step: 17 loss: 0.00459086475

eopch: 65 step: 7 loss: 0.004108574241399765
eopch: 65 step: 8 loss: 0.004220670089125633
eopch: 65 step: 9 loss: 0.004044041037559509
eopch: 65 step: 10 loss: 0.0041082510724663734
eopch: 65 step: 11 loss: 0.00406090822070837
eopch: 65 step: 12 loss: 0.00397265562787652
eopch: 65 step: 13 loss: 0.0038903795648366213
eopch: 65 step: 14 loss: 0.003987360745668411
eopch: 65 step: 15 loss: 0.004046839661896229
eopch: 65 step: 16 loss: 0.004039066843688488
eopch: 65 step: 17 loss: 0.003547617932781577
eopch: 66 step: 1 loss: 0.003980612847954035
eopch: 66 step: 2 loss: 0.003832610324025154
eopch: 66 step: 3 loss: 0.003964108414947987
eopch: 66 step: 4 loss: 0.003920335788279772
eopch: 66 step: 5 loss: 0.0038561129476875067
eopch: 66 step: 6 loss: 0.003961419686675072
eopch: 66 step: 7 loss: 0.004013136029243469
eopch: 66 step: 8 loss: 0.004123052582144737
eopch: 66 step: 9 loss: 0.003949185833334923
eopch: 66 step: 10 loss: 0.00401303730905056
eopch: 66 step: 11 loss: 0.003965835087001324


KeyboardInterrupt: 