In [None]:
# Importing the necessary libraries
import os
import glob
import torch 
from torch import nn
from torch import nn.functional as F
import torch.optim as optim
import pandas as pd
import numpy as np
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms
from torchvision.models import resnet18
import string
from tqdm.notebook import tqdm
import cv2
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import multiprocessing as mp



In [None]:
cpu_count = mp.cpu_count()

In [None]:
data_path = "/kaggle/input/captcha-version-2-images/samples"

In [None]:
image_fns = os.listdir(data_path)
# remove samples
image_fns.remove('samples')
image_fns_train,image_fns_test = train_test_split(image_fn,random_state=1)


In [None]:
# Character Maps
image_ns = [image_fn.split('.')[0] for image_fn in image_fns]
image_ns =''.join(image_ns)
letters = sorted(list(set(list(image_ns))))
vocabulary = ['-']+ letters
idx2char = {k:v for k,v in enumerate(vocabulary,state=0)}
char2idx = {v:k for k,v in idx2char.items()}

In [1]:
# DataLoader 
batch_size=16

class CaptchaDataset(Dataset):
    def __init__(self,data_dir,image_fns):
        self.data_dir = data_dir
        self.image_fns = image_fns
    def __len__(self):
        return len(self.image_fns)
    def __getitem(self,index):
        image_fn = self.image_fn[index]
        image_fp = ospath.join(self,data_dir,image_fn)
        image = Image.open(image_fp).convert('RGB')
        image = self.transform(image)
        text = image_fn.split('.')[0]
        return image,text
    def transform(self,image):
        transform_ops = transforms.compose([transforms.ToTensor(),transforms.Nomalize(
        mean=(0.485,0.456,0.406),std=(0.229,0.224,0.225))])
        return transform_ops(image)
    
    # Train and Test datasets
    trainset = CaptchaDataset(data_dir,image_fns_train)
    testset = CaptchaDataset(data_dir,image_fns_test)
     # Loading the datasets
    train_loader = Dataloader(trainset,batch_size=batch_size,shuffle=True)
    test_loader = Dataloader(testset,batch_size=batch_size,shuffle=False)
    # Setting an iteration 
    train_iterator,valid_iterator = iter(train_loader.next())

IndentationError: expected an indented block (4225546505.py, line 4)

# CRNN MODEL

In [None]:
# Defining some of the parameters of the model
num_chars = len(char2idx)
rnn_hidden_size = 256
resnet = resnet18(pretrained=True)
device = torch.device('cuda' if torch.cuda.is_available else 'cpu')

In [26]:
class CRNN(nn.Module):
    def __init__(self,num_chars,rnn_hidden_size=256,dropout=0.2):
        super(CRNN).__init__()
        self.num_chars = num_chars
        self.rnn_hidden_size = rnn_hidden_size
        self.dropout =dropout
        resnet_modules = list(resnet.children()[:-3])
        self.cnn1 = nn.Sequential(*resnet_modules)
        self.cnn2 = nn.Sequential(nn.Conv2d(256,256,kernel_size=(3,6),stride=1,padding=1),
                                 nn.BatchNorm2d(256),nn.Relu(inplace=True))
        self.linear1 = nn.Linear(rnn_hidden_size,rnn_hidden_size)
        self.rnn1 = nn.GRU(rnn_hidden_size,rnn_hidden_size,bidirection=True,dropout=dropout,batch_first=True)
        self.rnn2 = nn.GRU(rnn_hidden_size,rnn_hidden_size,bidirection=True,dropout=dropout,batch_first=0)
        self.linear2 = nn.Linear(rnn_hidden_size,num_chars)
    def forward(self,batch):
        batch = self.cnn1(batch)
        batch = self.cnn2(batch)
        batch =batch.permute(0,3,1,2)
        batch_size1 = batch.size(0)
        batch_size2 = batch.size(1)
        batch = batch.view(batch_size1,batch_size2)
        batch = self.linear1(batch)
        batch,hidden = self.rnn1(batch)
        feature_size =batch.size(2)
        batch = batch[:,:,:feature_size//2]+batch[:,:,feature_size//2]
        batch,hidden = self.rnn2(batch)
        batch = self.linear2(batch)
        return batch

In [30]:
# Create a function that iniatializes the weights for our model
def weight_initializer(m):
    class_name = __class__.__name__
    if type(m) in [nn.Linear,nn.Conv2d,nn.Conv1d]:
        torch.nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            m.bias.data.fill_(0.01)
        elif class_name.find('BatchNorm')!=-1:
            m.weight_data.normal_(1.0,0.02)
            m.bias.data.fill_(0)

In [None]:
model =CRNN(num_chars,rnn_hidden_size)
text_batch_logits = model(image_batch).to(device)
criterion = nn.CTCLoss(blank=0)


In [31]:
# create a function that encodes text
def encodes_text(text_batch):
    text_batch_target_lens = [len(text)for text in text_batch]
    text_batch_target_lens = torch.IntTensor(text_batch_target_lens)
    text_batch_concat = ''.join(text_batch)
    text_batch_targets = [char2idx[c] for c in text_batch_concat]
    text_batch_targets = torch.IntTensor(text_batch_targets)
    return text_batch_targets,text_batch_target_lens
# create a function that computes our loss
def compute_loss(text_batch,text_batch_logits):
    text_batch_logPs = F.log_softmax(text_batch_logits,2)
    text_batch_logPs_lens = torch.full(size=(text_batch_logPs.size(1)),
                                      fill_value = text_batch_logPs.size(0),
                                      dtype= torch.Int32).to(device)
    text_batch_targets,text_batch_target_lens = encodes_text(text_batch)
    loss = Criterion(text_batch_logPs,text_batch_targets,text_batch_logPs_lens,text_batch_target_lens)
    return loss
compute_loss(text_batch,text_batch_logits)


# MODEL TRAINING

In [None]:
epochs = 50
lr = 1e-3
weigth_decay = 1e-3
clip_norm =5
optimizer = optim.Adam(model.parameters(),lr=lr,weight_decay=weight_decay)
lr_scheduler =optim.lr_scheduler.ReduceLROnPlateau(optimizer,verbose=True,patience=5)
model
model.apply(weight_initializer)
model = model.to(device)

# Training Loop

In [None]:
epoch_loss = []
iterator_loss = []
for epoch in tqdm(range(1,epochs+1)):
    epoch_loss_list = []
    num_updates_epochs = []
    for image_batch,text_batch in tqdm(train_loader,leave=False):
        optimizer.zero_grad()
        text_batch_logits = model(image_batch)
        loss = compute_loss(text_batch,text_batch_logits)
        iterator_loss+=loss.item()
        if np.isnan(iterator_loss) :
            continue
        num_updates_epochs +=1
        iterator_loss.append(iterator_loss)
        loss.backward()
        nn.util.clip_grad_norm(model.parameters(),clip_norm)
        optimizer.step()
        epoch_loss =np.mean(epoch_loss_list)
        num_updates_epochs.append(num_updates_epochs)
        lr_scheduler.step(epoch_loss)
        
            

In [None]:
# Function that makes predictions given text data 
def make_predictions(text_batch_logits):
    text_batch_tokens =F.Softmax(text_batch_logits,2).argmax(2)
    text_batch_tokens = text_batch_tokens.numpy().T
    text_batch_tokens_new = []
    for text_tokens in text_batch_tokens:
        text = [idx2char[idx] for idx in text_tokens]
        text = "".join(text)
        text_batch_tokens_new.append(text)
        
    return text_batch_tokens_new

Credits to
: : https://github.com/GokulKarthik/deep-learning-projects-pytorch
            ::  https://github.com/carnotaur/crnn-tutorial/

# THE END 