### Author: Russell Ault

# This Notebook Contains code for exploring the functionality of the Pytorch Handwriting Recognition Library

## I need to write interactive code that loads a model we have trained, tests it on a validation set, and outputs accuracy along with character and word error rates.

## The way the library is currently written is not conducive to this

## Here are some comments I have about the code and how to improve it:
- In evaluating model accuracy a character by character accuracy is being used, not an edit distance. I need to put character and word error rates into the model. I should include a mean and sd of these parameters.
- I think I need to just run the validation code right now to see what it does.
- I think that the main python module should be refactored to allow its use in other python modules.

# Reproduce Main Functionality in Notebook fashion

In [1]:
from __future__ import print_function
import argparse
import random
import torch
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
import numpy as np
from warpctc_pytorch import CTCLoss
import os
import utils
import dataset

In [2]:
import models.crnn as crnn

In [3]:
# Python 2 only: switch the interpreter-wide default string encoding to
# latin-1. `reload(sys)` is what makes `sys.setdefaultencoding` callable again
# (the interpreter removes it at start-up).
import sys  
stdout = sys.stdout
reload(sys)  
sys.setdefaultencoding('latin-1')
# Project-local character / word error rate helpers used by val().
from model_error import cer, wer


#My workaround was that at the top of the script, I import sys, and store sys.stdout in a separate variable, e.g. stdout.
# i.e. put back the stdout handle saved before reload(sys) -- presumably the
# reload swaps out the notebook's stdout and printed output is lost otherwise.
sys.stdout = stdout
print(sys.getdefaultencoding())

latin-1


In [39]:
from model_error import cer, wer

In [5]:
# Scratch check that print output reaches the notebook (presumably to verify
# stdout still works after the reload(sys) workaround).
print(3+3)

6


In [None]:
# Command-line interface copied from the training script. This cell was never
# executed in the notebook (`In [None]`): --trainroot and --valroot are
# required, so parse_args() cannot succeed without real command-line
# arguments. The values actually used by the notebook are set as plain
# variables in the "Set variables" cell instead.
# NOTE(review): some help strings do not match their options -- the --lr help
# quotes default=0.00005 while the default passed is 0.01, and --valInterval /
# --saveInterval reuse the --displayInterval help text.
parser = argparse.ArgumentParser()
parser.add_argument('--trainroot', required=True, help='path to dataset')
parser.add_argument('--valroot', required=True, help='path to dataset')
parser.add_argument('--workers', type=int, help='number of data loading workers', default=2)
parser.add_argument('--batchSize', type=int, default=64, help='input batch size')
parser.add_argument('--imgH', type=int, default=32, help='the height of the input image to network')
parser.add_argument('--imgW', type=int, default=100, help='the width of the input image to network')
parser.add_argument('--nh', type=int, default=256, help='size of the lstm hidden state')
parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for')
parser.add_argument('--lr', type=float, default=0.01, help='learning rate for Critic, default=0.00005')
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
parser.add_argument('--cuda', action='store_true', help='enables cuda')
parser.add_argument('--ngpu', type=int, default=1, help='number of GPUs to use')
parser.add_argument('--crnn', default='', help="path to crnn (to continue training)")
parser.add_argument('--alphabet', type=str, default='0123456789abcdefghijklmnopqrstuvwxyz')
parser.add_argument('--experiment', default=None, help='Where to store samples and models')
parser.add_argument('--displayInterval', type=int, default=500, help='Interval to be displayed')
parser.add_argument('--n_test_disp', type=int, default=10, help='Number of samples to display when test')
parser.add_argument('--valInterval', type=int, default=500, help='Interval to be displayed')
parser.add_argument('--saveInterval', type=int, default=500, help='Interval to be displayed')
parser.add_argument('--adam', action='store_true', help='Whether to use adam (default is rmsprop)')
parser.add_argument('--adadelta', action='store_true', help='Whether to use adadelta (default is rmsprop)')
parser.add_argument('--keep_ratio', action='store_true', help='whether to keep ratio for image resize')
parser.add_argument('--random_sample', action='store_true', help='whether to sample the dataset with random sampler')
opt = parser.parse_args()
print(opt)

In [None]:
# Directory that receives model checkpoints. `experiment` is not assigned
# anywhere else in the notebook, so treat "not defined yet" the same as None
# instead of failing with a NameError.
try:
    experiment
except NameError:
    experiment = None
if experiment is None:
    experiment = 'expr'
# Create the directory through the os module rather than a shell `mkdir`:
# portable, no shell quoting issues, and silent when it already exists.
if not os.path.isdir(experiment):
    os.makedirs(experiment)

### Set variables

In [18]:
# LMDB datasets used for training and validation.
trainroot = "/home/ubuntu/russell/nephi/data/lmdb/train"
valroot = "/home/ubuntu/russell/nephi/data/lmdb/val"

# Model / data-loading settings (notebook replacements for the argparse options).
batchSize = 64
nh = 256                  # size of the LSTM hidden state
imgW = 100                # width the input images are resized to
imgH = 32                 # height the input images are resized to
ngpu = 1
beta1 = 0.5               # beta1 for Adam
lr = 0.0001
workers = 10              # number of data loading workers
keep_ratio = True
adam = True
adadelta = False
n_test_disp = 100         # number of sample predictions printed by val()

# Settings read by the training-loop cell at the end of the notebook. They were
# only declared as argparse options (a cell that never runs here), so the loop
# would stop with a NameError; the values mirror the argparse defaults.
niter = 25                # number of epochs to train for
displayInterval = 500     # iterations between loss print-outs
valInterval = 500         # iterations between validation runs
saveInterval = 500        # iterations between checkpoints

# One alphabet file per collaborator; each is read in the "Load in the two
# alphabets" cell.
alph_file_dylan = "/home/ubuntu/dylan/nephi/alphabet.txt"
alph_file_russell = "/home/ubuntu/russell/nephi/alphabet.txt"
alphabet = '0123456789abcdefghijklmnopqrstuvwxyzB- EÂ¬Ã¼.RSÅ«J/DHA:K¤¿ZLGFNTPCOVWIM<8d>Ä<81><9f>,<93>È³¶'
#0123456789abcdefghijklmnopqrstuvwxyzW VCGū¬.HM,ILAZ:BTÿSER<BC>JFāP<9F>NDKOȳ<B6>
#<A4><8D>()—̈-<84><93>Q<96>/Y<BE>U<>+  # This is what I got from Dylan's file

# Saved CRNN state dicts. The file names follow the
# '{experiment}/netCRNN_{epoch}_{iteration}.pth' pattern written by the
# training loop's torch.save call, so e.g. netCRNN_3210_100.pth is epoch 3210,
# iteration 100. The "untrained"/"lesstrained"/"trained" prefixes are the
# author's labels for how far training had progressed.
untrained_crnn_dylan = "/home/ubuntu/dylan/nephi/expr/netCRNN_1_100.pth"
lesstrained_crnn_dylan = "/home/ubuntu/dylan/nephi/expr/netCRNN_1000_100.pth"
trained_crnn_russell = "/home/ubuntu/russell/nephi/expr/netCRNN_3870_100.pth"
trained_crnn_dylan = "/home/ubuntu/dylan/nephi/expr/netCRNN_3210_100.pth"

In [5]:
# Draw a random seed, print it so the run can be reproduced later, and apply
# the same value to Python's, NumPy's and PyTorch's random number generators.
manualSeed = random.randint(1, 10000)  # seed is random per run; replace with a constant to fix it
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
np.random.seed(manualSeed)
# Last expression of the cell: the Generator it returns is echoed as cell output.
torch.manual_seed(manualSeed)

Random Seed:  6501


<torch._C.Generator at 0x7fa9d0bf2ba0>

In [6]:
# Enable cuDNN benchmark mode and hard-code GPU use (there is no --cuda flag
# in the notebook).
cudnn.benchmark = True
cuda = True

#if torch.cuda.is_available() and not cuda:
#    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# Training data: LMDB dataset read through the project's random-sequential
# sampler; batches are collated by alignCollate with the configured image
# size and keep_ratio setting.
train_dataset = dataset.lmdbDataset(root=trainroot)
sampler = dataset.randomSequentialSampler(train_dataset, batchSize)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batchSize, sampler=sampler,
    num_workers=int(workers),
    collate_fn=dataset.alignCollate(imgH=imgH, imgW=imgW, keep_ratio=keep_ratio))
# Validation data: each image goes through resizeNormalize((imgW, imgH)), so
# val() can batch it with a plain DataLoader and no custom collate function.
test_dataset = dataset.lmdbDataset(
    root=valroot, transform=dataset.resizeNormalize((imgW, imgH)))   # I have changed this line from the original code

In [7]:
# Load the two alphabets: each file is read whole into one string whose
# characters are the symbols the converter can encode.
# NOTE(review): read() keeps any trailing newline in the file, which would then
# count as an alphabet character -- confirm the files have none.
alphabet_russell = ''
alphabet_dylan = ''

with open(alph_file_russell, 'r') as myfile:
    alphabet_russell = myfile.read()
with open(alph_file_dylan, 'r') as myfile:
    alphabet_dylan = myfile.read()
    


In [8]:
# Test Dylan's alphabet and model first
alphabet = alphabet_dylan

# Number of output classes: one per alphabet character plus one extra
# (presumably the CTC blank label -- confirm against utils.strLabelConverter).
nclass = len(alphabet) + 1
# Second argument of CRNN(imgH, nc, nclass, nh); the printed model shows
# conv0 as Conv2d(1, 64, ...), i.e. a single input channel.
nc = 1

# Maps between text labels and the integer sequences consumed by the CTC loss.
converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()

In [9]:
# custom weights initialization called on crnn
def weights_init(m):
    """Initialise one layer's parameters in place; meant for ``Module.apply``.

    Layers whose class name contains ``Conv`` get weights drawn from
    N(0.0, 0.02). Layers whose class name contains ``BatchNorm`` get weights
    drawn from N(1.0, 0.02) and a zeroed bias. Any other layer is left
    untouched.
    """
    layer_name = m.__class__.__name__

    if 'Conv' in layer_name:
        m.weight.data.normal_(0.0, 0.02)
        return

    if 'BatchNorm' in layer_name:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

In [10]:
# Build the model. The module is imported here under its own alias because the
# next line rebinds the name `crnn` from the imported module to the model
# instance; with the alias the cell can be re-run without hitting
# "'CRNN' object has no attribute 'CRNN'".
import models.crnn as crnn_module
crnn = crnn_module.CRNN(imgH, nc, nclass, nh)

In [11]:
# Run weights_init over the model's layers; the model repr echoed below is the
# value returned by apply().
crnn.apply(weights_init)



CRNN (
  (cnn): Sequential (
    (conv0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu0): ReLU (inplace)
    (pooling0): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU (inplace)
    (pooling1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (conv2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batchnorm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
    (relu2): ReLU (inplace)
    (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU (inplace)
    (pooling2): MaxPool2d (size=(2, 2), stride=(2, 1), dilation=(1, 1))
    (conv4): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batchnorm4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
    (relu4): ReLU (inplace)
    (conv5): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), pad

# Based on the above unexpected key error, I will assume that when I try to run the original code with a validation epoch number, I will get the same error

In [12]:
# Reusable input buffers that val() and trainBatch() fill through
# utils.loadData for every batch.
# NOTE(review): the sizes given here look like placeholders -- presumably
# utils.loadData resizes each buffer to the incoming batch; confirm in utils.
image = torch.FloatTensor(batchSize, 3, imgH, imgH)
text = torch.IntTensor(batchSize * 5)          # RA: I don't understand why the text has this size
length = torch.IntTensor(batchSize)

# Move the model, the image buffer and the loss onto the GPU and wrap the
# model in DataParallel across `ngpu` devices. `text` and `length` are not
# moved.
if cuda:
    crnn.cuda()
    crnn = torch.nn.DataParallel(crnn, device_ids=range(ngpu))
    image = image.cuda()
    criterion = criterion.cuda()

In [13]:
# Load Dylan's pretrained model first
# The state dict is loaded into the DataParallel-wrapped model, so the
# checkpoint's keys must match that wrapper's parameter names.
trained_crnn = trained_crnn_dylan
if trained_crnn != '':
    print('loading pretrained model from %s' % trained_crnn)
    crnn.load_state_dict(torch.load(trained_crnn))
print(crnn)

loading pretrained model from /home/ubuntu/dylan/nephi/expr/netCRNN_3210_100.pth
DataParallel (
  (module): CRNN (
    (cnn): Sequential (
      (conv0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (relu0): ReLU (inplace)
      (pooling0): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (relu1): ReLU (inplace)
      (pooling1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
      (conv2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (batchnorm2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      (relu2): ReLU (inplace)
      (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (relu3): ReLU (inplace)
      (pooling2): MaxPool2d (size=(2, 2), stride=(2, 1), dilation=(1, 1))
      (conv4): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (batchnorm4): BatchNorm2d(5

In [14]:
# Wrap the reusable buffers in autograd Variables (pre-0.4 PyTorch API).
image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager: running mean of the training loss, reset by the training loop
# after each display interval
loss_avg = utils.averager()

# setup optimizer: Adam if `adam` is set, else Adadelta if `adadelta` is set,
# else RMSprop; all use the learning rate `lr`
if adam:
    optimizer = optim.Adam(crnn.parameters(), lr=lr,
                           betas=(beta1, 0.999))
elif adadelta:
    optimizer = optim.Adadelta(crnn.parameters(), lr=lr)
else:
    optimizer = optim.RMSprop(crnn.parameters(), lr=lr)

# Here is where I will test out the code.

### First order of business is to see what val outputs currently on these pretrained models using the test set.
### Then add word and character error rate and a way to calculate mean and standard deviation of them.

In [25]:
def _mean_and_sd(values):
    """Return ``(mean, sample standard deviation)`` of a list of numbers.

    The standard deviation uses ``ddof=1``; it is reported as 0.0 when only
    one value is available and both results are nan for an empty list, so
    neither case triggers a NumPy warning.
    """
    arr = np.array(values, dtype=float)
    if arr.size == 0:
        return float('nan'), float('nan')
    sd = np.std(arr, ddof=1) if arr.size > 1 else 0.0
    return np.mean(arr), sd


def val(net, dataset, criterion, max_iter=100):
    """Evaluate ``net`` on ``dataset`` and print loss, accuracy, CER and WER.

    Predictions are decoded greedily (arg-max per time step, then collapsed by
    ``converter.decode``) and compared with the lower-cased ground truth, so
    case is ignored.

    Relies on notebook globals: ``batchSize``, ``workers``, ``n_test_disp``,
    ``converter``, ``cer``, ``wer`` and the reusable ``image`` / ``text`` /
    ``length`` buffers.

    Args:
        net: model to evaluate; its parameters get ``requires_grad = False``
            and it is switched to eval mode (the training loop re-enables
            both before its next step).
        dataset: dataset yielding ``(image, text)`` pairs.
        criterion: CTC loss called as
            ``criterion(preds, text, preds_size, length)``.
        max_iter: maximum number of batches to evaluate.

    Returns:
        Tuple ``(char_error, w_error)``: per-sample character and word error
        rates in evaluation order. Both lists are empty when there is nothing
        to evaluate.
    """
    print('Start val')

    # Freeze the model that was passed in (not whatever the global `crnn` is).
    for p in net.parameters():
        p.requires_grad = False

    net.eval()
    data_loader = torch.utils.data.DataLoader(
        dataset, shuffle=True, batch_size=batchSize, num_workers=int(workers))
    val_iter = iter(data_loader)

    n_correct = 0
    image_count = 0
    loss_avg = utils.averager()

    # Character and word error rate lists
    char_error = []
    w_error = []

    max_iter = min(max_iter, len(data_loader))
    if max_iter <= 0:
        # Nothing to evaluate: avoid dividing by zero and reading undefined
        # predictions below.
        print('No validation batches to evaluate')
        return (char_error, w_error)

    for _ in range(max_iter):
        cpu_images, cpu_texts = next(val_iter)
        batch_size = cpu_images.size(0)
        image_count += batch_size
        utils.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)
        utils.loadData(length, l)

        preds = net(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        # RA: While I am not sure yet, it looks like a greedy decoder and not beam search is being used here
        # Exact-match accuracy is reported alongside edit-distance based rates.
        # Case is ignored in the accuracy, which is not ideal for an actual working system
        _, preds = preds.max(2)
        preds = preds.squeeze(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        for pred, target in zip(sim_preds, cpu_texts):
            expected = target.lower()
            if pred == expected:
                n_correct += 1
            char_error.append(cer(pred, expected))
            w_error.append(wer(pred, expected))

    # Show raw vs. collapsed predictions for (part of) the last batch.
    raw_preds = converter.decode(preds.data, preds_size.data, raw=True)[:n_test_disp]
    for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
        print('%-20s => %-20s, gt: %-20s' % (raw_pred, pred, gt))

    # Divide by the images actually seen: the final batch can be smaller than
    # batchSize, so max_iter * batchSize would understate the accuracy.
    accuracy = n_correct / float(image_count) if image_count else 0.0
    print('Test loss: %f, accuray: %f' % (loss_avg.val(), accuracy))

    print("Character error rate mean: %4.4f; Character error rate sd: %4.4f" % _mean_and_sd(char_error))
    print("Word error rate mean: %4.4f; Word error rate sd: %4.4f" % _mean_and_sd(w_error))
    print("Total number of images in validation set: %8d" % image_count)
    return (char_error, w_error)

In [16]:
def trainBatch(net, criterion, optimizer):
    """Run one optimisation step on the next batch and return its loss.

    The batch is pulled from the notebook-global ``train_iter`` and copied
    into the reusable ``image`` / ``text`` / ``length`` buffers; labels are
    encoded with the global ``converter``.

    Args:
        net: model to train. The forward pass and ``zero_grad`` are applied
            to this object rather than to the global ``crnn``.
        criterion: CTC loss called as
            ``criterion(preds, text, preds_size, length)``.
        optimizer: optimizer stepped after the backward pass.

    Returns:
        The CTC loss of the batch divided by the batch size.

    Raises:
        StopIteration: when ``train_iter`` is exhausted.
    """
    # next() works on both Python 2 and Python 3 iterators.
    cpu_images, cpu_texts = next(train_iter)
    batch_size = cpu_images.size(0)
    utils.loadData(image, cpu_images)
    t, l = converter.encode(cpu_texts)
    utils.loadData(text, t)
    utils.loadData(length, l)

    preds = net(image)
    # Every sample in the batch has the same number of output time steps.
    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
    cost = criterion(preds, text, preds_size, length) / batch_size
    net.zero_grad()
    cost.backward()
    optimizer.step()
    return cost

### Now see how the pre-trained model works on the validation set
Oops, I have to figure out how to change the kernel of this Python notebook...

In [33]:
# Validation run on the model loaded above; each output line is
# "raw prediction => collapsed prediction, gt: ground truth".
val(crnn, test_dataset, criterion)

Start val
aas dernnaet  ann h- daaa. => as dernaet an h da. , gt: Die Genneral ambts Rait¬
llchr t--n dz�-�-n-ewiils. => lchr tn dzÿnewils. , gt: Alhie. sonnder Meniglich
aamme-  bllrz--neem-mma-ns => ame blrznemmans     , gt: Ainer Loblichen Regierūng.
maa---nin. deoornū�nnssen => manin. deornūnsen  , gt: Locheman. Jeronimūsen
wal------n  win---dde----. => waln winde.         , gt: Melchior Wūrmbrandt.
air-ranster  aibe -geegmn. => airranster aibe gegmn., gt: aūf negsten Ratstag wid¬
aeeaan d2  lea---w- olsien => aean d2 leaw olsien , gt: Aber Ain CoPȳ an Ir G:
aab  mal  aenn iimee  d��. => ab mal aen ime d��. , gt: Als well man seiner aūf
haarautr  dewezzi wed ww-n => harautr dewezi wed wn, gt: Mandaten. dergleichen Wöhrn
vme saūrwednt a�h bem-mon => vme saūrwednt a�h bemmon, gt: von Rovereid, aūf Laȳen
zei-----merw---ppaa-n---n. => zeimerwpann.        , gt: Reinhardt von Pūechhaim.
vnn---dd-ll-tt hze---t---. => vndlt hzet.         , gt: Landts bet:         
ma�-�rr  ha--reirgge-nden

In [16]:
# NOTE(review): `crnn_dylan_trained` is not assigned in any visible cell (its
# only assignment, near the end of the notebook, is commented out), so this
# cell depends on earlier kernel state and fails on a fresh kernel.
val(crnn_dylan_trained, test_dataset, criterion)

Start val
imns sggtel�in  wer�n��ntt => imns sgtel�in wer�n�nt, gt: dise Motiūen, worūmb
herrren lann-nddshh�ūbba� => heren lanndshūba�  , gt: herrn Lanndthaūbt¬
im  rr-s---r  anzzail zint => im rsr anzail zint  , gt: in grosser Anzal aūf.
aiiee, a�s  bacchen olllig => aie, a�s bachen olig, gt: Ainer aūf Laÿen etlich 
die  herr--megehenns. vvnd => die hermegehens. vnd, gt: die Thor angehengt: vnd
------------1-----------35 => 135                 , gt: 203                 
man--en,, aaddlli- vnnddet => manen, adli vndet   , gt: gannzer Adelich. vnd Er¬
vnd ma�-s ee dammedthherr. => vnd ma�s e damedther., gt: vnd Aūsser Lanndtsūer¬
vmdd aamz-eiengg-n-e malll => vmd amzeiengne mal  , gt: vnd Aūsgeben Zūūerhalt
herrr-n an-nnd h�ūlbta-nn => hern annd hūlbtan  , gt: herrn Lanndthaūbtman
zūeggihibenn werdden  iin => zūegihiben werden in, gt: Zūegschriben werden. ain
lem--ngaarr  an--ee. bea¬ => lemngar ane. bea¬  , gt: Pennzinger. hannsen Loche¬
būeehen  aūnnddn  wofffr => būehen aūndn wofr ,

In [26]:
# Now with the character and word error rates: val() returns the per-sample
# character error list and word error list, kept here for later inspection.
char_e, w_e = val(crnn, test_dataset, criterion)

Start val
-------11---------------66 => 16                  , gt: 193                 
gns pūrrrennninssee-  nnd => gns pūreninse nd   , gt: N. Būrgermaisster: vnd
haūnddroeeen  srrffffer�. => haūndroen srfer�.  , gt: Anndreen Rotten¬   
hannnnnn aūgeen senndeer. => han aūgen sender.  , gt: khomen migen. sonnder
m-ttrt hū� hehers tr bi¬ => mtrt hū hehers tr bi¬, gt: Matheūs Hofsteter  
airttei wlls s-felwmmmee-s => airtei wls sfelwmes , gt: Aūfkhaūfft. Als soll deßweg
pen ssgeegch aa-sccileienn => pen sgegch ascileien, gt: Passbrief Angehalten.
fen-ns nnnden ii tetrge--. => fenns nden i tetrge., gt: Personen An Jezt verseh.
wee-nn-ntenn vnnndderrlcch => wennten vnderlch    , gt: darūnter sonnderlich
han�stt vdd i  grbgggzai¬ => han�st vd i grbgzai¬, gt: haūß. vnd Zū Rūgg Zieh.
vn--n-tttgeern----snmfffff => vnntgernsnmf        , gt: vnd versich¬       
lileerpos gefeoo wa--f-nr� => lilerpos gefeo wafnr�, gt: es bei der getanen Abschaff¬
tatterannichee. gen-ndict  => tateraniche. genndic

In [19]:
# Swap in the earlier checkpoint (netCRNN_1000_100.pth) for comparison.
crnn.load_state_dict(torch.load(lesstrained_crnn_dylan))

In [24]:
# Re-run validation; char_e / w_e are overwritten with the new results.
# NOTE(review): the cell counter (In [24]) is lower than that of the earlier
# val cell (In [26]), so the cells were not executed in document order.
char_e, w_e = val(crnn, test_dataset, criterion)

Start val
ha-  der herrrrūen-nnicht => ha der herūennicht , gt: hat der herr Būrgermaist
an� gbllss--nneiierd- ciss => an� gblsneierd cis  , gt: Aūsglassen wūrde. sich 
gennnnddeennn scshgmm-lh-t => genden scshgmlht    , gt: geennden .2. Vieh Märckht
o----lllccher go--n-e-err� => olcher goneer�      , gt: Loblichen Cammer    
gr----tt-----r  ete-----¬ => grtr ete¬          , gt: Antoni Jacob        
pa---fer peerweennses dder => pafer perwenses der , gt: Talfer Prūggen, so der
sa-ss-ss  wesssseiigenmen. => sass weseigenmen.   , gt: Straff verboten wirdet,
-------------------------. => .                   , gt: bet:                
mi---. va�fth za-tmnnrinj. => mi. va�fth zatmnrinj., gt: groß noch khlain vieh
wa--rdden  an- sssllbee-nn => warden an slben     , gt: werden. Inen Zollern
ge----see--mpe--s-fer-ier. => gesempesferier.     , gt: Caspar Artsteter.   
vn---ndd  paa-----n----tr. => vnnd pantr.         , gt: vnd Lanndts¬       
v-------iibb----------t--. => vibt.               , 

# Next things to do:
1) Make the word error and character error code robust to empty inputs (e.g. return a dummy value when the length is < 1)
2) Incorporate character and word error rates into the training set too

In [None]:
# Switch back to Dylan's later checkpoint (same loading logic as the earlier
# "Load Dylan's pretrained model" cell, without printing the model).
trained_crnn = trained_crnn_dylan
if trained_crnn != '':
    print('loading pretrained model from %s' % trained_crnn)
    crnn.load_state_dict(torch.load(trained_crnn))

In [26]:
# Bare name: a no-op because it is not the last expression of the cell.
char_e
# NumPy copy of the per-sample character error rates for summary statistics.
char_a = np.array(char_e)


In [33]:
# Show the per-sample character error rates and their mean. Values above 1.0
# appear in the output, so the rate is not capped at 100%.
print(char_a)
print("%4.3f" % np.mean(char_a))

[ 0.77777778  0.5         1.04761905 ...,  1.30769231  0.63636364
  0.52631579]
0.862


In [1]:
# The assignment is commented out, so on a fresh kernel (In [1]) the name
# lookup below raises the NameError recorded in the output.
#crnn_dylan_trained = crnn
crnn_dylan_trained

NameError: name 'crnn_dylan_trained' is not defined

### As of 17 February 2018, the machine is learning to read. It is rough, but it is learning. This is exciting!

In [None]:
# Main training loop (never executed in this notebook: `In [None]`).
# Reads niter, displayInterval, valInterval, saveInterval and experiment from
# the notebook's global scope.
for epoch in range(niter):
    # trainBatch() pulls batches from this module-level iterator, so it has
    # to be recreated at the start of every epoch.
    train_iter = iter(train_loader)
    i = 0
    while i < len(train_loader):
        # val() freezes the parameters and switches the model to eval mode,
        # so training mode is restored before every step.
        for p in crnn.parameters():
            p.requires_grad = True
        crnn.train()

        cost = trainBatch(crnn, criterion, optimizer)
        loss_avg.add(cost)
        i += 1

        # Periodically report the running average loss, then start a new average.
        if i % displayInterval == 0:
            print('[%d/%d][%d/%d] Loss: %f' %
                  (epoch, niter, i, len(train_loader), loss_avg.val()))
            loss_avg.reset()

        # Periodically evaluate on the validation set.
        if i % valInterval == 0:
            val(crnn, test_dataset, criterion)

        # do checkpointing: file name encodes epoch and iteration
        if i % saveInterval == 0:
            torch.save(
                crnn.state_dict(), '{0}/netCRNN_{1}_{2}.pth'.format(experiment, epoch, i))