In [1]:
!pip3 install editdistance==0.5.3
!pip3 install numba==0.49.0

Collecting editdistance==0.5.3
  Downloading editdistance-0.5.3-cp37-cp37m-manylinux1_x86_64.whl (179 kB)
     |████████████████████████████████| 179 kB 4.9 MB/s eta 0:00:01
Installing collected packages: editdistance
Successfully installed editdistance-0.5.3
You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.
Collecting numba==0.49.0
  Downloading numba-0.49.0-cp37-cp37m-manylinux2014_x86_64.whl (3.6 MB)
     |████████████████████████████████| 3.6 MB 4.1 MB/s eta 0:00:01
Collecting llvmlite<=0.33.0.dev0,>=0.31.0.dev0
  Downloading llvmlite-0.32.1-cp37-cp37m-manylinux1_x86_64.whl (20.2 MB)
     |████████████████████████████████| 20.2 MB 37.3 MB/s eta 0:00:01
Installing collected packages: llvmlite, numba
Successfully installed llvmlite-0.32.1 numba-0.49.0
You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.

In [2]:
import os
import string
import datetime

from data.generator import DataGenerator, Tokenizer
from network.model import HTRModel

In [21]:
# --- Run configuration -------------------------------------------------------
# Input tensor shape handed to HTRModel, and the longest label the generator accepts.
input_size = (64, 32, 1)
max_text_length = 32

# First 95 characters of string.printable: digits, ASCII letters, punctuation
# and the space character.
charset_base = string.printable[:95]

source = "iam_cvl"
arch = "puigcerver"  # options: puigcerver, bluche, flor
batch_size = 350
lr = 0.0001
type_of_run = "train"

# NOTE: despite its name, `train_model` is False for a training run and True
# otherwise; it is forwarded as DataGenerator's `predict` argument in a later cell.
train_model = type_of_run != "train"

In [22]:
# Dataset file to read, the per-dataset/per-architecture output directory,
# and the checkpoint file stored inside that directory.
source_path = "/floyd/input/words_htr_combo/{0}.hdf5".format(source)
output_path = os.path.join("/floyd/home/output_words_combo", source, arch)
target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

# Continue only if at least one of the dataset or the checkpoint is on disk.
assert any(os.path.isfile(candidate) for candidate in (source_path, target_path))

# Create the output directory (and parents); a no-op if it already exists.
os.makedirs(output_path, exist_ok=True)

In [23]:
# Data pipeline over the HDF5 dataset. `predict` receives `train_model`, which
# is False for a training run (see the configuration cell).
dtgen = DataGenerator(
    source=source_path,
    batch_size=batch_size,
    charset=charset_base,
    max_text_length=max_text_length,
    predict=train_model,
)

# Network for the chosen architecture; the vocabulary size comes from the
# generator's tokenizer.
# NOTE(review): beam_width / stop_tolerance / reduce_tolerance presumably tune
# decoding and the early-stop / LR-reduction callbacks — confirm in network.model.
model = HTRModel(
    architecture=arch,
    input_size=input_size,
    vocab_size=dtgen.tokenizer.vocab_size,
    beam_width=10,
    stop_tolerance=20,
    reduce_tolerance=15,
)

model.compile(learning_rate=lr)

# NOTE(review): behaviour when the checkpoint file does not exist yet is not
# visible here — confirm load_checkpoint tolerates a missing target.
model.load_checkpoint(target=target_path)

# Write the architecture summary into the output directory and build the
# callbacks that are passed to fit().
model.summary(output_path, "summary.txt")
callbacks = model.get_callbacks(
    logdir=output_path,
    checkpoint=target_path,
    verbose=1,
)

/floyd/input/words_htr_combo/iam_cvl.hdf5
Model: "model_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 64, 32, 1)]       0         
_________________________________________________________________
conv2d_33 (Conv2D)           (None, 64, 32, 16)        160       
_________________________________________________________________
batch_normalization_33 (Batc (None, 64, 32, 16)        64        
_________________________________________________________________
leaky_re_lu_33 (LeakyReLU)   (None, 64, 32, 16)        0         
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 32, 16, 16)        0         
_________________________________________________________________
conv2d_34 (Conv2D)           (None, 32, 16, 32)        4640      
_________________________________________________________________
batch_normalizati

In [24]:
# Report the number of samples in each dataset partition.
for split in ("train", "valid", "test"):
    print(f"{split.capitalize()} images: {dtgen.size[split]}")

Train images: 121562
Valid images: 6421
Test images: 14209


In [None]:
# Train the model, measuring the wall-clock duration of the whole fit() call.
start_time = datetime.datetime.now()

h = model.fit(
    x=dtgen.next_train_batch(),
    epochs=400,
    steps_per_epoch=dtgen.steps['train'],
    validation_data=dtgen.next_valid_batch(),
    validation_steps=dtgen.steps['valid'],
    callbacks=callbacks,
    shuffle=True,
    verbose=1,
)

total_time = datetime.datetime.now() - start_time

# Loss curves from the fit history; len(loss) is the number of epochs recorded.
loss, val_loss = h.history['loss'], h.history['val_loss']

# Best epoch = first epoch that reached the lowest validation loss.
min_val_loss = min(val_loss)
min_val_loss_i = val_loss.index(min_val_loss)

# Average durations: per epoch, and per (train + validation) item within an epoch.
time_epoch = total_time / len(loss)
total_item = dtgen.size['train'] + dtgen.size['valid']

# Plain-text training report; entries ending in "\n" leave a blank line after them.
t_corpus = "\n".join((
    f"Total train images:      {dtgen.size['train']}",
    f"Total validation images: {dtgen.size['valid']}",
    f"Batch:                   {dtgen.batch_size}\n",
    f"Total time:              {total_time}",
    f"Time per epoch:          {time_epoch}",
    f"Time per item:           {time_epoch / total_item}\n",
    f"Total epochs:            {len(loss)}",
    f"Best epoch               {min_val_loss_i + 1}\n",
    f"Training loss:           {loss[min_val_loss_i]:.8f}",
    f"Validation loss:         {min_val_loss:.8f}",
))

# Save the report alongside the checkpoint, then echo it in the notebook.
with open(os.path.join(output_path, "train.txt"), "w") as lg:
    lg.write(t_corpus)

print(t_corpus)

Epoch 1/400
Epoch 00001: val_loss improved from inf to 17.62555, saving model to /floyd/home/output_words_combo/iam_cvl/puigcerver/checkpoint_weights.hdf5
Epoch 2/400