In [1]:
import os
import torch
import pandas as pd
import callbacks
import data
import dataUtils as du
import image
import metric
import modelArc
import trainer
import tqdm
from config import ConfigFile
from datetime import datetime


In [2]:
#load data
#Specify path to main database
data_dir = r"D:\photos\RCNN4\BBOXES"
model_path = r"D:\Projects\reciept-scanner\RCNN\models"
database, vocab, max_len = [], set(), 0

largest_index = 492


print("dataset of " + str(largest_index) + " images")

for id in range(0, largest_index+1):

    img_path = os.path.join(data_dir, str(id) + ".jpg").replace("\\","/")
    
    if os.path.exists(img_path):

        with open(os.path.join(data_dir, str(id) + ".txt").replace("\\","/"), 'r') as file:
            ground_truths = [line.strip() for line in file.readlines()]
        
        for line in ground_truths:
            if not line.strip() == '':
                label = line.rstrip("\n")
                database.append([img_path, label])
                vocab.update(list(label))
                max_len = max(max_len, len(label))
    else:
        print("image with index " + str(id) + " do not exist")

print("database, vocab, max_len, complete")
print(database)

dataset of 492 images
image with index 187 do not exist
image with index 190 do not exist
image with index 194 do not exist
image with index 197 do not exist
image with index 199 do not exist
image with index 354 do not exist
image with index 366 do not exist
image with index 370 do not exist
image with index 372 do not exist
image with index 381 do not exist
image with index 393 do not exist
database, vocab, max_len, complete
[['D:/photos/RCNN4/BBOXES/0.jpg', 'AMOUNT'], ['D:/photos/RCNN4/BBOXES/0.jpg', '$23.99'], ['D:/photos/RCNN4/BBOXES/1.jpg', 'SHINHO SOY BEAN PASTE'], ['D:/photos/RCNN4/BBOXES/1.jpg', '2 @ $1.39ea.'], ['D:/photos/RCNN4/BBOXES/1.jpg', 'W $2.78'], ['D:/photos/RCNN4/BBOXES/2.jpg', 'SALE) PREVIOUSLY FZ BLACK COO'], ['D:/photos/RCNN4/BBOXES/2.jpg', '1.220 kg @ $15.17/kg'], ['D:/photos/RCNN4/BBOXES/2.jpg', 'W $18.51'], ['D:/photos/RCNN4/BBOXES/3.jpg', '(SALE) STRAWBERRY'], ['D:/photos/RCNN4/BBOXES/3.jpg', '576431'], ['D:/photos/RCNN4/BBOXES/3.jpg', '2 @2/$6.00'], ['D:/pho

In [3]:
#create data loaders
model_config = ConfigFile(name = "CRNN1", path = model_path, lr=0.0003, bs=32)

model_config.vocab = "".join(vocab)
model_config.max_txt_len = max_len
model_config.save()

dataset_loader = data.DataLoader(dataset = database, batch_size = model_config.batch_size, 
                                 data_preprocessors = [image.ImageReader(image.CVImage)], 
                                 transformers = [du.ImageResizer(model_config.width, model_config.height), du.LabelIndexer(model_config.vocab), 
                                                 du.LabelPadding(padding_value = len(model_config.vocab), max_word_len = max_len)])#, du.ImageShowCV2()


train_set, val_set = dataset_loader.split(split = 0.7)

train_set.augmentors = [
    du.RandomBrightness(),
    du.RandomErodeDilate(),
    du.RandomSharpen(),
    du.RandomRotate(angle=10),
    ]

config saved toD:/Projects/reciept-scanner/RCNN/models/202407100110


In [4]:
#initialize model, optimizer, and loss
model = modelArc.CRNN(len(model_config.vocab))
loss = trainer.CTCLoss(blank = len(model_config.vocab))
optimizer = torch.optim.Adam(model.parameters(), lr=model_config.lr)

if torch.cuda.is_available():
    model = model.cuda()
    print("CUDA Enabled...Training On GPU")


CUDA Enabled...Training On GPU


In [5]:
#initialze callbacks and trainer
earlystop = callbacks.EarlyStopping(monitor = "val_CER", patience = 10, verbose = True)
ckpt = callbacks.ModelCheckpoint((model_config.model_path + "/model.pt").replace("\\","/"), monitor = "val_CER", verbose = True)
tracker = callbacks.TensorBoard((model_config.model_path + "/logs").replace("\\","/"))
auto_lr = callbacks.ReduceLROnPlateau(monitor = "val_CER", factor=0.9, patience = 10, verbose = True)
save_model = callbacks.Model2onnx(saved_model_path = (os.path.join(model_path, datetime.strftime(datetime.now(), "%Y%m%d%H%M"),"model.pt").replace("\\","/")), input_shape = (1, model_config.height, model_config.width, 3), verbose = True, metadata = {"vocab": model_config.vocab})


train_struct = trainer.Trainer(model, optimizer, loss, metrics = [metric.CERMetric(model_config.vocab), metric.WERMetric(model_config.vocab)])

In [6]:
#train
train_struct.run(train_set, val_set, epochs=2, callbacks = [ckpt, tracker, auto_lr, save_model])#earlystop,

train_set.to_csv(os.path.join(model_config.model_path, "train.csv").replace("\\","/"))
val_set.to_csv(os.path.join(model_config.model_path, "val.csv").replace("\\","/"))

 [1mIters[0m    [1mElapsed Time[0m      [1mSpeed[0m                                               
 [99m0/[93m25[0m[0m   [99m        -        [0m  [99m   -    [0m                                             
  return F.conv2d(input, weight, bias, self.stride,
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
[K[F[K[F [1mIters[0m    [1mElapsed Time[0m      [1mSpeed[0m     [1mloss[0m     [1mCER[0m    [1mWER[0m    [1mlr[0m                 
 [99m1/[93m25[0m[0m   [99m00:00:00<[93m00:00:15[0m[0m  [99m1.52it/s[0m  [99m37.9856[0m  [99m5.1628[0m  [99m1.0[0m  [99m0.0003[0m               
[1mEpoch 1: [0m   4.0% |[97m█[0m[97m█[0m                                                           |[K[F[K[F [1mIters[0m    [1mElapsed Time[0m      [1mSpeed[0m     [1mloss[0m     [1mCER[0m     [1mWER[0m      [1mlr[0m              
 [99m3/[93m25[0m[0m   [99m00:00:01<[93m00:00:07[0

128
torch.Size([1, 32, 128, 3])


INFO:Model2onnx:Model saved to D:\Projects\reciept-scanner\RCNN\models\202407100110\model.onnx
