# Train an OCR text detector: quick example

In [9]:
import os
import sys
import torch
import warnings
from datetime import datetime
from matplotlib import pyplot as plt
from torchvision.models import resnet18
warnings.filterwarnings('ignore')

# Change this path so that it points to the directory containing the nomeroff_net package
NOMEROFF_NET_DIR = os.path.abspath('../../../../')
sys.path.append(NOMEROFF_NET_DIR)

from nomeroff_net.text_detectors.base.ocr import OCR

In [2]:
# Default size (width, height in inches) for every matplotlib figure in this notebook.
plt.rcParams["figure.figsize"] = (10, 10)

In [3]:
%matplotlib inline 

In [4]:
# modelhub provides the dataset download helper used below.
from nomeroff_net.tools import modelhub

# Automatically download the latest dataset for the "Kz" model; the returned
# info object exposes the local dataset location under the "dataset_path" key.
info = modelhub.download_dataset_for_model("Kz")
PATH_TO_DATASET = info["dataset_path"]

# Alternative: use a dataset that is already on the local disk instead.
#PATH_TO_DATASET = os.path.join(NOMEROFF_NET_DIR, "./data/dataset/TextDetector/ocr_example")

In [5]:
PATH_TO_DATASET

'/var/www/nomeroff-net/nomeroff_net/tools/../../data/./dataset/TextDetector/Kz/autoriaNumberplateOcrKz-2019-04-26'

In [6]:
# Dataset identifier; it is embedded in the checkpoint file name below.
DATASET_NAME = "kz"
# The version tag contains today's date, so RESULT_MODEL_PATH changes whenever
# this cell is re-run on a different day.
VERSION = "{}_pytorch_lightning".format(datetime.now().strftime('%Y_%m_%d'))

# Checkpoint location: <NOMEROFF_NET_DIR>/models/anpr_ocr_<dataset>_<version>.ckpt
RESULT_MODEL_PATH = os.path.join(
    NOMEROFF_NET_DIR,
    "models/",
    f"anpr_ocr_{DATASET_NAME}_{VERSION}.ckpt",
)

In [7]:
RESULT_MODEL_PATH

'/var/www/nomeroff-net/models/anpr_ocr_kz_2022_11_08_pytorch_lightning.ckpt'

In [10]:
class kz(OCR):
    """OCR text detector configuration for the "kz" dataset.

    Extends the base ``OCR`` class by setting the alphabet, the model/input
    parameters and the training hyperparameters as instance attributes.
    """

    def __init__(self):
        super().__init__()

        # The alphabet is needed only when using an already trained model;
        # during training it is generated automatically.
        self.letters = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")

        # Parameters
        self.height, self.width = 50, 200
        self.hidden_size = 32
        self.backbone = resnet18

        # Training hyperparameters
        self.batch_size = 32
        self.epochs = 100
        # Number of CUDA devices reported by torch (0 when none are available).
        self.gpus = torch.cuda.device_count()

In [11]:
# Instantiate the detector and prepare the dataset without augmentation.
# The value returned by prepare() is kept in `model`.
ocrTextDetector = kz()
model = ocrTextDetector.prepare(PATH_TO_DATASET, use_aug=False, num_workers=1)

GET ALPHABET
Max plate length in "val": 8
Max plate length in "train": 8
Max plate length in "test": 8
Letters train  {'3', 'X', 'B', 'Q', '8', 'Y', 'M', 'W', 'G', 'O', 'N', 'S', 'V', '7', 'C', '2', 'A', 'R', '4', 'F', 'K', '1', 'H', 'D', 'P', 'U', '5', 'L', 'Z', 'T', 'E', '0', 'J', '9', 'I', '6'}
Letters val  {'3', 'X', 'Q', 'B', '8', 'Y', 'M', 'W', 'G', 'O', 'N', 'S', 'V', '7', 'C', '2', 'A', 'R', '4', 'F', 'K', '1', 'H', 'D', 'P', 'U', 'L', '5', 'Z', 'T', 'E', '0', 'J', '9', 'I', '6'}
Letters test  {'3', 'X', 'Q', 'B', '8', 'Y', 'W', 'M', 'G', 'N', 'O', 'S', 'V', '7', 'C', '2', 'A', 'R', '4', 'F', 'K', '1', 'H', 'D', 'P', 'U', 'L', '5', 'Z', 'T', 'E', '0', 'J', '9', 'I', '6'}
Max plate length in train, test and val do match
Letters in train, val and test do match
Letters: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
START BUILD DATA


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8642/8642 [00:00<00:00, 20594.96it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1001/1001 [00:00<00:00, 20893.56it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 279/279 [00:00<00:00, 20030.65it/s]

DATA PREPARED





In [12]:
#ocrTextDetector.load(RESULT_MODEL_PATH)

In [13]:
# # tune
# lr_finder = ocrTextDetector.tune()
#
# # Plot with
# fig = lr_finder.plot(suggest=True)
# fig.show()

In [None]:
# Run training. Presumably uses the hyperparameters set in kz.__init__
# (batch_size, epochs, gpus) — verify against OCR.train.
ocrTextDetector.train()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name             | Type       | Params
------------------------------------------------
0 | conv_nn          | Sequential | 2.8 M 
1 | linear1          | Linear     | 426 K 
2 | recurrent_layer1 | BlockRNN   | 115 K 
3 | recurrent_layer2 | BlockRNN   | 16.9 K
4 | linear2          | Linear     | 2.4 K 
------------------------------------------------
3.3 M     Trainable params
0         Non-trainable params
3.3 M     Total params
13.375    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [13]:
# Write the trained model to RESULT_MODEL_PATH.
ocrTextDetector.save(RESULT_MODEL_PATH)

model save to /mnt/data/var/www/nomeroff-net/models/anpr_ocr_kz_2022_03_24_pytorch_lightning.ckpt


In [14]:
# Load the model from the checkpoint at RESULT_MODEL_PATH; the cell output
# shows the repr of the value returned by load().
ocrTextDetector.load(RESULT_MODEL_PATH)

NPOcrNet(
  (cnn): BlockCNN(
    (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1,), padding=(1, 1))
    (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (linear1): Linear(in_features=1024, out_features=512, bias=True)
  (gru1): BlockRNN(
    (gru): LSTM(512, 32, batch_first=True, bidirectional=True)
  )
  (gru2): BlockRNN(
    (gru): LSTM(32, 32, batch_first=True, bidirectional=True)
  )
  (linear2): Linear(in_features=64, out_features=20, bias=True)
)

In [16]:
ocrTextDetector.model

NPOcrNet(
  (conv_nn): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True

In [18]:
# Total number of elements across all parameters of the model.
pytorch_total_params = sum(p.numel() for p in ocrTextDetector.model.parameters())
pytorch_total_params

3466661

In [20]:
import torch

model = ocrTextDetector.model

# Add up the storage width, in bits, of every parameter element. torch.finfo
# gives the bit width of floating-point dtypes, torch.iinfo of integer dtypes.
size_model = sum(
    weight.numel() * (
        torch.finfo(weight.data.dtype).bits
        if weight.data.is_floating_point()
        else torch.iinfo(weight.data.dtype).bits
    )
    for weight in model.parameters()
)
# Dividing by 8e6 converts bits to (decimal) megabytes.
print(f"model size: {size_model} / bit | {size_model / 8e6:.2f} / MB")

model size: 110933152 / bit | 13.87 / MB


In [15]:
# Evaluate on the test split. With verbose=True the output lists individual
# samples with their predicted and true text, followed by the accuracy value.
ocrTextDetector.test_acc(verbose=True)
# The same check for the validation and training splits (disabled):
#ocrTextDetector.val_acc(verbose=False)
#ocrTextDetector.train_acc(verbose=False)


[INFO] /var/www/nomeroff-net/nomeroff_net/tools/../../data/./dataset/TextDetector/Kz/autoriaNumberplateOcrKz-2019-04-26/test/img/12575349.jpg-0.png
Predicted: 777ee04 			 True: 777eee04

[INFO] /var/www/nomeroff-net/nomeroff_net/tools/../../data/./dataset/TextDetector/Kz/autoriaNumberplateOcrKz-2019-04-26/test/img/90955706-32-full.jpg-0.png
Predicted: h23602 			 True: h236602

[INFO] /var/www/nomeroff-net/nomeroff_net/tools/../../data/./dataset/TextDetector/Kz/autoriaNumberplateOcrKz-2019-04-26/test/img/12542889.jpg-0.png
Predicted: 2205ae07 			 True: 205ae07

[INFO] /var/www/nomeroff-net/nomeroff_net/tools/../../data/./dataset/TextDetector/Kz/autoriaNumberplateOcrKz-2019-04-26/test/img/90034772-13-full.jpg-0.png
Predicted: b677co 			 True: a677cno

[INFO] /var/www/nomeroff-net/nomeroff_net/tools/../../data/./dataset/TextDetector/Kz/autoriaNumberplateOcrKz-2019-04-26/test/img/90999739-9-full.jpg-0.png
Predicted: 456lb13 			 True: 456llb13

[INFO] /var/www/nomeroff-net/nomeroff_net/too

0.9175627240143369

## Then train with augmentation

In [17]:
for seed in range(1):
    # Train the next 2 epochs on the augmented dataset.
    ocrTextDetector.epochs += 2

    # Prepare the data again, this time with augmentation enabled.
    ocrTextDetector.prepare(PATH_TO_DATASET, use_aug=True, num_workers=1, seed=seed)

    # Optional learning-rate plot (disabled):
    #fig = lr_finder.plot(suggest=True)
    #fig.show()

    # Train with the saved checkpoint path passed as ckpt_path, evaluate on the
    # test split, then overwrite the checkpoint with the updated model.
    model = ocrTextDetector.train(seed=seed, ckpt_path=RESULT_MODEL_PATH)
    ocrTextDetector.test_acc(verbose=False)
    ocrTextDetector.save(RESULT_MODEL_PATH)