# Train OCR text Detector quick example

In [1]:
import os

# These variables are read by CUDA when it initialises, so they are set in the
# very first cell, before torch is imported.
# Order CUDA devices by PCI bus ID so device indices are stable.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
# Expose only device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
import sys
import torch
import warnings
from datetime import datetime
from matplotlib import pyplot as plt
from torch import nn
from torchvision.models import shufflenet_v2_x2_0

# Silence all Python warnings to keep the notebook output compact.
warnings.filterwarnings('ignore')

# Root directory of the nomeroff-net checkout, relative to this notebook.
# Change this to match your setup.
NOMEROFF_NET_DIR = os.path.abspath('../../../../')
# Make the local nomeroff_net package importable. The membership check keeps
# the cell idempotent: re-running it does not add duplicate sys.path entries.
if NOMEROFF_NET_DIR not in sys.path:
    sys.path.append(NOMEROFF_NET_DIR)

from nomeroff_net.pipes.number_plate_text_readers.base.ocr import OCR

In [3]:
# Default size (width, height in inches) for matplotlib figures in this notebook
plt.rcParams["figure.figsize"] = (10, 10)

In [4]:
%matplotlib inline 

In [5]:
# modelhub is only needed for the optional automatic download below
from nomeroff_net.tools import modelhub

# Option A (disabled): download the latest dataset automatically
#info = modelhub.download_dataset_for_model("Kz")
#PATH_TO_DATASET = info["dataset_path"]

# Option B (active): use the local example dataset under NOMEROFF_NET_DIR
PATH_TO_DATASET = os.path.join(
    NOMEROFF_NET_DIR,
    "./data/dataset/TextDetector/ocr_example",
)
PATH_TO_DATASET

'/mnt/data/var/www/nomeroff-net/./data/dataset/TextDetector/ocr_example'

In [6]:
# Dataset name and a date-stamped version tag; both end up in the checkpoint name
DATASET_NAME = "kz"
today_stamp = datetime.now().strftime('%Y_%m_%d')
VERSION = f"{today_stamp}_pytorch_lightning"

# Checkpoint file: <NOMEROFF_NET_DIR>/models/anpr_ocr_<dataset>_<version>.ckpt
checkpoint_name = f"anpr_ocr_{DATASET_NAME}_{VERSION}.ckpt"
RESULT_MODEL_PATH = os.path.join(NOMEROFF_NET_DIR, "models/", checkpoint_name)
RESULT_MODEL_PATH

'/mnt/data/var/www/nomeroff-net/models/anpr_ocr_kz_2022_11_21_pytorch_lightning.ckpt'

In [7]:
# Shape of a single input image: channels, height, width
color_channels, height, width = 3, 50, 200

# Build a convolutional feature extractor from a pretrained ShuffleNetV2 x2.0
# by dropping its last three child modules.
backbone = shufflenet_v2_x2_0(pretrained=True)
conv_modules = list(backbone.children())[:-3]
conv_nn = nn.Sequential(*conv_modules)

# Push one random image through the extractor to inspect the feature-map shape
dummy_batch = torch.rand((1, color_channels, height, width))
print("conv_nn shape", conv_nn(dummy_batch).shape)

conv_nn shape torch.Size([1, 488, 4, 13])


In [8]:
class kz(OCR):
    """Configuration of the OCR model trained in this notebook.

    Extends the base ``OCR`` class by setting the alphabet, the input image
    size, the network sizes and the training hyperparameters.
    """

    def __init__(self):
        super().__init__()

        # Alphabet: digits 0-9 followed by uppercase A-Z.
        # Only needed when using an already trained model; during training
        # the alphabet is generated automatically.
        self.letters = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")

        # Model parameters
        self.height, self.width = 50, 200
        self.hidden_size = 32
        self.linear_size = 512
        self.backbone = shufflenet_v2_x2_0

        # Training hyperparameters
        self.batch_size = 4
        self.epochs = 5
        # Number of CUDA devices torch can see
        self.gpus = torch.cuda.device_count()

In [9]:
# Create the detector and prepare it on the dataset, with augmentation disabled
ocrTextDetector = kz()
model = ocrTextDetector.prepare(
    PATH_TO_DATASET,
    use_aug=False,
    num_workers=1,
)

GET ALPHABET
Max plate length in "val": 9
Max plate length in "train": 9
Max plate length in "test": 9
Letters train  {'6', 'B', '0', '9', '7', '4', 'C', 'H', 'S', '3', '1', 'X', 'K', '8', 'M', '-', 'A', ' ', '2'}
Letters val  {'6', 'B', '0', '9', '7', '4', 'C', 'H', 'S', '3', '1', 'X', 'K', '8', 'M', '-', 'A', ' ', '2'}
Letters test  {'6', 'B', '0', '9', '7', '4', 'C', 'H', 'S', '3', '1', 'X', 'K', '8', 'M', '-', 'A', ' ', '2'}
Max plate length in train, test and val do match
Letters in train, val and test do match
Letters:   - 0 1 2 3 4 6 7 8 9 A B C H K M S X
START BUILD DATA


100%|██████████| 4/4 [00:00<00:00, 2305.20it/s]
100%|██████████| 4/4 [00:00<00:00, 10761.52it/s]
100%|██████████| 4/4 [00:00<00:00, 6129.78it/s]

DATA PREPARED





In [10]:
#ocrTextDetector.load(RESULT_MODEL_PATH)

In [11]:
# # tune
# lr_finder = ocrTextDetector.tune()

# # Plot with
# fig = lr_finder.plot(suggest=True)
# fig.show()

In [12]:
# Start training (presumably with the epochs/batch_size set in kz.__init__ — confirm in OCR.train)
ocrTextDetector.train()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name             | Type       | Params
------------------------------------------------
0 | conv_nn          | Sequential | 1.2 M 
1 | linear1          | Linear     | 999 K 
2 | recurrent_layer1 | BlockRNN   | 139 K 
3 | recurrent_layer2 | BlockRNN   | 16.9 K
4 | linear2          | Linear     | 1.3 K 
------------------------------------------------
2.3 M     Trainable params
0         Non-trainable params
2.3 M     Total params
9.284     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  rank_zero_deprecation(


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

[INFO] best model path /mnt/data/var/www/data/logs/ocr/epoch=4-step=4-v12.ckpt


NPOcrNet(
  (conv_nn): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (2): Sequential(
      (0): InvertedResidual(
        (branch1): Sequential(
          (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=24, bias=False)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Conv2d(24, 122, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (3): BatchNorm2d(122, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (4): ReLU(inplace=True)
        )
        (branch2): Sequential(
          (0): Conv2d(24, 122, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(122, eps=1e

In [13]:
#ocrTextDetector.save(RESULT_MODEL_PATH)

In [14]:
#ocrTextDetector.load(RESULT_MODEL_PATH)

In [15]:
# Evaluate on the test split; verbose=True prints each prediction next to its true label
ocrTextDetector.test_acc(verbose=True)
# Uncomment to also evaluate on the validation / training splits
#ocrTextDetector.val_acc(verbose=False)
#ocrTextDetector.train_acc(verbose=False)


[INFO] /mnt/data/var/www/nomeroff-net/./data/dataset/TextDetector/ocr_example/test/img/AA0013BM.png
Predicted:  			 True: aa0013bm

[INFO] /mnt/data/var/www/nomeroff-net/./data/dataset/TextDetector/ocr_example/test/img/0038SC-0.png
Predicted:  			 True: 0038sc

[INFO] /mnt/data/var/www/nomeroff-net/./data/dataset/TextDetector/ocr_example/test/img/10001_2914KC7_0.png
Predicted:  			 True: 2914 kc-7

[INFO] /mnt/data/var/www/nomeroff-net/./data/dataset/TextDetector/ocr_example/test/img/1000_X371HK96_0.png
Predicted:  			 True: x371hk96
Testing Accuracy:  0.0


0.0

## Then train with augmentation

In [16]:
# for i in range(0,1):
#     # Train next 2 epochs on augmented dataset
#     ocrTextDetector.epochs += 2

#     # prepare with augmentation
#     ocrTextDetector.prepare(PATH_TO_DATASET, use_aug=True, num_workers=1, seed=i)

#     # Plot with
#     #fig = lr_finder.plot(suggest=True)
#     #fig.show()
#     model = ocrTextDetector.train(seed=i, ckpt_path=RESULT_MODEL_PATH)
#     ocrTextDetector.test_acc(verbose=False)
#     ocrTextDetector.save(RESULT_MODEL_PATH)

## Model statistics

In [17]:
# Network held by the detector
model = ocrTextDetector.model

# Count parameters and their storage size in one pass.
# Only parameters are counted; buffers are not included.
pytorch_total_params = 0
size_model = 0  # total parameter storage, in bits
for param in model.parameters():
    n_elements = param.numel()
    pytorch_total_params += n_elements
    # element_size() is the byte width of one element and works for every
    # dtype, so no float/int branching via finfo/iinfo is needed.
    size_model += n_elements * param.element_size() * 8

# 8e6 bits = 1 MB (10^6 bytes)
print(f"model size: {size_model} / bit | {size_model / 8e6:.2f} / MB")
print("total params:", pytorch_total_params)

model size: 74273280 / bit | 9.28 / MB
total params: 2321040
