# Downloading and importing requeiered libraries

In [1]:
from IPython.display import clear_output

!git clone https://github.com/sodeeplearning/simpletorch
!pip install ultralytics
!pip install easyocr

clear_output(True)
print("All requiered modules have been downloaded")

All requiered modules have been downloaded


In [228]:
import torch
from ultralytics import YOLO
import matplotlib.pyplot as plt
from torchvision import transforms
import simpletorch as ST
import cv2
import easyocr
import requests
import io
import zipfile
import torchvision
import PIL
import os

# Downloading dataset

In [3]:
print("Downloading has been started")

url = "https://storage.yandexcloud.net/net-nomer-dataset/Net-Nomer-a-data_processing.zip"

response = requests.get(url)
zip = zipfile.ZipFile(io.BytesIO(response.content))
zip.extractall()
del zip

print("The dataset has been downloaded")

Downloading has been started
The dataset has been downloaded


In [45]:
def string_perf(string):
    return int(string[string.find('>') + 1: string.rfind('<')])


def xml_perf(path):
    with open(path, 'r') as f:
        file = f.read()
        massive = file.split('\n')
        return (string_perf(massive[-6]) / 1552 * 270,
                string_perf(massive[-7]) / 2592 * 480,
                string_perf(massive[-4]) / 1552 * 270,
                string_perf(massive[-5]) / 2592 * 480)

# Getting models

In [8]:
class CNN_Model (torch.nn.Module):
  def __init__(self):
    super().__init__()

    self.convolutions = torch.nn.Sequential(*[
        ST.Conv_Block(
            input_channels = 3,
            output_channels = 128,
        ),
        ST.Conv_Block(
            input_channels = 128,
            output_channels = 128,
        ),
        ST.Conv_Block(
            input_channels = 128,
            output_channels = 256,
        ),
        ST.Conv_Block(
            input_channels = 256,
            output_channels = 512,
        ),
        ST.Conv_Block(
            input_channels = 512,
            output_channels = 512,
        ),
        ST.Conv_Block(
            input_channels = 512,
            output_channels = 512,
        ),
        torch.nn.Flatten()
    ])

    self.fs = torch.nn.Sequential(*[
        torch.nn.Linear(32768, 100),
        torch.nn.ReLU(),
        torch.nn.Linear(100, 4),
    ])

  def forward(self, input_tensor):
    feature_map = self.convolutions(input_tensor)
    fs_bboxes = self.fs(feature_map)
    return fs_bboxes

In [138]:
class Number_detection:
  def __init__(self,
               path_to_model,
               yolo_version = "yolov9c.pt",
               device = 'cpu'):
    self.device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    self.detection_model = YOLO(yolo_version)
    self.cnn_model = torch.load(path_to_model).to(self.device)
    self.cnn_model.train()

  def cropping_image(self, image, x_min, y_min, x_max, y_max, x_shape, y_shape):
    top = int(y_min * y_shape)
    left = int(x_min * x_shape)
    height = int((y_max - y_min) * y_shape)
    width = int((x_max - x_min) * x_shape)
    return transforms.functional.crop(img = image,
                                      top = top,
                                      left = left,
                                      height = height,
                                      width = width)

  def detection_perform(self, image_path, min_size = 0.17):
    detection_output = self.detection_model(image_path)[0]
    classes_with_number = [2, 3, 5, 7]
    transform = transforms.Resize((512, 512))
    answer_images = torch.zeros((0, 3, 512, 512))
    image_tensor = ST.jpg_tensor(image_path)
    y_shape, x_shape = image_tensor.shape[-2:]
    num_of_preds = 0

    for current_class, (x_min, y_min, x_max, y_max) in zip(detection_output.boxes.cls, detection_output.boxes.xyxyn):
      if current_class.item() in classes_with_number and x_max - x_min >= min_size and y_max - y_min >= min_size:
        adding_image = transform(self.cropping_image(image = image_tensor,
                                                     x_min = x_min,
                                                     y_min = y_min,
                                                     x_max = x_max,
                                                     y_max = y_max,
                                                     x_shape = x_shape,
                                                     y_shape = y_shape)).unsqueeze(0)
        answer_images = torch.cat((answer_images,
                                   adding_image), dim=0)
        num_of_preds += 1
    return answer_images, num_of_preds

  def plot_boxes(self, image, coordinates, color='red'):
    x0, y0, x1, y1 = coordinates
    x_min = min(x0, x1)
    x_max = max(x0, x1)
    y_min = min(y0, y1)
    y_max = max(y0, y1)
    ST.imshow(image)
    plt.vlines(x_min, y_min, y_max, color=color)
    plt.vlines(x_max, y_max, y_min, color=color)
    plt.hlines(y_min, x_min, x_max, color=color)
    plt.hlines(y_max, x_max, x_min, color=color)

  def load_weights(self, path_to_weights):
    self.cnn_model.load_state_dict(torch.load(path_to_weights))

  def save_cnn_model(self, saving_path):
    torch.save(self.cnn_model, saving_path)

  def pred_perform(self, images_tensor, answer_bboxes):
    answer_images = []
    pil_transform = transforms.ToPILImage()
    for current_image_tensor, (x_min, y_min, x_max, y_max) in zip(images_tensor, answer_bboxes):
      cropped_image = self.cropping_image(image = current_image_tensor,
                                          x_min = x_min,
                                          y_min = y_min,
                                          x_max = x_max,
                                          y_max = y_max,
                                          x_shape = 1,
                                          y_shape = 1)
      answer_images.append(pil_transform(cropped_image))
    return answer_images

  def IMAGE_PRED (self, image_path, show = True, color = 'red', min_size = 0.17):
    images_tensor, num_of_preds = self.detection_perform(image_path, min_size=min_size)
    answer_bboxes = (self.cnn_model((images_tensor.to(self.device)) % 1)).tolist()
    received_images = self.pred_perform(images_tensor, answer_bboxes)
    return images_tensor, answer_bboxes, received_images

number_detector = Number_detection("drive/MyDrive/cnn.pt")

In [229]:
class Number_recognizer:
  def __init__(self,
               detect_model : Number_detection,
               languages = ['en'],
               image_dir = 'images',
               use_gpu = False,
               use_detector = False):
    self.text_reader = easyocr.Reader(lang_list = languages,
                                      gpu = use_gpu,
                                      detector = False)
    self.detect_model = detect_model
    self.images_dir = image_dir
    self.characters = ['A', 'B', 'C', 'E', 'H', 'K', 'M', 'O', 'P', 'T', 'X', 'Y',
              'a', 'b', 'c', 'e', 'h', 'k', 'm', 'o', 'p', 't', 'x', 'y',
              "а", "в", "с", "е", "н", "к", "м", "о", "р", "т", "х", "у",
              "А", "В", "С", "Е", "Н", "К", "М", "О", "Р", "Т", "Х", "У",
              '1', '2', '3', '4', '5', '6', '7', '8', '9', '0']

  def PRED(self, image_path):
    images_tensor, bboxes, received_images = self.detect_model.IMAGE_PRED(image_path)
    answer_output = []

    for ind, current_image in enumerate(received_images):
      current_image.save(os.path.join(self.images_dir, f"image{ind}.jpg"))

    for ind, current_path in enumerate(ST.getting_files(self.images_dir)):
      current_output = self.text_reader.recognize(current_path,
                                                  allowlist = self.characters,
                                                  detail = 0)
      answer_output.append(current_output)

    return images_tensor, bboxes, received_images, answer_output

number_recognizer = Number_recognizer(detect_model = number_detector,
                                      use_gpu = False)



# Test prediction

In [230]:
number_recognizer.PRED("Dataset/photo/0.jpg")


image 1/1 /content/Dataset/photo/0.jpg: 384x640 8 persons, 4 cars, 2 trucks, 2 backpacks, 1357.9ms
Speed: 4.5ms preprocess, 1357.9ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)


(tensor([[[[0.9956, 0.9987, 0.9986,  ..., 0.7244, 0.8961, 0.9919],
           [0.9916, 0.9923, 0.9911,  ..., 0.7144, 0.8709, 0.9889],
           [0.9340, 0.9318, 0.9143,  ..., 0.7126, 0.8424, 0.9814],
           ...,
           [0.9499, 0.9618, 0.9684,  ..., 0.4314, 0.4091, 0.3867],
           [0.9530, 0.9670, 0.9724,  ..., 0.4256, 0.4093, 0.3999],
           [0.9478, 0.9658, 0.9755,  ..., 0.4179, 0.4080, 0.4077]],
 
          [[0.9956, 0.9987, 0.9986,  ..., 0.7375, 0.9074, 0.9891],
           [0.9916, 0.9923, 0.9910,  ..., 0.7534, 0.8968, 0.9909],
           [0.9340, 0.9317, 0.9124,  ..., 0.7672, 0.8806, 0.9902],
           ...,
           [0.9342, 0.9467, 0.9591,  ..., 0.3879, 0.3657, 0.3436],
           [0.9373, 0.9519, 0.9631,  ..., 0.3764, 0.3608, 0.3560],
           [0.9321, 0.9507, 0.9662,  ..., 0.3681, 0.3577, 0.3595]],
 
          [[0.9956, 0.9987, 0.9986,  ..., 0.6546, 0.8343, 0.9086],
           [0.9916, 0.9923, 0.9911,  ..., 0.6490, 0.8052, 0.8980],
           [0.9299, 0.92