# Downloading and importing required libraries

In [1]:
from IPython.display import clear_output

# Fetch the simpletorch helper package (imported later as `ST`) from GitHub.
!git clone https://github.com/sodeeplearning/simpletorch
# Install ultralytics, which provides the YOLO class imported in the next cell.
!pip install ultralytics

# Replace the clone/install logs with a single confirmation line.
clear_output(True)
print("All requiered modules have been downloaded")

All requiered modules have been downloaded


In [2]:
from ultralytics import YOLO
import simpletorch as ST
import io
import zipfile
import requests
import json
import os
import torch
from torchvision import transforms
from torchsummary import summary
import matplotlib.pyplot as plt
import supervision as sv
import torchvision

print("All requiered libraries have been imported")

All requiered libraries have been imported


In [3]:
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cpu')

# Downloading dataset

In [None]:
print("Downloading has been started")

url = "https://storage.yandexcloud.net/net-nomer-dataset/Net-Nomer-a-data_processing.zip"

response = requests.get(url)
# Fail here on an HTTP error instead of handing an error page to ZipFile,
# which would otherwise surface as a confusing BadZipFile exception.
response.raise_for_status()

# The context manager closes the archive once extraction is done, and the
# descriptive name avoids shadowing the builtin `zip` that later cells use.
with zipfile.ZipFile(io.BytesIO(response.content)) as dataset_archive:
    # Extracts into the current working directory.
    dataset_archive.extractall()

print("The dataset has been downloaded")
print("Starting detection dataset processing")

def image_num(path):
  """Return the integer file stem of `path`.

  The stem is the text between the last '/' and the last '.' of the whole
  path, e.g. "Dataset/boxes/12.xml" -> 12. Raises ValueError when that text
  is not an integer literal.
  """
  stem_start = path.rfind('/') + 1  # rfind gives -1 when there is no '/', so this becomes 0
  stem_end = path.rfind('.')
  return int(path[stem_start:stem_end])

def string_perf(string):
    """Return the integer located between the first '>' and the last '<' of `string`.

    Example: "<xmin>123</xmin>" -> 123. Raises ValueError when the enclosed
    text is not an integer literal.
    """
    value_start = string.find('>') + 1
    value_end = string.rfind('<')
    return int(string[value_start:value_end])

def xml_perf(path, image_width = 2592, image_height = 1552):
    """Read one annotation file and return its box scaled by the image size.

    The four integers are taken from fixed positions counted from the end of
    the file (lines -7, -6, -5 and -4 after splitting on '\n'), so the file
    layout must match exactly.
    NOTE(review): presumably these lines are xmin, ymin, xmax, ymax of a
    Pascal-VOC style annotation -- confirm against the dataset.

    Args:
        path: path to the annotation file.
        image_width: divisor for the first and third values. Defaults to
            2592, the value previously hard-coded here.
        image_height: divisor for the second and fourth values. Defaults to
            1552, the value previously hard-coded here.

    Returns:
        A 4-tuple of floats: (v[-7] / image_width, v[-6] / image_height,
        v[-5] / image_width, v[-4] / image_height).

    Raises:
        IndexError: if the file has fewer than seven lines.
        ValueError: if one of the lines does not contain an integer.
    """
    with open(path, 'r') as f:
        lines = f.read().split('\n')

    return (string_perf(lines[-7]) / image_width,
            string_perf(lines[-6]) / image_height,
            string_perf(lines[-5]) / image_width,
            string_perf(lines[-4]) / image_height)

def bboxes_dataset(path_to_folder):
  """Build an (N, 4) float tensor of boxes from the annotation files in a folder.

  Files returned by ST.getting_files are ordered by their numeric file name
  (see image_num), so row i corresponds to the i-th file in that order. The
  sorted file list is printed before returning.
  """
  annotation_paths = sorted(ST.getting_files(path_to_folder), key=image_num)
  boxes = torch.zeros((len(annotation_paths), 4))
  for row, annotation_path in enumerate(annotation_paths):
    boxes[row] = torch.tensor(xml_perf(annotation_path))
  print(annotation_paths)
  return boxes

# Row i holds the scaled box parsed from the i-th annotation file
# (files are ordered by their numeric name).
train_bboxes_dataset = bboxes_dataset("Dataset/boxes")

print("The dataset has been performed")

# Getting the model and analyzing its predictions

In [None]:
# Build the ultralytics YOLO detector from the 'yolov9c.pt' checkpoint.
detection_model = YOLO('yolov9c.pt')

In [None]:
# Run the detector on the first dataset photo and display the raw result for inspection.
output = detection_model("Dataset/photo/0.jpg")
output

# Adapting the dataset

## Getting dataset (Skip if loaded)

In [None]:
# Number of augmented copies produced for every cropped image (see multy_image).
multy_factor = 5

In [None]:
def cropping_image(image, x_min, y_min, x_max, y_max, x_shape, y_shape):
  """Crop a box out of `image`.

  The box corners are multiplied by `x_shape` / `y_shape` and truncated with
  int() to get the crop's top-left corner and size, which are then handed to
  transforms.functional.crop.
  """
  crop_box = dict(
      top = int(y_min * y_shape),
      left = int(x_min * x_shape),
      height = int((y_max - y_min) * y_shape),
      width = int((x_max - x_min) * x_shape),
  )
  return transforms.functional.crop(img = image, **crop_box)

def detection_pred_processing(image_id):
  """Turn one dataset photo into training samples for the plate-regression CNN.

  The photo "Dataset/photo/{image_id}.jpg" is passed through `detection_model`.
  Every detection whose class id is in [2, 3, 5, 7] and whose box strictly
  contains the annotated plate box (row `image_id` of `train_bboxes_dataset`)
  yields one sample: the detection crop resized to 512x512, plus the plate
  box expressed as fractions of the detection box.
  NOTE(review): class ids 2, 3, 5, 7 are presumably the COCO vehicle classes
  (car, motorcycle, bus, truck) -- confirm.

  Returns:
    (images, boxes): tensors of shape (K, 3, 512, 512) and (K, 4), where K is
    the number of accepted detections (possibly 0).
  """
  image_path = f"Dataset/photo/{image_id}.jpg"
  detection_output = detection_model(image_path)[0]
  nx_min, ny_min, nx_max, ny_max = train_bboxes_dataset[image_id]

  image_tensor = ST.jpg_tensor(image_path)
  y_shape, x_shape = image_tensor.shape[-2:]
  resize = transforms.Resize((512, 512))
  classes_with_number = [2, 3, 5, 7]

  answer_images = torch.zeros((0, 3, 512, 512))
  answer_tensor = torch.zeros((0, 4))

  detections = zip(detection_output.boxes.cls, detection_output.boxes.xyxyn)
  for current_class, (x_min, y_min, x_max, y_max) in detections:
    if current_class.item() not in classes_with_number:
      continue
    # Keep only detections that strictly enclose the annotated plate.
    if not (x_min < nx_min and y_min < ny_min and x_max > nx_max and y_max > ny_max):
      continue

    box_width = x_max - x_min
    box_height = y_max - y_min
    # Plate corners relative to the detection box; the 0.96/0.97 and 1.04/1.03
    # factors pull the min corner in and push the max corner out slightly.
    coordinates = torch.tensor([(nx_min - x_min) / box_width * 0.96,
                                (ny_min - y_min) / box_height * 0.97,
                                (nx_max - x_min) / box_width * 1.04,
                                (ny_max - y_min) / box_height * 1.03]).unsqueeze(0)
    answer_tensor = torch.cat((answer_tensor, coordinates), dim=0)

    vehicle_crop = cropping_image(image = image_tensor,
                                  x_min = x_min,
                                  y_min = y_min,
                                  x_max = x_max,
                                  y_max = y_max,
                                  x_shape = x_shape,
                                  y_shape = y_shape)
    adding_image = resize(vehicle_crop).unsqueeze(0)
    answer_images = torch.cat((answer_images, adding_image), dim=0)

  return answer_images, answer_tensor

In [None]:
def get_dataset(num_of_images = 202):
  """Collect the vehicle crops and crop-relative plate boxes of the dataset.

  Calls detection_pred_processing(i) for i in range(num_of_images), printing
  a progress line per photo, and concatenates the per-photo results.

  Args:
    num_of_images: how many photos (ids 0 .. num_of_images - 1) to process.
      Defaults to 202, the value previously hard-coded here.

  Returns:
    (images, bboxes): tensors of shape (K, 3, 512, 512) and (K, 4), where K
    is the total number of accepted detections over all photos.
  """
  # Start from empty tensors so the result shapes hold even when nothing is
  # collected; concatenating once at the end avoids re-copying the growing
  # image tensor on every iteration.
  image_chunks = [torch.zeros((0, 3, 512, 512))]
  bbox_chunks = [torch.zeros((0, 4))]

  for i in range(num_of_images):
    print(f"{i + 1} sample now")
    current_images, current_bboxes = detection_pred_processing(i)
    image_chunks.append(current_images)
    bbox_chunks.append(current_bboxes)

  return torch.cat(image_chunks, dim=0), torch.cat(bbox_chunks, dim=0)

In [None]:
# NOTE(review): this rebinds the name `bboxes_dataset`, replacing the
# annotation-parsing function defined earlier with a tensor; that function
# cannot be called again without re-running the cell that defines it.
images_dataset, bboxes_dataset = get_dataset()
# Clear the per-sample progress lines printed by get_dataset.
clear_output(True)
print("The dataset has been performed")

In [None]:
def multy_image(image, multy_factor): # Increasing dataset size in multy_factor times
    """Return `multy_factor` colour-jittered variants of `image`.

    Each variant is produced by converting the tensor to a PIL image, applying
    a random ColorJitter and converting back to a tensor. The result has shape
    (multy_factor, 3, 512, 512), so `image` must be a 3x512x512 tensor.
    """
    jitter = ST.transforms.ColorJitter(
        brightness=0.4,
        contrast=0.3,
        saturation=0.3,
        hue=0.1,
    )
    augmentation = ST.transforms.Compose([
        ST.transforms.ToPILImage(),
        jitter,
        ST.transforms.ToTensor()
    ])

    variants = torch.zeros((multy_factor, 3, 512, 512))
    for slot in range(multy_factor):
        # A fresh random jitter is drawn on every call of the pipeline.
        variants[slot] = augmentation(image)

    return variants

def augment_dataset(dataset, multy_factor, num_of_images = 202):
    """Augment every image of `dataset` `multy_factor` times.

    The augmented copies of image i occupy rows
    i * multy_factor .. (i + 1) * multy_factor - 1 of the result, so the
    source index of row r is r // multy_factor.

    Args:
        dataset: iterable of 3x512x512 image tensors (e.g. an (N, 3, 512, 512) tensor).
        multy_factor: number of augmented copies per image.
        num_of_images: accepted for backward compatibility but not used; every
            image in `dataset` is processed.

    Returns:
        Tensor of shape (N * multy_factor, 3, 512, 512).
    """
    # Gather the per-image batches and concatenate once: growing the result
    # with torch.cat inside the loop re-copies all previous images each step.
    chunks = [torch.zeros((0, 3, 512, 512))]
    for current_image in dataset:
        chunks.append(multy_image(image = current_image,
                                  multy_factor = multy_factor))
    return torch.cat(chunks, dim = 0)

# Produce multy_factor colour-jittered copies of every collected crop.
# NOTE(review): augment_dataset does not read num_of_images in its body.
augmented_images_dataset = augment_dataset(dataset = images_dataset,
                                           multy_factor = multy_factor,
                                           num_of_images = images_dataset.shape[0])

In [None]:
# Persist the augmented crops and their boxes so the expensive steps above can be skipped next time.
# NOTE(review): the path looks like a mounted Google Drive folder; nothing in the visible cells mounts it -- confirm.
saving_dir = "drive/MyDrive/"
torch.save(augmented_images_dataset, saving_dir + "Detection_images.pt")
torch.save(bboxes_dataset, saving_dir + "Detection_bboxes.pt")

## Loading dataset

In [None]:
# Must equal the multy_factor used when the saved images were generated: the
# split and the image-to-bbox index mapping in the cells below rely on it.
multy_factor = 5

In [None]:
path_to_data = "drive/MyDrive/"
# NOTE(review): torch.load unpickles the file; only load files you trust.
augmented_images_dataset = torch.load(path_to_data + "Detection_images.pt")
# The stored boxes are multiplied by 512 here.
# NOTE(review): presumably this converts crop-relative fractions into pixel
# coordinates of the 512x512 crops -- confirm against how the file was produced.
bboxes_dataset = torch.load(path_to_data + "Detection_bboxes.pt") * 512

In [None]:
# The first 160 boxes and the first 160 * multy_factor images form the
# training set; everything after that is the validation set.
# NOTE(review): this assumes each box owns multy_factor consecutive image
# rows, as produced by augment_dataset -- confirm for the loaded files.
train_images = augmented_images_dataset[:160 * multy_factor]
val_images = augmented_images_dataset[160 * multy_factor:]
train_bboxes = bboxes_dataset[:160]
val_bboxes = bboxes_dataset[160:]

# Getting CNN model

In [None]:
class CNN_Model (torch.nn.Module):
  """Convolutional regressor that maps an image batch to four box values.

  Six ST.Conv_Block stages (3 -> 128 -> 128 -> 256 -> 512 -> 512 -> 512
  channels) are followed by Flatten and a two-layer fully connected head
  producing 4 outputs per sample.
  NOTE(review): the head expects 32768 flattened features, which equals
  512 * 8 * 8; this is consistent with each Conv_Block halving a 512x512
  input -- confirm against ST.Conv_Block.
  """

  def __init__(self):
    super().__init__()

    # (input_channels, output_channels) of the consecutive Conv_Block stages.
    channel_pairs = [
        (3, 128),
        (128, 128),
        (128, 256),
        (256, 512),
        (512, 512),
        (512, 512),
    ]
    conv_blocks = [
        ST.Conv_Block(input_channels = in_channels, output_channels = out_channels)
        for in_channels, out_channels in channel_pairs
    ]
    self.convolutions = torch.nn.Sequential(*conv_blocks, torch.nn.Flatten())

    self.fs = torch.nn.Sequential(
        torch.nn.Linear(32768, 100),
        torch.nn.ReLU(),
        torch.nn.Linear(100, 4),
    )

  def forward(self, input_tensor):
    """Return the (batch, 4) box predictions for `input_tensor`."""
    return self.fs(self.convolutions(input_tensor))

# Instantiate the regressor on the selected device and report its trainable parameter count.
cnn_model = CNN_Model().to(device)
print(f"{sum(p.numel() for p in cnn_model.parameters() if p.requires_grad)} params in model")
cnn_model

# Training CNN model

In [None]:
def dist_loss(bboxes_pred, bboxes_true):
  """Return torchvision's distance-IoU loss between the two box sets, averaged with reduction='mean'."""
  mean_diou_loss = torchvision.ops.distance_box_iou_loss(
      bboxes_pred,
      bboxes_true,
      reduction = 'mean',
  )
  return mean_diou_loss

In [None]:
def iou (bboxes_pred, bboxes_true):
  """Return the negative log of the mean complete-IoU loss between two box sets.

  Fix: the previous body ignored its parameters and read the globals
  `train_pred` / `train_true`, so it returned the training-batch value for
  any input and raised NameError before the training loop had run.

  Args:
    bboxes_pred: predicted boxes, passed to torchvision.ops.complete_box_iou_loss.
    bboxes_true: reference boxes of the same shape.

  Returns:
    A scalar tensor: -log(mean complete-IoU loss).
  """
  mean_ciou_loss = torchvision.ops.complete_box_iou_loss(bboxes_pred, bboxes_true, reduction = 'mean')
  return mean_ciou_loss.log() * -1

In [None]:
# Number of optimisation steps; the loop below draws one random mini-batch per "epoch".
num_of_epochs = 2000
train_batch_size = 15
learning_rate = 1e-4
# The scheduler is stepped once every scheduler_freq iterations, multiplying
# the learning rate by scheduler_gamma each time.
scheduler_gamma = 0.5
scheduler_freq = 400
# NOTE(review): change_loss is not referenced anywhere in the visible notebook.
change_loss = 200

# Plot refresh period and validation period, in iterations.
show_every = 10
val_every = 10
val_batch_size = 15
# Weights are saved only when the validation loss drops below this value,
# which is then lowered to the new best.
# NOTE(review): the targets were multiplied by 512 when loaded, so an L1 loss
# under 0.1 may never be reached and no checkpoint would be written -- confirm.
best_val = 0.1

cnn_loss = torch.nn.L1Loss()
optimizer = torch.optim.Adam(cnn_model.parameters(), lr = learning_rate)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = scheduler_gamma)

In [None]:
# Per-iteration training losses and per-validation-step validation losses.
losses = {"train" : [], "val" : []}
# NOTE(review): the model is left in train mode for the validation passes too.
cnn_model.train()

for epoch in range(1, num_of_epochs + 1):
  # Each "epoch" is one optimisation step on a randomly sampled mini-batch.
  optimizer.zero_grad()
  train_batch = torch.randint(high = train_images.shape[0], size = [train_batch_size])

  train_pred = cnn_model(train_images[train_batch].to(device))
  # Every box owns multy_factor consecutive augmented images, so integer
  # division maps an image index back to its box row.
  train_true = train_bboxes[train_batch // multy_factor].to(device)

  train_loss = cnn_loss(train_pred, train_true)# + dist_loss(train_pred, train_true)

  train_loss.backward()
  optimizer.step()

  losses["train"].append(train_loss.item())

  if epoch % val_every == 0:
    with torch.no_grad():
      val_batch = torch.randint(high = val_images.shape[0], size = [val_batch_size])
      val_pred = cnn_model(val_images[val_batch].to(device))
      val_true = val_bboxes[val_batch // multy_factor].to(device)

      val_loss = cnn_loss(val_pred, val_true)# + dist_loss(val_pred, val_true)

      losses["val"].append(val_loss.item())
      print(val_loss.item())
      # Checkpoint whenever the sampled validation loss beats the best so far.
      if (val_loss.item() < best_val):
        torch.save(cnn_model.state_dict(), "drive/MyDrive/cnn_best_weights.pt")
        best_val = val_loss.item()
        print("The weights have been updated")

  if epoch % scheduler_freq == 0:
    scheduler.step()

  if epoch % show_every == 0:
    clear_output(True)
    fig, ax = plt.subplots(figsize=(30, 10))
    plt.title("Loss graph")
    plt.plot(losses["train"], ".-", label="Training Loss")
    # Fix: derive the x positions from the recorded validation points and
    # val_every. The previous torch.arange(0, epoch, show_every) only had the
    # right length when val_every == show_every (matplotlib raised a shape
    # mismatch otherwise) and placed every point one validation period early.
    # The k-th validation loss was measured at iteration val_every * (k + 1),
    # i.e. index val_every * (k + 1) - 1 of the training-loss list.
    val_steps = [val_every * (k + 1) - 1 for k in range(len(losses["val"]))]
    plt.plot(val_steps, losses["val"], ".-", label="Validation Loss")
    plt.xlabel("Iteration")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid()
    plt.show()


# Checking CNN training

In [None]:
def cnn_pred(image_id, color_pred = 'blue', color_right = 'green'):
  """Plot the predicted and the reference box for one validation image.

  Changes from the previous version: the unused `annotator` and `to_pil`
  locals are gone, the forward pass runs under torch.no_grad() (no autograd
  graph is needed for a prediction), and the duplicated rectangle-drawing
  code is shared by a local helper.

  Args:
    image_id: row index into `val_images`; the reference box is row
      image_id // multy_factor of `val_bboxes`.
    color_pred: matplotlib colour of the predicted box.
    color_right: matplotlib colour of the reference box.

  Returns:
    The raw model output as a nested list, [[x0, y0, x1, y1]].
  """
  def draw_box(x0, y0, x1, y1, color):
    # Draw the four sides of the rectangle on the current matplotlib axes.
    plt.vlines(x0, y0, y1, color=color)
    plt.vlines(x1, y1, y0, color=color)
    plt.hlines(y0, x0, x1, color=color)
    plt.hlines(y1, x1, x0, color=color)

  image = val_images[image_id]
  # NOTE(review): the model is put in train mode for inference, as in the
  # original cell; eval() is the usual choice -- confirm this is intended.
  cnn_model.train()
  transform = transforms.Resize((512, 512))

  image_tensor = transform(image.unsqueeze(0)).to(device)
  with torch.no_grad():
    coordinates = cnn_model(image_tensor).tolist()
  rx0, ry0, rx1, ry1 = val_bboxes[image_id // multy_factor].clone()

  # The network output is unconstrained, so order the corners before drawing.
  x0, y0, x1, y1 = coordinates[0]
  ST.imshow(image)
  draw_box(min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1), color_pred)
  draw_box(rx0, ry0, rx1, ry1, color_right)

  return coordinates

# Visual sanity check on validation image 10; keeps the raw predicted coordinates.
pred_bboxes_s = cnn_pred(10)

# Using the model

In [None]:
import torch
import ultralytics
import matplotlib.pyplot as plt
from torchvision import transforms

class Number_detection:
  """Two-stage plate localiser: YOLO finds vehicles, a CNN regresses a box inside each vehicle crop.

  This cell previously contained two pasted versions of the class spliced
  into each other (a statement fused with a second `class` header, followed
  by an orphaned class tail), which is a SyntaxError. The two versions are
  merged here; the public methods of both are kept: `IMAGE_PRED` (returns
  three values, including the cropped PIL images), `image_pred` (returns two
  values) and `pred_perform`.
  """

  def __init__(self,
               path_to_model,
               yolo_version = "yolov9c.pt",
               device = 'cpu'):
    """Load the YOLO detector and the pickled CNN model.

    Args:
      path_to_model: path to a whole CNN module saved with torch.save
        (see save_cnn_model), not a state dict.
      yolo_version: checkpoint name or path passed to YOLO.
      device: accepted for interface compatibility but not used; as before,
        the device is chosen automatically (first CUDA GPU, else CPU).
    """
    self.device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    self.detection_model = YOLO(yolo_version)
    # NOTE(security): torch.load unpickles arbitrary objects; only load
    # trusted files. map_location lets a GPU-saved model load on a CPU host.
    self.cnn_model = torch.load(path_to_model, map_location = self.device).to(self.device)
    # NOTE(review): train mode at inference is kept from the original code;
    # eval() is the usual choice -- confirm this is intended.
    self.cnn_model.train()

  def cropping_image(self, image, x_min, y_min, x_max, y_max, x_shape, y_shape):
    """Crop the box out of `image`; the corners are multiplied by x_shape / y_shape and truncated to ints."""
    top = int(y_min * y_shape)
    left = int(x_min * x_shape)
    height = int((y_max - y_min) * y_shape)
    width = int((x_max - x_min) * x_shape)
    return transforms.functional.crop(img = image,
                                      top = top,
                                      left = left,
                                      height = height,
                                      width = width)

  def detection_perform(self, image_path, min_size = 0.17):
    """Detect vehicles in an image file and return their 512x512 crops.

    A detection is kept when its class id is in [2, 3, 5, 7] and both its
    normalised width and height are at least `min_size`.

    Returns:
      (crops, count): a (count, 3, 512, 512) tensor and the number of crops.
    """
    detection_output = self.detection_model(image_path)[0]
    classes_with_number = [2, 3, 5, 7]
    transform = transforms.Resize((512, 512))
    image_tensor = ST.jpg_tensor(image_path)
    y_shape, x_shape = image_tensor.shape[-2:]

    # Seed with an empty tensor so the result shape holds with no detections.
    crops = [torch.zeros((0, 3, 512, 512))]
    for current_class, (x_min, y_min, x_max, y_max) in zip(detection_output.boxes.cls, detection_output.boxes.xyxyn):
      if current_class.item() in classes_with_number and x_max - x_min >= min_size and y_max - y_min >= min_size:
        crops.append(transform(self.cropping_image(image = image_tensor,
                                                   x_min = x_min,
                                                   y_min = y_min,
                                                   x_max = x_max,
                                                   y_max = y_max,
                                                   x_shape = x_shape,
                                                   y_shape = y_shape)).unsqueeze(0))
    return torch.cat(crops, dim=0), len(crops) - 1

  def plot_boxes(self, image, coordinates, color='red'):
    """Show `image` and draw the rectangle given by `coordinates` = (x0, y0, x1, y1)."""
    x0, y0, x1, y1 = coordinates
    x_min = min(x0, x1)
    x_max = max(x0, x1)
    y_min = min(y0, y1)
    y_max = max(y0, y1)
    ST.imshow(image)
    plt.vlines(x_min, y_min, y_max, color=color)
    plt.vlines(x_max, y_max, y_min, color=color)
    plt.hlines(y_min, x_min, x_max, color=color)
    plt.hlines(y_max, x_max, x_min, color=color)

  def load_weights(self, path_to_weights):
    """Load a state dict into the CNN (mapped onto the current device)."""
    self.cnn_model.load_state_dict(torch.load(path_to_weights, map_location = self.device))

  def save_cnn_model(self, saving_path):
    """Pickle the whole CNN module to `saving_path`."""
    torch.save(self.cnn_model, saving_path)

  def pred_perform(self, images_tensor, answer_bboxes):
    """Crop every image to its predicted box and return the crops as PIL images.

    The box values are used as pixel coordinates of the crop tensors (the
    scale factors passed to cropping_image are 1).
    """
    answer_images = []
    pil_transform = transforms.ToPILImage()
    for current_image_tensor, (x0, y0, x1, y1) in zip(images_tensor, answer_bboxes):
      # The network output is unconstrained: order the corners (as plot_boxes
      # does) so the crop never gets a negative width or height.
      cropped_image = self.cropping_image(image = current_image_tensor,
                                          x_min = min(x0, x1),
                                          y_min = min(y0, y1),
                                          x_max = max(x0, x1),
                                          y_max = max(y0, y1),
                                          x_shape = 1,
                                          y_shape = 1)
      answer_images.append(pil_transform(cropped_image))
    return answer_images

  def _predict_bboxes(self, images_tensor):
    """Run the CNN on the vehicle crops and return one [x0, y0, x1, y1] list per crop."""
    if images_tensor.shape[0] == 0:
      # No vehicle crops: skip the forward pass on an empty batch.
      return []
    with torch.no_grad():
      # NOTE(review): `% 1` is applied to the INPUT images, exactly as in the
      # original code; the training loop does not do this -- confirm intent.
      return (self.cnn_model((images_tensor.to(self.device)) % 1)).tolist()

  def IMAGE_PRED (self, image_path, show = True, color = 'red', min_size = 0.17):
    """Detect vehicles, predict a box per crop and cut the predicted regions out.

    `show` and `color` are accepted but not used.

    Returns:
      (images_tensor, answer_bboxes, received_images): the vehicle crops, the
      predicted boxes as nested lists, and the cropped regions as PIL images.
    """
    images_tensor, num_of_preds = self.detection_perform(image_path, min_size=min_size)
    answer_bboxes = self._predict_bboxes(images_tensor)
    received_images = self.pred_perform(images_tensor, answer_bboxes)
    return images_tensor, answer_bboxes, received_images

  def image_pred (self, image_path, show = True, color = 'red', min_size = 0.17):
    """Same as IMAGE_PRED but without the cropped PIL images.

    `show` and `color` are accepted but not used.

    Returns:
      (images_tensor, answer_bboxes)
    """
    images_tensor, num_of_preds = self.detection_perform(image_path, min_size=min_size)
    answer_bboxes = self._predict_bboxes(images_tensor)
    return images_tensor, answer_bboxes

# NOTE(review): the two pasted versions of this cell disagreed on the model
# path ("drive/MyDrive/cnn.pt" vs "cnn.pt"). The Drive path is kept because
# the rest of the notebook stores its artifacts there -- adjust if needed.
number_detector = Number_detection("drive/MyDrive/cnn.pt")