# Prerequisites

In [None]:
!pip install ultralytics
!pip install -q kaggle
!pip install xmltodict

In [None]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
import json
import os
import random
import shutil

import cv2
import matplotlib.pyplot as plt
import xmltodict
from tqdm.notebook import tqdm

In [None]:
def convert_to_yolo_format(xmin, xmax, ymin, ymax, image_width, image_height):
    """Turn a corner-style bounding box into YOLO's centre/size form.

    Every returned value is scaled by the reciprocal of the matching image
    dimension, so the coordinates are expected in the same units as
    ``image_width`` / ``image_height``.

    Args:
        xmin: Smallest x-coordinate of the box.
        xmax: Largest x-coordinate of the box.
        ymin: Smallest y-coordinate of the box.
        ymax: Largest y-coordinate of the box.
        image_width: Width of the image the box belongs to.
        image_height: Height of the image the box belongs to.

    Returns:
        ``[center_x, center_y, width, height]`` with the x values divided by
        the image width and the y values divided by the image height.
    """
    inv_w = 1.0 / image_width
    inv_h = 1.0 / image_height
    box = [
        (xmin + xmax) / 2.0 * inv_w,  # centre x
        (ymin + ymax) / 2.0 * inv_h,  # centre y
        (xmax - xmin) * inv_w,        # box width
        (ymax - ymin) * inv_h,        # box height
    ]
    return box

# Data Prep

In [None]:
!mkdir /content/data
!mkdir /content/data/images
!mkdir /content/data/labels

## NP-1

In [None]:
# Download the dataset archive with the kaggle CLI (needs the kaggle.json token set up earlier).
!kaggle datasets download -d saisirishan/indian-vehicle-dataset

In [None]:
# Unpack the archive; the conversion cell below expects /content/State-wise_OLX to exist afterwards.
!unzip indian-vehicle-dataset.zip

In [None]:
# Convert the number-plate dataset into YOLO labels.
# Layout read here: <main_folder>/<state>/<name>.xml next to <name>.jpg.
# Each usable pair is written to /content/data/images and /content/data/labels.
main_folder = "/content/State-wise_OLX"
images_out = "/content/data/images"
labels_out = "/content/data/labels"
clss = 0 #Class id 0 for number plates

for i in tqdm(os.listdir(main_folder)):
  state = os.path.join(main_folder, i)
  if not os.path.isdir(state):
    continue  # a stray file at the top level has no annotations to list
  for j in tqdm(os.listdir(state)):
    if not j.endswith(".xml"):
      continue
    stem = j[:-len(".xml")]
    annotation_path = os.path.join(state, j)
    img_path = os.path.join(state, stem + ".jpg")

    # There are some xml files without any corresponding jpg images;
    # cv2.imread returns None for those (and for unreadable files).
    img = cv2.imread(img_path)
    if img is None:
      continue
    image_height, image_width = img.shape[:2]

    with open(annotation_path, encoding="utf8") as fd:
      temp = xmltodict.parse(fd.read())

    # xmltodict gives a dict for a single <object> and a list for several;
    # normalise to a list so both cases share one code path.
    objects = temp["annotation"].get("object") or []
    if isinstance(objects, dict):
      objects = [objects]

    lines = []
    for obj in objects:
      box = obj["bndbox"]
      x_center, y_center, width, height = convert_to_yolo_format(
          float(box["xmin"]), float(box["xmax"]),
          float(box["ymin"]), float(box["ymax"]),
          image_width, image_height)
      lines.append("\t".join(str(v) for v in (clss, x_center, y_center, width, height)) + "\n")
    if not lines:
      continue  # annotation without any box: skip instead of crashing

    cv2.imwrite(os.path.join(images_out, stem + ".jpg"), img)
    with open(os.path.join(labels_out, stem + ".txt"), "w") as f:
      f.writelines(lines)

In [None]:
# Sanity check: (number of images, number of label files) collected so far.
tuple(len(os.listdir(folder)) for folder in ("/content/data/images", "/content/data/labels"))

In [None]:
!rm -rf indian-vehicle-dataset.zip
!rm -rf /content/State-wise_OLX
!rm -rf /content/google_images
!rm -rf /content/video_images

## Vehicle - 1

In [None]:
# Download the vehicle dataset archive with the kaggle CLI.
!kaggle datasets download -d boulahchichenadir/cars-object-detection
# NOTE(review): the URL below names a different dataset than the one downloaded above —
# confirm whether it is an alternative source or a stale reference.
#https://www.kaggle.com/datasets/ashfakyeafi/road-vehicle-images-dataset

In [None]:
# Unpack the archive; the conversion cell below reads from /content/DATA/DATA/train.
!unzip /content/cars-object-detection.zip

In [None]:
# The archive is not needed once it has been extracted.
!rm -rf /content/cars-object-detection.zip

In [None]:
# Convert the vehicle dataset into YOLO labels.
# Each <name>.jpg in main_folder is paired with <name>.xml from the same folder;
# every annotated object is written with class id 1.
main_folder = "/content/DATA/DATA/train"
images_out = "/content/data/images"
labels_out = "/content/data/labels"
clss = 1  # Class id 1 for vehicles

for i in tqdm(os.listdir(main_folder)):
  if not i.endswith(".jpg"):
    continue
  stem = i[:-len(".jpg")]
  img_path = os.path.join(main_folder, i)
  annotation_path = os.path.join(main_folder, stem + ".xml")
  if not os.path.isfile(annotation_path):
    continue  # image without an annotation file cannot be labelled

  img = cv2.imread(img_path)
  if img is None:
    continue  # unreadable image: cv2.imread signals failure with None
  image_height, image_width = img.shape[:2]

  with open(annotation_path, encoding="utf8") as fd:
    temp = xmltodict.parse(fd.read())

  # xmltodict gives a dict for a single <object> and a list for several;
  # normalise to a list instead of relying on an exception to tell them apart.
  objects = temp["annotation"].get("object") or []
  if isinstance(objects, dict):
    objects = [objects]

  lines = []
  for obj in objects:
    box = obj["bndbox"]
    x_center, y_center, width, height = convert_to_yolo_format(
        float(box["xmin"]), float(box["xmax"]),
        float(box["ymin"]), float(box["ymax"]),
        image_width, image_height)
    lines.append("\t".join(str(v) for v in (clss, x_center, y_center, width, height)) + "\n")
  if not lines:
    continue  # annotation without any box: skip instead of crashing

  cv2.imwrite(os.path.join(images_out, i), img)
  with open(os.path.join(labels_out, stem + ".txt"), "w") as f:
    # Each entry already ends with a newline, so no extra one is appended.
    f.writelines(lines)

In [None]:
# Sanity check after adding the vehicle data: (image count, label count).
tuple(len(os.listdir(folder)) for folder in ("/content/data/images", "/content/data/labels"))

In [None]:
# Remove the extracted vehicle dataset folder now that the conversion cell has run.
!rm -rf /content/DATA

# Yolo Training

In [None]:
#Clean mismatched labels and images if any present

path1 = "/content/data/images"
path2 = "/content/data/labels"

# Map each file's stem (name without extension) to its full file name.
image_files = {os.path.splitext(name)[0]: name for name in os.listdir(path1)}
label_files = {os.path.splitext(name)[0]: name for name in os.listdir(path2)}

# Orphans are removed in BOTH directions regardless of the folder sizes:
# equal counts do not guarantee that every image has a matching label.
# Set differences also avoid the quadratic "item not in list" scans.
for stem in tqdm(sorted(image_files.keys() - label_files.keys())):
  os.remove(os.path.join(path1, image_files[stem]))

for stem in tqdm(sorted(label_files.keys() - image_files.keys())):
  os.remove(os.path.join(path2, label_files[stem]))

In [None]:
# After the cleanup both numbers should be equal: (image count, label count).
tuple(len(os.listdir(folder)) for folder in ("/content/data/images", "/content/data/labels"))

In [None]:
# Start from a clean output folder so files from an earlier run are not mixed into the new split.
!rm -rf /content/final_data

In [None]:
print("Number of images are ", len(os.listdir("/content/data/images")))
total_length = len(os.listdir("/content/data/images"))
!mkdir /content/final_data
!mkdir /content/final_data/train
!mkdir /content/final_data/validation
!mkdir /content/final_data/train/images
!mkdir /content/final_data/train/labels
!mkdir /content/final_data/validation/images
!mkdir /content/final_data/validation/labels

In [None]:
#Gotta divide it into training and validation sets

SPLIT_SEED = 42  # fixed seed so the same split is produced on every run
VAL_EVERY = 6    # every 6th shuffled image goes to validation (about 1 in 6)

src_images = "/content/data/images"
src_labels = "/content/data/labels"
split_root = "/content/final_data"

# Sort first: os.listdir order is arbitrary, so a seeded shuffle alone
# would not give a reproducible order.
lis = sorted(os.listdir(src_images))
random.Random(SPLIT_SEED).shuffle(lis) #Randomize the whole data

# Iterate the SHUFFLED list (not a fresh directory listing), otherwise the
# randomisation above has no effect on which files land in validation.
for count, i in enumerate(tqdm(lis)):
  subset = "validation" if count % VAL_EVERY == 0 else "train"
  label_name = os.path.splitext(i)[0] + ".txt"
  # Plain byte copies: decoding and re-encoding through cv2 would recompress
  # the JPEGs and is slower; the label copy also leaves no file handle open.
  shutil.copyfile(os.path.join(src_images, i),
                  os.path.join(split_root, subset, "images", i))
  shutil.copyfile(os.path.join(src_labels, label_name),
                  os.path.join(split_root, subset, "labels", label_name))

In [None]:
# Report how many images ended up in each split.
train_count = len(os.listdir("/content/final_data/train/images"))
val_count = len(os.listdir("/content/final_data/validation/images"))
print("Number of training images are ", train_count)
print("Number of Validation images are", val_count)

In [None]:
# Create data.yaml
#
# Keys written below:
#   train: training images absolute path
#   val: validation images absolute path
#   nc: number of classes
#   names: class names, listed in the order of the class ids used in the labels

# The class-name list goes under "names" (the key described above), not
# "classes". Order matches the ids written earlier: 0 = number plate, 1 = vehicle.
data_yaml_path = "/content/data.yaml"
data_yaml = '''train: /content/final_data/train/images
val: /content/final_data/validation/images
nc: 2
names: ["number_plate", "vehicle"]
'''

# Written on every run (overwriting any existing file) so the training cell
# always finds a config that matches the folders created by this notebook.
with open(data_yaml_path, "w") as f:
  f.write(data_yaml)
print(data_yaml)

## Yolov8

In [None]:
from ultralytics import YOLO

In [None]:
# Build the model from the "yolov8s.pt" checkpoint (presumably the small pretrained YOLOv8 variant — confirm).
model = YOLO("yolov8s.pt")

In [None]:
# Training options gathered in one place, then passed to the trainer unchanged.
# NOTE(review): batch=-1 presumably asks Ultralytics to pick the batch size
# automatically and device=0 presumably selects the first GPU — confirm.
train_settings = dict(
    data="/content/data.yaml",
    epochs=20,
    imgsz=640,
    batch=-1,
    device=0,
)
model.train(**train_settings)

In [None]:
from google.colab import files

# A hard-coded run folder only exists after a particular number of training
# runs, so it breaks on a fresh kernel. Pick the most recently written
# best.pt under the runs folder instead.
runs_dir = "/content/runs/detect"
candidates = [os.path.join(runs_dir, run, "weights", "best.pt")
              for run in os.listdir(runs_dir)]
candidates = [path for path in candidates if os.path.isfile(path)]
if not candidates:
  raise FileNotFoundError("No best.pt found under " + runs_dir + "; run the training cell first.")

best_weights = max(candidates, key=os.path.getmtime)
print("Downloading", best_weights)
files.download(best_weights)

### Yolov8 Export and Testing

In [None]:
from ultralytics import YOLO

In [None]:
# Load the newest trained weights rather than a hard-coded run folder, which
# only exists after a particular number of training runs.
runs_dir = "/content/runs/detect"
weight_files = [os.path.join(runs_dir, run, "weights", "best.pt")
                for run in os.listdir(runs_dir)]
weight_files = [path for path in weight_files if os.path.isfile(path)]
if not weight_files:
  raise FileNotFoundError("No best.pt found under " + runs_dir + "; run the training cell first.")

model = YOLO(max(weight_files, key=os.path.getmtime))

In [None]:
# Export the loaded model to ONNX; the options are handed to Ultralytics as-is.
onnx_options = {"format": "onnx", "dynamic": True, "simplify": True}
model.export(**onnx_options)