# Custom Training with YOLOv5

In this tutorial, we assemble a dataset and train a custom YOLOv5 model to recognize the objects in our dataset. To do so, we will take the following steps:

* Gather a dataset of images and label our dataset
* Export our dataset to YOLOv5
* Train YOLOv5 to recognize the objects in our dataset
* Evaluate our YOLOv5 model's performance
* Run test inference to view our model at work



![](https://uploads-ssl.webflow.com/5f6bc60e665f54545a1e52a5/615627e5824c9c6195abfda9_computer-vision-cycle.png)

# Step 1: Install Requirements

In [None]:

import torch
import os
from IPython.display import Image, clear_output  # to display images

# Report the installed torch version together with the accelerator in use:
# the name of CUDA device 0 when CUDA is available, otherwise the literal 'CPU'.
if torch.cuda.is_available():
    device_label = torch.cuda.get_device_properties(0).name
else:
    device_label = 'CPU'
print(f"Setup complete. Using torch {torch.__version__} ({device_label})")

Cloning into 'yolov5'...
remote: Enumerating objects: 16556, done.[K
remote: Counting objects: 100% (148/148), done.[K
remote: Compressing objects: 100% (101/101), done.[K
remote: Total 16556 (delta 66), reused 116 (delta 47), pack-reused 16408[K
Receiving objects: 100% (16556/16556), 15.18 MiB | 18.33 MiB/s, done.
Resolving deltas: 100% (11331/11331), done.
/content/yolov5
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m195.4/195.4 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m716.0/716.0 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.2/70.2 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.3/158.3 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90

In [None]:
!pip install patool
import patoolib
patoolib.extract_archive('/content/testset.zip')

Collecting patool
  Downloading patool-2.2.0-py2.py3-none-any.whl (96 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.0/96.0 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: patool
Successfully installed patool-2.2.0


INFO patool: Extracting /content/testset.zip ...
INFO:patool:Extracting /content/testset.zip ...
INFO patool: running /usr/bin/7z x -o./Unpack_i3wctmbs -- /content/testset.zip
INFO:patool:running /usr/bin/7z x -o./Unpack_i3wctmbs -- /content/testset.zip
INFO patool:     with input=''
INFO:patool:    with input=''
INFO patool: ... /content/testset.zip extracted to `testset'.
INFO:patool:... /content/testset.zip extracted to `testset'.


'testset'

In [None]:
import os

# Look up the directory the kernel is currently running from; relative paths
# used by shell commands in this notebook resolve against it.
current_directory = os.getcwd()

# Echo it so the reader can confirm the location.
print("Current Directory:", current_directory)


Current Directory: /content/yolov5


In [None]:
# Inference for Single Rider in a Frame

import torch
import os

# Load the model
model = torch.hub.load('ultralytics/yolov5', 'custom', '/content/1500img.pt')

# Load class names
names = model.names

# Define the folder containing images
image_folder = "/content/datasets/helmet-2/test/images"  # Replace with your folder path

# Iterate through images in the folder
for filename in os.listdir(image_folder):
    if filename.endswith(".jpg") or filename.endswith(".png"):
        image_path = os.path.join(image_folder, filename)
        print(image_path)
        # Perform inference
        results = model(image_path)
        results.show()

        # Check for nested bounding boxes and missing helmets
        for i, result1 in enumerate(results.xyxy[0]):
            x1_1, y1_1, x2_1, y2_1, conf1, cls1 = result1
            class_name1 = names[int(cls1)]

            if class_name1 == 'rider':
                helmet_found = False
                for j, result2 in enumerate(results.xyxy[0]):
                    if i != j:
                        x1_2, y1_2, x2_2, y2_2, conf2, cls2 = result2
                        class_name2 = names[int(cls2)]
                        if class_name2 == 'helmet' and x1_2 > x1_1 and y1_2 > y1_1 and x2_2 < x2_1 and y2_2 < y2_1:
                            helmet_found = True
                            break

                if not helmet_found:
                    print(f"Image: {filename} - No helmet is detected inside the rider class at bounding box: {x1_1}, {y1_1}, {x2_1}, {y2_1}")
                    !python detect.py --weights /content/1500img.pt --source /content/datasets/helmet-2/{filename} --img 416 --classes 1 --save-crop
            else:
                for j, result2 in enumerate(results.xyxy[0]):
                    if i != j:
                        x1_2, y1_2, x2_2, y2_2, conf2, cls2 = result2
                        class_name2 = names[int(cls2)]
                        if class_name2 == 'plate' and x1_2 > x1_1 and y1_2 > y1_1 and x2_2 < x2_1 and y2_2 < y2_1:
                            print(f"Image: {filename} - Class {class_name2} is inside class {class_name1}")


In [None]:
# Inference for Multiple Riders in a Single Frame


import torch
from PIL import Image
import os

def is_inside(box1, box2):
  """
  Tell whether box1 is fully contained in box2 (touching edges count as inside).

  Args:
      box1: Mapping with 'xmin', 'ymin', 'xmax' and 'ymax' entries (the candidate inner box).
      box2: Mapping with 'xmin', 'ymin', 'xmax' and 'ymax' entries (the candidate outer box).

  Returns:
      True when every edge of box1 is on or within the matching edge of box2,
      False otherwise.
  """
  # The top-left corner of box1 must not lie before the top-left corner of box2.
  for lower_key in ('xmin', 'ymin'):
    if not box1[lower_key] >= box2[lower_key]:
      return False

  # The bottom-right corner of box1 must not lie past the bottom-right corner of box2.
  for upper_key in ('xmax', 'ymax'):
    if not box1[upper_key] <= box2[upper_key]:
      return False

  return True

# Load the custom weights through the YOLOv5 hub entry point
model = torch.hub.load('ultralytics/yolov5', 'custom', path='/content/1500img.pt')

# Define image folder path
image_folder = "/content/testset"

# Cropped plates are written here; create the directory up front so the save
# below cannot fail on a fresh runtime where it does not exist yet.
os.makedirs("/content/LP_CROPS", exist_ok=True)

# Iterate through images in the folder
for filename in os.listdir(image_folder):
  if not filename.endswith((".jpg", ".png")):
    continue

  # Create full image path
  image_path = os.path.join(image_folder, filename)

  # Inference
  results = model(image_path)
  results.show()

  # One dict per detection, with 'xmin', 'ymin', 'xmax', 'ymax' and 'name' entries
  boxes = results.pandas().xyxy[0].to_dict('records')

  # Split the detections by class name
  rider_boxes = [box for box in boxes if box['name'] == "rider"]
  helmet_boxes = [box for box in boxes if box['name'] == "helmet"]
  license_plate_boxes = [box for box in boxes if box['name'] == "plate"]

  # Riders are numbered from 1 in detection order
  for rider_count, rider_box in enumerate(rider_boxes, start=1):
    # A rider is compliant when at least one helmet box sits inside the rider box
    if any(is_inside(helmet_box, rider_box) for helmet_box in helmet_boxes):
      continue

    print(f"No helmet found for rider {rider_count} in image: {filename}")

    # If no helmet found, find the license plates inside the rider's box and crop them
    plates_in_rider = [plate_box for plate_box in license_plate_boxes
                       if is_inside(plate_box, rider_box)]
    if not plates_in_rider:
      continue

    # Open the image once per rider and close it deterministically.
    # Image here is PIL's Image, imported at the top of this cell.
    with Image.open(image_path) as img:
      for plate_box in plates_in_rider:
        # Crop the license plate; convert to RGB because JPEG cannot store
        # alpha/palette modes that PNG inputs may carry.
        plate_img = img.crop((plate_box['xmin'], plate_box['ymin'],
                              plate_box['xmax'], plate_box['ymax'])).convert("RGB")
        # Save cropped image with filename
        plate_img.save(f"/content/LP_CROPS/rider_{rider_count}_plate_{filename}_.jpg")
        print(f"License plate cropped for rider {rider_count} in image: {filename}")



In [None]:
!pip install easyocr
import cv2
from matplotlib import pyplot as plt
import numpy as np
import imutils
import easyocr


In [None]:
import os
import cv2
import easyocr

# Path to the directory containing images
image_directory = "/content/LP_CROPS/"

# Initialize the EasyOCR reader
reader = easyocr.Reader(['en'])  # You can specify languages here

# Path to the output text file
output_file = "/content/extracted_text.txt"

# Scale factor applied to both width and height before running OCR
UPSCALE_FACTOR = 5

# Open the output text file in write mode (explicit encoding so OCR output
# is written the same way regardless of the platform default)
with open(output_file, "w", encoding="utf-8") as f:
    # Loop through each image in the directory, in a deterministic order
    for filename in sorted(os.listdir(image_directory)):
        if not filename.endswith((".jpg", ".png")):
            continue

        # Construct the full path to the image file
        image_path = os.path.join(image_directory, filename)

        # Load the image; cv2.imread returns None instead of raising when the
        # file cannot be decoded, so skip such files rather than crash on .shape
        image = cv2.imread(image_path)
        if image is None:
            print("Skipping unreadable image:", filename)
            continue

        # Define the new size (shape is rows, cols, ...: index 1 is width, index 0 is height)
        new_width = UPSCALE_FACTOR * image.shape[1]
        new_height = UPSCALE_FACTOR * image.shape[0]

        # Upscale the image using bilinear interpolation
        upscaled_img = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)

        # Perform OCR on the upscaled image
        result = reader.readtext(upscaled_img)

        # Join the text field (index 1) of every OCR entry and upper-case it
        extracted = ' '.join(entry[1] for entry in result)
        extracted_text = extracted.upper()
        print("Text from", filename, ":", extracted_text)

        # Write the extracted text to the output text file
        f.write(f"Text from {filename}: {extracted_text}\n")


Text from rider_1_plate_0077_jpg.rf.78fe62e83395d931f12a920d0ac16fcb.jpg_.jpg : MAZZB 4ZS31
Text from rider_1_plate_ADB03TE193676294_jpg.rf.26a1de88139e056d686058dfe8150d3e.jpg_.jpg : 7L30O
Text from rider_1_plate_ADB03TE193684491_jpg.rf.270cedd023118d817d90f97e393bcfd5.jpg_.jpg : ISO1ELL4AI
Text from rider_1_plate_KBA01EC191105189_jpg.rf.4a635226e33e51eac11d822423ba9423.jpg_.jpg : TS 20 8565
Text from rider_1_plate_ADB03TE193710917_jpg.rf.224f2d2c94455baeb6f33699577ed0ea.jpg_.jpg : LS O EJ?4
Text from rider_2_plate_ADB05EC198629625_jpg.rf.eb970c0cbf801ca03dc2425327fc9332.jpg_.jpg : EIS OHE98
Text from rider_2_plate_0872_jpg.rf.d3f5056ba923ff3fcba136354ebfefc8.jpg_.jpg : TS01 EA5415
