#  Install these libraries

In [None]:
%pip install paddlepaddle  
%pip install paddleocr    
%pip install pillow         
%pip install ultralytics    
%pip install pandas         
%pip install opencv-python






In [1]:
import os
import cv2
import shutil
import random
import pandas as pd
from PIL import Image
from ultralytics import YOLO
from paddleocr import PaddleOCR, draw_ocr





## ZIP to UNZIP comverstion

In [None]:

# Load CSV file
df = pd.read_csv(r'D:\projects\drive-download-20241223T093621Z-001\Licplatesdetection_train.csv')

# Path to the images directory
images_dir = r'D:\projects\drive-download-20241223T093621Z-001\Licplatesdetection_train\license_plates_detection_train'  # Adjust to your image directory path
output_dir = r'D:\projects\drive-download-20241223T093621Z-001\train_label'  # Directory to save YOLO formatted labels

# Function to convert box coordinates to YOLO format
def convert_to_yolo_format(image_name, ymin, xmin, ymax, xmax, image_width, image_height):
    # Calculate normalized coordinates
    x_center = (xmin + xmax) / 2 / image_width
    y_center = (ymin + ymax) / 2 / image_height
    width = (xmax - xmin) / image_width
    height = (ymax - ymin) / image_height
    
    # YOLO format: object-class x_center y_center width height
    label = f"0 {x_center} {y_center} {width} {height}"  # '0' because it's the license plate class
    return label

# Function to process images and create YOLO label files
def process_images(df):
    # Ensure output directory exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)  
    # print(df.head())
    for index, row in df.iterrows():
        # Construct the full image path
        image_path = os.path.join(images_dir, row['img_id'])
        
        # Check if the file exists
        if not os.path.exists(image_path):
            print(f"File not found: {image_path}")
            continue
        
        # Read the image
        image = cv2.imread(image_path)
        
        # Skip if the image couldn't be read
        if image is None:
            print(f"Error reading image: {image_path}")
            continue
        
        # Get image dimensions
        image_height, image_width, _ = image.shape
        
        # Convert bounding box to YOLO format
        label = convert_to_yolo_format(
            row['img_id'], row['ymin'], row['xmin'], row['ymax'], row['xmax'], 
            image_width, image_height
        )
        
        # Write the label to a text file
        # Write the label to a text file
        label_file = os.path.join(output_dir, row['img_id'].replace('.jpg', '.txt'))
        with open(label_file, 'w') as f:
            f.write(label)


# Process the dataset
process_images(df)


# Pre-processing, data analysis, and understanding

In [133]:

""""
To move images and their corresponding label files (with the same name but different extensions) into another folder, you can use the following code:

"""


# Define directories
images_dir = r'D:\projects\drive-download-20241223T093621Z-001\Licplatesdetection_train\license_plates_detection_train'
labels_dir = r'D:\projects\drive-download-20241223T093621Z-001\train_label/'  # Source labels folder
destination_dir = r'D:\projects\drive-download-20241223T093621Z-001\dataset_path'  # Destination folder

# Ensure destination directory exists
if not os.path.exists(destination_dir):
    os.makedirs(destination_dir)

# Move matching files
for label_file in os.listdir(labels_dir):
    # Get the base name (without extension) of the label file
    base_name = os.path.splitext(label_file)[0]
    
    # Construct paths for the label file and corresponding image
    label_path = os.path.join(labels_dir, label_file)
    image_path = os.path.join(images_dir, base_name + '.jpg')  # Assuming images are .jpg
    
    # Check if the corresponding image exists
    if os.path.exists(image_path):
        # Define destination paths
        dest_label = os.path.join(destination_dir, label_file)
        dest_image = os.path.join(destination_dir, base_name + '.jpg')
        
        # Move the files to the destination folder
        shutil.move(label_path, dest_label)
        shutil.move(image_path, dest_image)
        print(f"Moved: {base_name}.jpg and {base_name}.txt")


# Code to Split YOLO Dataset



In [None]:


# Define directories
images_dir = r'D:\projects\drive-download-20241223T093621Z-001\dataset_path'  # Path to images
labels_dir = r'D:\projects\drive-download-20241223T093621Z-001\dataset_path'  # Path to labels
output_dir = r'D:\projects\drive-download-20241223T093621Z-001\dataset_path'  # Path to save split datasets

# Ensure output directories exist
train_images_dir = os.path.join(output_dir, 'train/images')
train_labels_dir = os.path.join(output_dir, 'train/labels')
val_images_dir = os.path.join(output_dir, 'val/images')
val_labels_dir = os.path.join(output_dir, 'val/labels')

os.makedirs(train_images_dir, exist_ok=True)
os.makedirs(train_labels_dir, exist_ok=True)
os.makedirs(val_images_dir, exist_ok=True)
os.makedirs(val_labels_dir, exist_ok=True)

# Get list of all images and corresponding labels
image_files = [f for f in os.listdir(images_dir) if f.endswith('.jpg')]  # Adjust extension if needed
label_files = [f.replace('.jpg', '.txt') for f in image_files]

# Shuffle the dataset
combined = list(zip(image_files, label_files))
random.shuffle(combined)
image_files, label_files = zip(*combined)

# Split into training and validation sets
split_index = int(0.9 * len(image_files))
train_images = image_files[:split_index]
train_labels = label_files[:split_index]
val_images = image_files[split_index:]
val_labels = label_files[split_index:]

# Function to move files
def move_files(file_list, source_dir, dest_dir):
    for file in file_list:
        src = os.path.join(source_dir, file)
        dest = os.path.join(dest_dir, file)
        shutil.copy(src, dest)

# Move training files
move_files(train_images, images_dir, train_images_dir)
move_files(train_labels, labels_dir, train_labels_dir)

# Move validation files
move_files(val_images, images_dir, val_images_dir)
move_files(val_labels, labels_dir, val_labels_dir)

print(f"Training set: {len(train_images)} images")
print(f"Validation set: {len(val_images)} images")


# Model building 

In [None]:
# Load a pretrained YOLOv8 model
model = YOLO('yolov8s.pt')

# Train the model
model.train(data='dataset.yaml', epochs=50, imgsz=640, batch=16)

# Evaluate the model
model.val()

# Export the model (optional)
model.export(format='onnx')  # Export to ONNX for deployment


In [8]:
# YOLOv8 Model evaluation  
# Load the model
model = YOLO(r"D:\projects\drive-download-20241223T093621Z-001\model\weights\best.pt")

# Run the evaluation
results = model.val(data=r"D:\projects\drive-download-20241223T093621Z-001\dataset.yaml")

# Print specific metrics
print("Class indices with average precision:", results.ap_class_index)
print("Average precision for all classes:", results.box.all_ap)
print("Average precision:", results.box.ap)
print("Average precision at IoU=0.50:", results.box.ap50)
print("Class indices for average precision:", results.box.ap_class_index)
print("Class-specific results:", results.box.class_result)
print("F1 score:", results.box.f1)
print("F1 score curve:", results.box.f1_curve)
print("Overall fitness score:", results.box.fitness)
print("Mean average precision:", results.box.map)
print("Mean average precision at IoU=0.50:", results.box.map50)
print("Mean average precision at IoU=0.75:", results.box.map75)
print("Mean average precision for different IoU thresholds:", results.box.maps)
print("Mean results for different metrics:", results.box.mean_results)
print("Mean precision:", results.box.mp)
print("Mean recall:", results.box.mr)
print("Precision:", results.box.p)
print("Precision curve:", results.box.p_curve)
print("Precision values:", results.box.prec_values)
print("Specific precision metrics:", results.box.px)
print("Recall:", results.box.r)
print("Recall curve:", results.box.r_curve)

Ultralytics 8.3.53  Python-3.8.10 torch-2.4.1+cpu CPU (11th Gen Intel Core(TM) i7-1165G7 2.80GHz)
Model summary (fused): 168 layers, 11,125,971 parameters, 0 gradients, 28.4 GFLOPs


[34m[1mval: [0mScanning D:\projects\drive-download-20241223T093621Z-001\dataset_path\val\labels... 162 images, 0 backgrounds, 159 corrupt: 100%|██████████| 162/162 [00:00<00:00, 308.67it/s]

[34m[1mval: [0mNew cache created: D:\projects\drive-download-20241223T093621Z-001\dataset_path\val\labels.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:01<00:00,  1.13s/it]


                   all          3          3      0.904          1      0.995      0.852
Speed: 4.6ms preprocess, 320.8ms inference, 0.0ms loss, 33.8ms postprocess per image
Results saved to [1mruns\detect\val3[0m
Class indices with average precision: [0]
Average precision for all classes: [[      0.995       0.995       0.995       0.995       0.995       0.995       0.995       0.995     0.55537           0]]
Average precision: [    0.85154]
Average precision at IoU=0.50: [      0.995]
Class indices for average precision: [0]
Class-specific results: <bound method Metric.class_result of ultralytics.utils.metrics.Metric object with attributes:

all_ap: array([[      0.995,       0.995,       0.995,       0.995,       0.995,       0.995,       0.995,       0.995,     0.55537,           0]])
ap: array([    0.85154])
ap50: array([      0.995])
ap_class_index: array([0])
curves: []
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0

# License Plate Detection and Recognition on Test Set


In [11]:
# Load the trained YOLO model
model = YOLO(r"D:\projects\drive-download-20241223T093621Z-001\model\weights\best.pt")

# Test set directory
test_images_dir = r"D:\projects\drive-download-20241223T093621Z-001\test\test\test"
cropped_images_dir = r"D:\projects\drive-download-20241223T093621Z-001\test\test"
os.makedirs(cropped_images_dir, exist_ok=True)

# Process test images
for image_file in os.listdir(test_images_dir):
    if image_file.endswith(".jpg"):
        image_path = os.path.join(test_images_dir, image_file)
        
        # Run inference
        results = model(image_path, iou=0.90, conf=0.90)
        
        # Save cropped license plates
        results[0].save_crop(cropped_images_dir)



image 1/1 D:\projects\drive-download-20241223T093621Z-001\test\test\test\1000.jpg: 448x640 1 license_plate, 198.2ms
Speed: 0.0ms preprocess, 198.2ms inference, 0.0ms postprocess per image at shape (1, 3, 448, 640)

image 1/1 D:\projects\drive-download-20241223T093621Z-001\test\test\test\1001.jpg: 640x384 1 license_plate, 166.8ms
Speed: 4.0ms preprocess, 166.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)

image 1/1 D:\projects\drive-download-20241223T093621Z-001\test\test\test\1002.jpg: 640x480 2 license_plates, 249.4ms
Speed: 3.9ms preprocess, 249.4ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 480)

image 1/1 D:\projects\drive-download-20241223T093621Z-001\test\test\test\1003.jpg: 384x640 1 license_plate, 304.7ms
Speed: 1.9ms preprocess, 304.7ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

image 1/1 D:\projects\drive-download-20241223T093621Z-001\test\test\test\1004.jpg: 640x384 1 license_plate, 206.8ms
Speed: 0.0ms preproces

# Predict 

In [4]:


# Load the YOLOv8 model
model = YOLO(r'D:\projects\drive-download-20241223T093621Z-001\model\weights\best.pt')  # Replace with your model path

# Directory paths
image_dir = r'D:\projects\drive-download-20241223T093621Z-001\test'  # Directory with test images
saved_images_dir = r'D:\projects\drive-download-20241223T093621Z-001\saved_images'  # Directory to save cropped images

os.makedirs(saved_images_dir, exist_ok=True)

save_cropped_images = True  # Set to False to disable cropping and saving

# Process each image in the directory
for image_file in os.listdir(image_dir):
    if image_file.endswith('.jpg'): 
        image_path = os.path.join(image_dir, image_file)
        image_filename = os.path.basename(image_path).replace('.jpg', '')
        
        license_plate_dir = os.path.join(saved_images_dir, f"{image_filename}_license_plate")
        os.makedirs(license_plate_dir, exist_ok=True)  # Ensure the directory exists

        # Cropped image path
        cropped_image_filename = os.path.join(license_plate_dir, f"{image_filename}_license_plate.jpg")

        # Run inference with YOLOv8 on the image
        results = model(image_path, iou=0.90, conf=0.90)
        
        # Check if there are detections (boxes) in the results
        if len(results[0].boxes) > 0:
            if save_cropped_images:
                # Save cropped images (license plates) using the result's crop method
                results[0].save_crop(cropped_image_filename)
                
                print(f"Skipped saving cropped image for {image_filename}.")
        else:
            print(f"No license plates detected in {image_filename}.")



image 1/1 D:\projects\drive-download-20241223T093621Z-001\test\903.jpg: 640x480 1 license_plate, 208.3ms
Speed: 1.0ms preprocess, 208.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 480)
Skipped saving cropped image for 903.


# Image to Test convertion Accuracy of the character recognition from the license plate

In [10]:


# Image to Text conversion function
def imagesToTextConvertion(image_path):
    # Initialize the PaddleOCR object (English language)
    ocr = PaddleOCR(lang='en',use_angle_cls = True)
        
    # Perform OCR on the image
    result = ocr.ocr(image_path,det=True, rec=True, cls=False)
    for i in result:
        return i[0][1][0]  # Return the extracted texts


image_path = r"D:\projects\drive-download-20241223T093621Z-001\saved_images\903_license_plate\903_license_plate.jpg\license_plate\im.jpg"


# Example usage:
extracted_text = imagesToTextConvertion(image_path)
print("license plate Text :-", extracted_text)


[2024/12/24 11:24:36] ppocr DEBUG: Namespace(alpha=1.0, alphacolor=(255, 255, 255), benchmark=False, beta=1.0, binarize=False, cls_batch_num=6, cls_image_shape='3, 48, 192', cls_model_dir='C:\\Users\\DELL/.paddleocr/whl\\cls\\ch_ppocr_mobile_v2.0_cls_infer', cls_thresh=0.9, cpu_threads=10, crop_res_save_dir='./output', det=True, det_algorithm='DB', det_box_type='quad', det_db_box_thresh=0.6, det_db_score_mode='fast', det_db_thresh=0.3, det_db_unclip_ratio=1.5, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_east_score_thresh=0.8, det_limit_side_len=960, det_limit_type='max', det_model_dir='C:\\Users\\DELL/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, det_pse_thresh=0, det_sast_nms_thresh=0.2, det_sast_score_thresh=0.5, draw_img_save_dir='./inference_results', drop_score=0.5, e2e_algorithm='PGNet', e2e_char_dict_path='./ppocr/utils/ic15_dict.txt', e2e_limit_side_len=768, e2e_limit_type='max', e2e_model_dir=None, e