In [4]:
# Cell 1: Install Required Libraries
!pip install ultralytics easyocr opencv-python-headless matplotlib pandas numpy

Collecting ultralytics
  Downloading ultralytics-8.3.168-py3-none-any.whl.metadata (37 kB)
Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nv

In [5]:
# Cell 2: Mount Google Drive and Copy/Unzip Data (UPDATED for Test Zip Extraction)
from google.colab import drive
import os
import shutil
import zipfile

# Mount Google Drive
drive.mount('/content/drive')

# Define the path to your main project folder on Google Drive
drive_project_path = '/content/drive/MyDrive/LicensePlateProject'

# Define a local path in Colab where we'll copy and unzip the data for faster access
local_data_path = '/content/LicensePlateDetection'

# Create the local directory if it doesn't exist
os.makedirs(local_data_path, exist_ok=True)

print(f"Working directory created at: {local_data_path}")

# --- Copy CSV files ---
print("\nCopying CSV annotation files...")
csv_files_to_copy = [
    'Licplatesdetection_train.csv',
    'Licplatesrecognition_train.csv',
    'Predicted_License_Plates.csv',
    'SampleSubmission.csv'
]

for csv_file in csv_files_to_copy:
    src_path = os.path.join(drive_project_path, csv_file)
    dest_path = os.path.join(local_data_path, csv_file)
    if os.path.exists(src_path):
        shutil.copy2(src_path, dest_path)
        print(f"Copied: {csv_file}")
    else:
        print(f"Warning: {csv_file} not found at {src_path}")

# --- Unzip image datasets ---
print("\nUnzipping image datasets...")

detection_train_zip = os.path.join(drive_project_path, 'Licplatesdetection_train.zip')
recognition_train_zip = os.path.join(drive_project_path, 'Licplatesrecognition_train.zip')
test_zip = os.path.join(drive_project_path, 'test.zip')

# Unzip detection training data (Training Set 1)
if os.path.exists(detection_train_zip):
    print(f"Unzipping {detection_train_zip}...")
    with zipfile.ZipFile(detection_train_zip, 'r') as zip_ref:
        zip_ref.extractall(local_data_path) # Extracts to /content/LicensePlateDetection/license_plates_detection_train/
    print("Licplatesdetection_train.zip unzipped.")
else:
    print(f"Warning: {detection_train_zip} not found.")

# Unzip recognition training data (Training Set 2)
if os.path.exists(recognition_train_zip):
    print(f"Unzipping {recognition_train_zip}...")
    with zipfile.ZipFile(recognition_train_zip, 'r') as zip_ref:
        zip_ref.extractall(local_data_path) # Extracts to /content/LicensePlateDetection/license_plates_recognition_train/
    print("Licplatesrecognition_train.zip unzipped.")
else:
    print(f"Warning: {recognition_train_zip} not found.")

# Unzip test data to a *temporary, dedicated location* for Cell 2.5 to process
temp_test_extract_path = os.path.join(local_data_path, 'temp_test_extraction_area')
os.makedirs(temp_test_extract_path, exist_ok=True) # Ensure temp directory exists

if os.path.exists(test_zip):
    print(f"Unzipping {test_zip} to temporary location {temp_test_extract_path}...")
    with zipfile.ZipFile(test_zip, 'r') as zip_ref:
        zip_ref.extractall(temp_test_extract_path)
    print("test.zip unzipped to temporary location.")
else:
    print(f"Warning: {test_zip} not found.")

print("\nVerifying contents of the local data directory:")
!ls -R {local_data_path}

Mounted at /content/drive
Working directory created at: /content/LicensePlateDetection

Copying CSV annotation files...
Copied: Licplatesdetection_train.csv
Copied: Licplatesrecognition_train.csv
Copied: Predicted_License_Plates.csv
Copied: SampleSubmission.csv

Unzipping image datasets...
Unzipping /content/drive/MyDrive/LicensePlateProject/Licplatesdetection_train.zip...
Licplatesdetection_train.zip unzipped.
Unzipping /content/drive/MyDrive/LicensePlateProject/Licplatesrecognition_train.zip...
Licplatesrecognition_train.zip unzipped.
Unzipping /content/drive/MyDrive/LicensePlateProject/test.zip to temporary location /content/LicensePlateDetection/temp_test_extraction_area...
test.zip unzipped to temporary location.

Verifying contents of the local data directory:
/content/LicensePlateDetection:
license_plates_detection_train	  Predicted_License_Plates.csv
license_plates_recognition_train  SampleSubmission.csv
Licplatesdetection_train.csv	  temp_test_extraction_area
Licplatesrecognit

In [6]:
# Cell 2.5: Reorganize Image Folders for YOLOv8 Compatibility (ABSOLUTELY ROBUST REWRITE)
import os
import shutil

# Ensure local_data_path is defined from previous cells
# local_data_path = '/content/LicensePlateDetection' # Uncomment if running standalone

print(f"Reorganizing image folders within: {local_data_path}")

src_train_images = os.path.join(local_data_path, 'license_plates_detection_train')
dest_train_images_base = os.path.join(local_data_path, 'images', 'train')

# Define the target flat test image directory
dest_test_images_flat = os.path.join(local_data_path, 'images', 'test')

# Create parent 'images' directory and the flat 'images/test' directory
os.makedirs(os.path.join(local_data_path, 'images'), exist_ok=True)
os.makedirs(dest_test_images_flat, exist_ok=True)


# --- Handle Training Images (Copy contents) ---
if os.path.exists(src_train_images):
    if os.path.exists(dest_train_images_base) and os.path.isdir(dest_train_images_base):
        print(f"Clearing contents of {dest_train_images_base} for fresh copy...")
        for item in os.listdir(dest_train_images_base):
            item_path = os.path.join(dest_train_images_base, item)
            if os.path.isfile(item_path) or os.path.islink(item_path):
                os.unlink(item_path)
            elif os.path.isdir(item_path):
                shutil.rmtree(item_path)
    else:
        os.makedirs(dest_train_images_base, exist_ok=True)

    print(f"Copying contents from {src_train_images} to {dest_train_images_base}...")
    for item in os.listdir(src_train_images):
        s = os.path.join(src_train_images, item)
        d = os.path.join(dest_train_images_base, item)
        if os.path.isdir(s):
            shutil.copytree(s, d, dirs_exist_ok=True)
        else:
            shutil.copy2(s, d)
    print("Training images copied.")
    shutil.rmtree(src_train_images)
    print(f"Removed original source folder: {src_train_images}")
else:
    print(f"Warning: Source training image folder not found at {src_train_images}. Skipping copy.")


# --- Handle Test Images (Robustly find and copy contents from anywhere in local_data_path) ---
print(f"\nSearching for test images to copy to {dest_test_images_flat}...")
test_images_found = 0
# Clear the flat destination before copying
if os.path.exists(dest_test_images_flat) and os.path.isdir(dest_test_images_flat):
    print(f"Clearing contents of {dest_test_images_flat} for fresh copy...")
    for item in os.listdir(dest_test_images_flat):
        item_path = os.path.join(dest_test_images_flat, item)
        if os.path.isfile(item_path) or os.path.islink(item_path):
            os.unlink(item_path)
        elif os.path.isdir(item_path):
            shutil.rmtree(item_path)
else:
    os.makedirs(dest_test_images_flat, exist_ok=True) # Ensure it exists

# Walk through the entire temporary extraction area to find all images
temp_test_extract_path = os.path.join(local_data_path, 'temp_test_extraction_area')
if os.path.exists(temp_test_extract_path):
    for root, dirs, files in os.walk(temp_test_extract_path):
        for file in files:
            if file.lower().endswith(('.jpg', '.png', '.jpeg')): # Use .lower() for case-insensitivity
                src_file_path = os.path.join(root, file)
                dest_file_path = os.path.join(dest_test_images_flat, file)

                # Copy the file
                try:
                    shutil.copy2(src_file_path, dest_file_path)
                    test_images_found += 1
                except shutil.SameFileError:
                    pass # Should not happen with this logic, but good to catch
                except Exception as e:
                    print(f"Error copying {src_file_path} to {dest_file_path}: {e}")

    print(f"Copied {test_images_found} test images to flat structure: {dest_test_images_flat}")

    # Clean up the temporary extraction directory
    print(f"Cleaning up temporary extraction directory: {temp_test_extract_path}")
    shutil.rmtree(temp_test_extract_path)
else:
    print(f"Warning: Temporary test extraction path not found at {temp_test_extract_path}. Skipping test image processing.")


print("\nImage folder reorganization complete. Verifying new structure:")
!ls -R {local_data_path}/images/

Reorganizing image folders within: /content/LicensePlateDetection
Copying contents from /content/LicensePlateDetection/license_plates_detection_train to /content/LicensePlateDetection/images/train...
Training images copied.
Removed original source folder: /content/LicensePlateDetection/license_plates_detection_train

Searching for test images to copy to /content/LicensePlateDetection/images/test...
Clearing contents of /content/LicensePlateDetection/images/test for fresh copy...
Copied 210 test images to flat structure: /content/LicensePlateDetection/images/test
Cleaning up temporary extraction directory: /content/LicensePlateDetection/temp_test_extraction_area

Image folder reorganization complete. Verifying new structure:
/content/LicensePlateDetection/images/:
test  train

/content/LicensePlateDetection/images/test:
1000.jpg  1027.jpg  1054.jpg  1081.jpg	1109.jpg  924.jpg  951.jpg  978.jpg
1001.jpg  1028.jpg  1055.jpg  1082.jpg	1110.jpg  925.jpg  952.jpg  979.jpg
1002.jpg  1029.jpg 

In [7]:
# Cell 3: Load Annotations and Convert to YOLO Format
import pandas as pd
import cv2
import os

# Ensure local_data_path is defined from previous cells
# local_data_path = '/content/LicensePlateDetection' # Uncomment if running standalone

images_dir = os.path.join(local_data_path, 'images', 'train')
labels_dir = os.path.join(local_data_path, 'labels', 'train')
csv_path = os.path.join(local_data_path, 'Licplatesdetection_train.csv')

os.makedirs(labels_dir, exist_ok=True)

print(f"Loading annotations from: {csv_path}")
print(f"Saving YOLO labels to: {labels_dir}")
print(f"Reading images from: {images_dir}")

try:
    df_annotations = pd.read_csv(csv_path)
    print("Annotations CSV loaded successfully.")
except FileNotFoundError:
    print(f"Error: Annotation CSV not found at {csv_path}. Please check the path and ensure it was copied correctly.")
    exit()

class_id = 0

for index, row in df_annotations.iterrows():
    image_name = row['img_id']
    xmin, ymin, xmax, ymax = row['xmin'], row['ymin'], row['xmax'], row['ymax']

    image_path = os.path.join(images_dir, image_name)

    img = cv2.imread(image_path)
    if img is None:
        print(f"Warning: Could not read image {image_path}. Skipping annotation. Check if image exists and path is correct.")
        continue

    img_height, img_width, _ = img.shape

    center_x = (xmin + xmax) / 2 / img_width
    center_y = (ymin + ymax) / 2 / img_height
    width = (xmax - xmin) / img_width
    height = (ymax - ymin) / img_height

    yolo_annotation = f"{class_id} {center_x:.6f} {center_y:.6f} {width:.6f} {height:.6f}"

    label_filename = os.path.splitext(image_name)[0] + '.txt'
    label_filepath = os.path.join(labels_dir, label_filename)

    with open(label_filepath, 'w') as f:
        f.write(yolo_annotation)

print(f"Converted {len(df_annotations)} annotations to YOLO format and saved to {labels_dir}")

sample_labels = os.listdir(labels_dir)[:5]
print("\nSample generated label files:")
for label_file in sample_labels:
    with open(os.path.join(labels_dir, label_file), 'r') as f:
        print(f"{label_file}: {f.read().strip()}")

Loading annotations from: /content/LicensePlateDetection/Licplatesdetection_train.csv
Saving YOLO labels to: /content/LicensePlateDetection/labels/train
Reading images from: /content/LicensePlateDetection/images/train
Annotations CSV loaded successfully.
Converted 900 annotations to YOLO format and saved to /content/LicensePlateDetection/labels/train

Sample generated label files:
170.txt: 0 0.552506 0.542240 0.159905 0.055010
413.txt: 0 0.440588 0.720039 0.172941 0.080550
554.txt: 0 0.423016 0.787535 0.173016 0.090652
873.txt: 0 0.579268 0.743144 0.334146 0.071298
697.txt: 0 0.555375 0.669104 0.335505 0.040219


In [8]:
# Cell 4: Create data.yaml for YOLOv8 (SIMPLIFIED PATH)
import os

# Ensure local_data_path is defined from previous cells
# local_data_path = '/content/LicensePlateDetection' # Uncomment if running standalone

data_yaml_content = f"""
path: {local_data_path} # Root path to your dataset

train:
  images: images/train
  labels: labels/train

val:
  images: images/train
  labels: labels/train

# CORRECTED: Path for test images is now flat
test: images/test

nc: 1
names: ['license_plate']
"""

data_yaml_path = os.path.join(local_data_path, 'data.yaml')
with open(data_yaml_path, 'w') as f:
    f.write(data_yaml_content)

print(f"data.yaml created at: {data_yaml_path}")
print("\nContent of data.yaml:")
!cat {data_yaml_path}

data.yaml created at: /content/LicensePlateDetection/data.yaml

Content of data.yaml:

path: /content/LicensePlateDetection # Root path to your dataset

train:
  images: images/train
  labels: labels/train

val:
  images: images/train
  labels: labels/train

# CORRECTED: Path for test images is now flat
test: images/test

nc: 1
names: ['license_plate']


In [None]:
# Cell 5: Train the YOLOv8 Model (UPDATED: Make `model` global)
from ultralytics import YOLO
import os

# Ensure data_yaml_path is defined from previous cell
# local_data_path = '/content/LicensePlateDetection' # Uncomment if running standalone
# data_yaml_path = os.path.join(local_data_path, 'data.yaml')

# Declare model as global here
global model
model = YOLO('yolov8s.pt')

print("Starting YOLOv8 training...")
results = model.train(data=data_yaml_path,
                      epochs=10, # <--- REDUCED EPOCHS FOR FASTER TESTING
                      imgsz=640,
                      batch=16,
                      project=os.path.join(local_data_path, 'runs'), # Force results into your local_data_path
                      name='license_plate_detector', # This will create 'license_plate_detector' inside 'project'
                      patience=20)

print("\nYOLOv8 training completed.")
print(f"Training results saved to: {model.trainer.save_dir}")

# The trained model object 'model' is now global and directly accessible.
# No need for global_trained_model_path string anymore.

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt'...


100%|██████████| 21.5M/21.5M [00:00<00:00, 96.5MB/s]


Starting YOLOv8 training...
Ultralytics 8.3.168 🚀 Python-3.11.13 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/LicensePlateDetection/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=license_plate_detector, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True

100%|██████████| 755k/755k [00:00<00:00, 19.6MB/s]

Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           





  7                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256, 512, 3, 2]              
  8                  -1  1   1838080  ultralytics.nn.modules.block.C2f             [512, 512, 1, True]           
  9                  -1  1    656896  ultralytics.nn.modules.block.SPPF            [512, 512, 5]                 
 10                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 11             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 12                  -1  1    591360  ultralytics.nn.modules.block.C2f             [768, 256, 1]                 
 13                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 14             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 15                  -1  1    148224  ultralytics.nn.modules.block.C2f             [384,

[34m[1mtrain: [0mScanning /content/LicensePlateDetection/labels/test... 900 images, 210 backgrounds, 0 corrupt: 100%|██████████| 1110/1110 [00:00<00:00, 2041.13it/s]

[34m[1mtrain: [0mNew cache created: /content/LicensePlateDetection/labels/test.cache





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1190.2±520.8 MB/s, size: 79.4 KB)


[34m[1mval: [0mScanning /content/LicensePlateDetection/labels/test.cache... 900 images, 210 backgrounds, 0 corrupt: 100%|██████████| 1110/1110 [00:00<?, ?it/s]

Plotting labels to /content/LicensePlateDetection/runs/license_plate_detector/labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m/content/LicensePlateDetection/runs/license_plate_detector[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G       1.22      3.101      1.062          6        640: 100%|██████████| 70/70 [48:06<00:00, 41.24s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 35/35 [10:38<00:00, 18.23s/it]


                   all       1110        900      0.215      0.109     0.0852      0.044

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/70 [00:00<?, ?it/s]

In [None]:
# Cell 6: Define the License Plate Recognition Function (UPDATED: Use global `model`)
import cv2
import easyocr
import matplotlib.pyplot as plt
import numpy as np
import os
from ultralytics import YOLO # Ensure YOLO is imported

# Ensure local_data_path is defined from previous cells (Cell 2)
local_data_path = '/content/LicensePlateDetection'

# Initialize EasyOCR reader once
reader = easyocr.Reader(['en', 'ar'])

# The 'model' object is now expected to be available globally from Cell 5
# No need to reload it or use global_trained_model_path here.
# Add a check to ensure 'model' is defined, just in case Cell 5 wasn't run.
if 'model' not in globals() or not isinstance(model, YOLO):
    print("Warning: YOLO model not found globally. Attempting to load default yolov8s.pt.")
    model = YOLO("yolov8s.pt") # Fallback if global model not set
    print("Loaded default YOLOv8s model for demonstration. Please run Cell 5 first for actual results.")
else:
    print("✅ YOLO model found globally (from Cell 5).")


def process_license_plate_image(image_path, yolo_model, border_crop_percentage=0.04):
    img = cv2.imread(image_path)
    if img is None:
        return None, None, "Image Load Error", None, None

    results = yolo_model(img, imgsz=640, verbose=False)
    detected_plates = []
    for r in results:
        for box in r.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            detected_plates.append({'xmin': x1, 'ymin': y1, 'xmax': x2, 'ymax': y2})

    if not detected_plates:
        return img, None, "No plate detected", None, None

    plate_bbox = detected_plates[0]
    xmin, ymin, xmax, ymax = plate_bbox['xmin'], plate_bbox['ymin'], plate_bbox['xmax'], plate_bbox['ymax']

    cropped_plate = img[max(0, ymin):min(ymax, img.shape[0]), max(0, xmin):min(xmax, img.shape[1])]

    if cropped_plate.size == 0:
        return img, cropped_plate, "Cropping Error (initial)", None, None

    h_plate, w_plate = cropped_plate.shape[:2]
    crop_x = int(w_plate * border_crop_percentage)
    crop_y = int(h_plate * border_crop_percentage)

    cropped_plate_no_border = cropped_plate[
        min(crop_y, h_plate):max(h_plate - crop_y, 0),
        min(crop_x, w_plate):max(w_plate - crop_x, 0)
    ]

    if cropped_plate_no_border.size == 0:
        return img, cropped_plate_no_border, "Cropping Error (border removal)", None, None

    # --- Image Enhancements for OCR (Reverted to simpler, more robust) ---
    gray_plate = cv2.cvtColor(cropped_plate_no_border, cv2.COLOR_BGR2GRAY)
    blurred_plate = cv2.GaussianBlur(gray_plate, (5, 5), 0)
    _, image_for_ocr = cv2.threshold(blurred_plate, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # --- End Image Enhancements ---

    result_ocr_detailed = reader.readtext(image_for_ocr,
                                          allowlist='0123456789تونس', # Keep allowlist for better focus
                                          detail=1)

    result_ocr_detailed.sort(key=lambda x: x[0][0][0]) # Sort by x-coordinate for reading order

    # --- REFINED Text Reconstruction Logic based on [2/3 digits] + "تونس" + [4 digits] ---
    final_recognized_text = ""

    # Store potential number and 'تونس' segments
    num_parts = []
    touns_part = ""

    for (bbox, text, prob) in result_ocr_detailed:
        clean_text = text.strip()

        # Check for 'تونس' and its common misrecognitions
        if "تونس" in clean_text or "توس" in clean_text or "تونسس" in clean_text:
            touns_part = "تونس"
        elif clean_text.isdigit():
            num_parts.append(clean_text)
        # Optional: You can add logic here to filter out very low confidence detections
        # if prob < 0.5: continue # Skip very low confidence detections

    # Now, reconstruct based on the expected pattern
    if touns_part:
        # Try to find the split point for numbers
        combined_numbers = "".join(num_parts)

        # Heuristic for splitting numbers:
        # If combined_numbers is long enough for both parts (e.g., 6 digits for 2+4 or 3+3)
        if len(combined_numbers) >= 6:
            # Try 2 digits + 4 digits
            part1 = combined_numbers[:-4]
            part2 = combined_numbers[-4:]

            # If the first part is too long (e.g., 4 digits when it should be 2/3)
            if len(part1) > 3:
                part1 = part1[-3:] # Take last 3 digits
            elif len(part1) == 0: # If no first part was detected, but we expect it
                part1 = combined_numbers[:2] # Take first 2 digits as a guess

            final_recognized_text = f"{part1} {touns_part} {part2}"

        elif len(combined_numbers) >= 4: # If only 4 digits are found (likely the second part)
            final_recognized_text = f"{touns_part} {combined_numbers}"
        elif len(combined_numbers) > 0: # If some numbers, but not 4
            final_recognized_text = f"{touns_part} {combined_numbers}"
        else: # Only 'تونس' detected
            final_recognized_text = f"{touns_part}"
    else:
        # Fallback if 'تونس' was not detected at all
        all_numbers = "".join(num_parts)
        if len(all_numbers) >= 6:
            # Try to force the structure with a placeholder 'تونس'
            part1 = all_numbers[:-4]
            part2 = all_numbers[-4:]
            if len(part1) > 3:
                part1 = part1[-3:]
            elif len(part1) == 0:
                part1 = all_numbers[:2]
            final_recognized_text = f"{part1} تونس {part2}"
        elif len(all_numbers) > 0:
            final_recognized_text = f"تونس {all_numbers}"
        else:
            final_recognized_text = "No recognizable text"

    final_recognized_text = " ".join(final_recognized_text.split()).strip() # Clean up extra spaces

    return img, cropped_plate_no_border, final_recognized_text, image_for_ocr, result_ocr_detailed

In [None]:
# Cell 7: Run on Specific Test Images and Visualize Results (UPDATED)
# --- Main execution for the test set ---
if __name__ == '__main__':
    print("\n--- Starting License Plate Recognition on Specific Test Images ---")

    # Ensure local_data_path and model are defined from previous cells
    # local_data_path = '/content/LicensePlateDetection' # Uncomment if running standalone
    # model = YOLO(...) # Load your trained model here if running standalone

    # Path to your test images (now flat, thanks to Cell 2.5)
    test_image_folder_full_path = os.path.join(local_data_path, 'images', 'test')

    # UPDATED: Only include the specific image IDs requested by the user
    specific_image_ids = [
        '1022.jpg', '1008.jpg', '1000.jpg', '1034.jpg', '1036.jpg', '1037.jpg',
        '1039.jpg', '1045.jpg', '1048.jpg', '1053.jpg', '1054.jpg', '1099.jpg',
        '1100.jpg', '1103.jpg', '912.jpg', '913.jpg', '921.jpg', '938.jpg',
        '945.jpg', '950.jpg', '954.jpg'
    ]

    # Filter available files to only include the specific ones requested
    all_available_files = os.listdir(test_image_folder_full_path)
    test_image_files = [f for f in specific_image_ids if f in all_available_files]
    test_image_files.sort() # Keep them sorted for consistent output

    if not test_image_files:
        print("Warning: None of the specified images were found in the test directory.")
    else:
        print(f"Processing {len(test_image_files)} specific images.")

    results_list = []

    for i, img_file in enumerate(test_image_files):
        full_image_path = os.path.join(test_image_folder_full_path, img_file)
        print(f"\nProcessing image {i+1}/{len(test_image_files)}: {img_file}")

        if os.path.exists(full_image_path):
            original_img, processed_plate_img, recognized_text, _, _ = process_license_plate_image(
                full_image_path, model, border_crop_percentage=0.04
            )

            results_list.append({
                'image_id': img_file,
                'recognized_plate': recognized_text
            })

            # Display results for visual inspection
            if original_img is not None:
                plt.figure(figsize=(12, 6))

                plt.subplot(1, 2, 1)
                plt.imshow(cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB))
                plt.title(f"Original Image: {img_file}")
                plt.axis('off')

                if processed_plate_img is not None:
                    plt.subplot(1, 2, 2)
                    plt.imshow(cv2.cvtColor(processed_plate_img, cv2.COLOR_BGR2RGB))
                    plt.title(f"Cropped Plate (No Border)\nRecognized: {recognized_text}")
                    plt.axis('off')
                else:
                    plt.subplot(1, 2, 2)
                    plt.text(0.5, 0.5, recognized_text,
                             horizontalalignment='center', verticalalignment='center',
                             transform=plt.gca().transAxes, fontsize=12, color='red')
                    plt.axis('off')

                plt.tight_layout()
                plt.show()
            else:
                print(f"Failed to load or process image: {full_image_path}")
        else:
            print(f"Error: Specified image not found at {full_image_path}. Skipping.")

    print("\n--- Summary of Test Set Recognition Results ---")
    if results_list:
        for res in results_list:
            print(f"Image: {res['image_id']}, Recognized Plate: {res['recognized_plate']}")

        output_df = pd.DataFrame(results_list)
        output_csv_path = os.path.join(local_data_path, 'license_plate_recognition_results.csv')
        output_df.to_csv(output_csv_path, index=False)
        print(f"\nAll recognition results saved to: {output_csv_path}")

        # Optional: Copy the results CSV back to your Google Drive for easy access
        # shutil.copy2(output_csv_path, os.path.join(drive_project_path, 'license_plate_recognition_results.csv'))
        # print(f"Results copied to Google Drive: {os.path.join(drive_project_path, 'license_plate_recognition_results.csv')}")
    else:
        print("No images were processed.")

In [None]:
# Cell 9: Test Specific Images for OCR Debugging (No Code Change, Just a Reminder)
import os
import cv2
import matplotlib.pyplot as plt
# Ensure 'model' and 'process_license_plate_image' are defined from Cell 6
# Ensure 'local_data_path' is defined from Cell 2

print("--- Testing Specific Images for OCR Debugging ---")

# --- Configuration for images to test ---
# This list is already set to the specific images you requested
images_to_test_filenames = [
    '1022.jpg', '1008.jpg', '1000.jpg', '1034.jpg', '1036.jpg', '1037.jpg',
    '1039.jpg', '1045.jpg', '1048.jpg', '1053.jpg', '1054.jpg', '1099.jpg',
    '1100.jpg', '1103.jpg', '912.jpg', '913.jpg', '921.jpg', '938.jpg',
    '945.jpg', '950.jpg', '954.jpg'
]

# Base path to your test images (now flat, thanks to Cell 2.5)
base_test_image_path = os.path.join(local_data_path, 'images', 'test')


# --- Process each image in the list ---
for i, image_filename in enumerate(images_to_test_filenames):
    image_path_to_test = os.path.join(base_test_image_path, image_filename)

    print(f"\n--- Processing image {i+1}/{len(images_to_test_filenames)}: {image_filename} ---")

    if os.path.exists(image_path_to_test):
        # Call the updated process_license_plate_image function from Cell 6
        original_img, cropped_plate_no_border, recognized_text, image_for_ocr, raw_ocr_output = \
            process_license_plate_image(image_path_to_test, model, border_crop_percentage=0.04)

        # For debugging, print the raw OCR result for each image
        print("\nRaw EasyOCR Output (bbox, text, confidence):")
        if raw_ocr_output:
            for item in raw_ocr_output:
                print(item)
        else:
            print("No OCR output found for this image (or plate not detected).")

        # --- Display Results ---
        if original_img is not None:
            plt.figure(figsize=(20, 8)) # Increased figure size

            plt.subplot(1, 3, 1) # Now 3 subplots
            plt.imshow(cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB))
            plt.title(f"Original Image: {image_filename}")
            plt.axis('off')

            if cropped_plate_no_border is not None:
                plt.subplot(1, 3, 2) # Second subplot
                plt.imshow(cv2.cvtColor(cropped_plate_no_border, cv2.COLOR_BGR2RGB))
                plt.title(f"Cropped Plate (No Border)")
                plt.axis('off')

                if image_for_ocr is not None:
                    plt.subplot(1, 3, 3) # Third subplot
                    plt.imshow(image_for_ocr, cmap='gray') # Show in grayscale for binarized image
                    plt.title(f"Image Fed to OCR\nRecognized: {recognized_text}")
                    plt.axis('off')
            else:
                plt.subplot(1, 3, 2)
                plt.text(0.5, 0.5, f"Could not process plate.\nReason: {recognized_text}",
                         horizontalalignment='center', verticalalignment='center',
                         transform=plt.gca().transAxes, fontsize=12, color='red')
                plt.axis('off')
                plt.subplot(1, 3, 3)
                plt.axis('off') # Hide empty subplot

            plt.tight_layout()
            plt.show()
        else:
            print(f"Failed to load or process image: {image_path_to_test}")
    else:
        print(f"Error: Image not found at {image_path_to_test}. Please check the path and filename.")

print("\n--- Specific Images OCR Debugging Complete ---")