In [1]:
from ultralytics import YOLO
import cv2

from sort.sort import *

In [2]:
yolo_model=YOLO('yolo11n.pt')
plate_model=YOLO('plate.pt') #plate detection model
model_ocr = YOLO("best.pt")  #  OCR model

In [3]:
def detect_and_crop_plate(image_path, yolo_model, plate_model):
    """
    Detect and crop license plate from an image
    
    Args:
        image_path (str): Path to the input image
        yolo_model: YOLO model for object detection
        plate_model: Model for license plate detection
    """
    # Read and convert image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Could not read the image")
        
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Perform detections
    detections = yolo_model(image_rgb)[0]
    plates = plate_model(image_rgb)[0]

    # Plot the original image
    plt.figure(figsize=(10, 6))
    plt.imshow(image_rgb)
    plt.axis("off")

    # Variable to store cropped plate
    plate_cropped = None

    # Loop through detected plates
    for plate in plates.boxes.data.tolist():
        x1, y1, x2, y2, score, class_id = plate

        # Crop the detected plate
        plate_cropped = image_rgb[int(y1):int(y2), int(x1):int(x2)]
        
        # Show cropped plate
        plt.figure(figsize=(3, 2))
        plt.imshow(plate_cropped)
        plt.axis("off")

    plt.show()
    
    return plate_cropped

# Usage example:
try:
    cropped_plate = detect_and_crop_plate('download_9.jpg', yolo_model, plate_model)
    if cropped_plate is not None:
        print("Plate cropped successfully.")
    else:
        print("No plate detected or cropped.")
except Exception as e:
    print(f"An error occurred: {str(e)}")


0: 640x640 2 cars, 1 bus, 188.8ms
Speed: 12.3ms preprocess, 188.8ms inference, 13.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 License Plate, 85.7ms
Speed: 5.6ms preprocess, 85.7ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)
Plate cropped successfully.


In [16]:
def process_license_plate(image_path):
    # Read the image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Could not read the image")

    # Convert BGR to RGB
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Load model and make predictions
    model_ocr = YOLO('best.pt')
    result = model_ocr.predict(source=image_rgb, conf=0.25)[0]

    detected_numbers = []
    detected_letters = []

    # Sort boxes by x-coordinate (left to right)
    sorted_boxes = sorted(result.boxes, key=lambda b: b.xyxy[0][0].item())

    # Process each detected character
    for box in sorted_boxes:
        class_id = int(box.cls)
        
        if class_id in result.names:
            recognized_text = result.names[class_id]
            
            # Add to appropriate list
            if recognized_text.isdigit():
                detected_numbers.append(recognized_text)
            else:
                detected_letters.append(recognized_text)

    # Print the detected numbers and letters, preserving their original order
    print("Detected Numbers:", detected_numbers)
    print("Detected Letters:", detected_letters)

    # Visualize results
    plt.figure(figsize=(10, 6))
    plt.imshow(image_rgb)
    plt.axis('off')
    plt.show()

# Use the function
try:
    process_license_plate('Figure_1.0.png')
except Exception as e:
    print(f"An error occurred: {str(e)}")


0: 448x640 1 1, 1 3, 1 5, 1 6, 1 faa, 1 jeem, 1 waw, 73.6ms
Speed: 2.5ms preprocess, 73.6ms inference, 1.1ms postprocess per image at shape (1, 3, 448, 640)
Detected Numbers: ['1', '3', '6', '5']
Detected Letters: ['faa', 'waw', 'jeem']


In [4]:
# This code is a complete pipeline for license plate detection, cropping, and OCR without arabic maping
def license_plate_detection_pipeline(image_path, yolo_model, plate_model, ocr_model):
    """
    Complete pipeline for license plate detection, cropping, and OCR
    
    Args:
        image_path (str): Path to the input image
        yolo_model: YOLO model for object detection
        plate_model: Model for license plate detection
        ocr_model: Model for OCR (text recognition)
    """
    # Read and convert image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Could not read the image")
        
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Perform detections
    detections = yolo_model(image_rgb)[0]
    plates = plate_model(image_rgb)[0]

    # Plot the original image
    plt.figure(figsize=(10, 6))
    plt.imshow(image_rgb)
    plt.axis("off")

    # Variables to store results
    plate_cropped = None
    detected_numbers = []
    detected_letters = []

    # Loop through detected plates
    for plate in plates.boxes.data.tolist():
        x1, y1, x2, y2, score, class_id = plate

        # Crop the detected plate
        plate_cropped = image_rgb[int(y1):int(y2), int(x1):int(x2)]
        
        # Show cropped plate
        plt.figure(figsize=(3, 2))
        plt.imshow(plate_cropped)
        plt.axis("off")

        # Perform OCR on the cropped plate
        result = ocr_model.predict(source=plate_cropped, conf=0.25)[0]
        
        # Sort boxes left to right
        sorted_boxes = sorted(result.boxes, key=lambda b: b.xyxy[0][0].item())

        # Process each detected character
        for box in sorted_boxes:
            class_id = int(box.cls)
            
            if class_id in result.names:
                recognized_text = result.names[class_id]
                
                # Add to appropriate list
                if recognized_text.isdigit():
                    detected_numbers.append(recognized_text)
                else:
                    detected_letters.append(recognized_text)

    # Print the detected numbers and letters
    print("Detected Numbers:", detected_numbers)
    print("Detected Letters:", detected_letters)

    plt.show()
    
    return plate_cropped, detected_numbers, detected_letters

# Usage example:
try:
    cropped_plate, numbers, letters = license_plate_detection_pipeline(
        'download_15.jpg',
        yolo_model,
        plate_model,
        model_ocr  # Your OCR model
    )
except Exception as e:
    print(f"An error occurred: {str(e)}")




0: 640x640 4 cars, 1 truck, 108.6ms
Speed: 6.7ms preprocess, 108.6ms inference, 5.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 License Plate, 94.4ms
Speed: 6.1ms preprocess, 94.4ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 320x640 2 2s, 2 3s, 1 alif, 1 daal, 1 seen, 135.0ms
Speed: 5.4ms preprocess, 135.0ms inference, 1.7ms postprocess per image at shape (1, 3, 320, 640)
Detected Numbers: ['2', '2', '3', '3']
Detected Letters: ['daal', 'seen', 'alif']


In [18]:
#full pipeline with arabic mapping 
def map_to_arabic(numbers, letters):
    # Dictionary for number mapping (English to Arabic)
    number_map = {
        '0': '٠', '1': '١', '2': '٢', '3': '٣', '4': '٤',
        '5': '٥', '6': '٦', '7': '٧', '8': '٨', '9': '٩'
    }
    
    letter_map = {
        'alif': 'ا', 'baa': 'ب', 'taa': 'ت', 'thaa': 'ث',
        'jeem': 'ج', 'haa': 'ح', 'khaa': 'خ', 'daal': 'د',
        'zaal': 'ذ', 'raa': 'ر', 'zay': 'ز', 'seen': 'س',
        'sheen': 'ش', 'saad': 'ص', 'daad': 'ض', 'Taa': 'ط',
        'Thaa': 'ظ', 'ain': 'ع', 'ghayn': 'غ', 'faa': 'ف',
        'qaaf': 'ق', 'kaaf': 'ك', 'laam': 'ل', 'meem': 'م',
        'noon': 'ن', 'haa': 'ه', 'waw': 'و', 'yaa': 'ي',
        '7aa': 'ح'
    }
    
    # Convert numbers
    arabic_numbers = [number_map[num] for num in numbers]
    
    # Convert letters
    arabic_letters = [letter_map[letter] for letter in reversed(letters)]
    
    # Combine numbers and letters with space
    result = ' '.join(arabic_letters + arabic_numbers )
    
    return result

def license_plate_detection_pipeline(image_path, yolo_model, plate_model, ocr_model):
    """
    Complete pipeline for license plate detection, cropping, and OCR
    
    Args:
        image_path (str): Path to the input image
        yolo_model: YOLO model for object detection
        plate_model: Model for license plate detection
        ocr_model: Model for OCR (text recognition)
    """
    # Read and convert image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Could not read the image")
        
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Perform detections
    plates = plate_model(image_rgb)[0]

    # Plot the original image
    plt.figure(figsize=(10, 6))
    plt.imshow(image_rgb)
    plt.axis("off")

    # Variables to store results
    plate_cropped = None
    detected_numbers = []
    detected_letters = []

    # Loop through detected plates
    for plate in plates.boxes.data.tolist():
        x1, y1, x2, y2, score, class_id = plate

        # Crop the detected plate
        plate_cropped = image_rgb[int(y1):int(y2), int(x1):int(x2)]
        
        # Show cropped plate
        plt.figure(figsize=(3, 2))
        plt.imshow(plate_cropped)
        plt.axis("off")

        # Perform OCR on the cropped plate
        result = ocr_model.predict(source=plate_cropped, conf=0.25)[0]
        
        # Sort boxes left to right
        sorted_boxes = sorted(result.boxes, key=lambda b: b.xyxy[0][0].item())

        # Process each detected character
        for box in sorted_boxes:
            class_id = int(box.cls)
            
            if class_id in result.names:
                recognized_text = result.names[class_id]
                
                # Add to appropriate list
                if recognized_text.isdigit():
                    detected_numbers.append(recognized_text)
                else:
                    detected_letters.append(recognized_text)

    # Print the detected numbers and letters
    arabic_text = map_to_arabic(detected_numbers, detected_letters)
    print(f"License plate in Arabic: {arabic_text}")
    plt.show()
    
    return plate_cropped, arabic_text

# Usage example:
try:
    cropped_plate, arabic_text = license_plate_detection_pipeline(
        'download_7.jpg',
        yolo_model,
        plate_model,
        model_ocr  # Your OCR model
    )
except Exception as e:
    print(f"An error occurred: {str(e)}")




0: 640x512 1 License Plate, 140.9ms
Speed: 3.6ms preprocess, 140.9ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 512)

0: 608x640 4 1s, 2 meems, 1 waw, 90.5ms
Speed: 3.2ms preprocess, 90.5ms inference, 0.9ms postprocess per image at shape (1, 3, 608, 640)
License plate in Arabic: م و م ١ ١ ١ ١


In [5]:
##video pipeline but it is not working perfectly
def map_to_arabic(numbers, letters):
    # Dictionary for number mapping (English to Arabic)
    number_map = {
        '0': '٠', '1': '١', '2': '٢', '3': '٣', '4': '٤',
        '5': '٥', '6': '٦', '7': '٧', '8': '٨', '9': '٩'
    }
    
    letter_map = {
        'alif': 'ا', 'baa': 'ب', 'taa': 'ت', 'thaa': 'ث',
        'jeem': 'ج', 'haa': 'ح', 'khaa': 'خ', 'daal': 'د',
        'zaal': 'ذ', 'raa': 'ر', 'zay': 'ز', 'seen': 'س',
        'sheen': 'ش', 'saad': 'ص', 'daad': 'ض', 'Taa': 'ط',
        'Thaa': 'ظ', 'ain': 'ع', 'ghayn': 'غ', 'faa': 'ف',
        'qaaf': 'ق', 'kaaf': 'ك', 'laam': 'ل', 'meem': 'م',
        'noon': 'ن', 'haa': 'ه', 'waw': 'و', 'yaa': 'ي',
        '7aa': 'ح'
    }
    
    # Convert numbers
    arabic_numbers = [number_map[num] for num in numbers]
    
    # Convert letters
    arabic_letters = [letter_map[letter] for letter in reversed(letters)]
    
    # Combine numbers and letters with space
    result = ' '.join(arabic_letters + arabic_numbers )
    
    return result


def process_uploaded_video(video_path, yolo_model, plate_model, ocr_model, min_conf_threshold=0.25):
    """
    Process uploaded video for license plate detection and OCR,
    storing all detections and selecting the best one based on mean confidence
    """
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        raise ValueError("Could not read the video file")

    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    all_detections = []  # Store all plate detections

    while True:
        ret, frame = video.read()
        if not ret:
            break
            
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        plates = plate_model(frame_rgb)[0]

        for plate in plates.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = plate
            
            plate_cropped = frame_rgb[int(y1):int(y2), int(x1):int(x2)]
            result = ocr_model.predict(source=plate_cropped, conf=min_conf_threshold)[0]
            
            # Sort boxes left to right
            sorted_boxes = sorted(result.boxes, key=lambda b: b.xyxy[0][0].item())

            # Process detected characters
            detected_chars = []
            
            for box in sorted_boxes:
                class_id = int(box.cls)
                conf_score = float(box.conf)
                
                if class_id in result.names:
                    char = result.names[class_id]
                    detected_chars.append({
                        'char': char,
                        'confidence': conf_score
                    })

            # Store this detection if characters were found
            if detected_chars:
                mean_confidence = sum(d['confidence'] for d in detected_chars) / len(detected_chars)
                all_detections.append({
                    'chars': detected_chars,
                    'plate_crop': plate_cropped,
                    'mean_confidence': mean_confidence
                })

        # Display progress
        current_frame = int(video.get(cv2.CAP_PROP_POS_FRAMES))
        progress = (current_frame / total_frames) * 100
        print(f"\rProcessing: {progress:.1f}%", end="")

    video.release()
    print("\nProcessing completed!")

    # Find the detection with highest mean confidence
    if all_detections:
        best_detection = max(all_detections, key=lambda x: x['mean_confidence'])
        
        # Process the best detection
        letters = []
        numbers = []
        confidence_scores = []
        
        for char_info in best_detection['chars']:
            char = char_info['char']
            if char.isdigit():
                numbers.append(char)
            else:
                letters.append(char)
            confidence_scores.append(char_info['confidence'])

        plate_text = map_to_arabic(numbers, letters)
        best_detection['plate_text'] = plate_text
        
        # Display results
        plt.figure(figsize=(6, 4))
        plt.imshow(best_detection['plate_crop'])
        plt.title(f"Best Detection\nPlate Text: {plate_text}\n"
                f"Mean Confidence: {best_detection['mean_confidence']:.2f}")
        plt.axis('off')
        
        # print(f"\nBest Detection:")
        print(f"Plate Text: {plate_text}")
        print(f"Mean Confidence: {best_detection['mean_confidence']:.2f}")
        
        plt.show()
        
        return {
            'text': plate_text,
            'letters': letters,
            'numbers': numbers,
            'chars': best_detection['chars'],
            'mean_confidence': best_detection['mean_confidence'],
            'plate_crop': best_detection['plate_crop']
        }
    else:
        print("\nNo valid plate detections found in the video.")
        return None

# Usage example:
try:
    best_plate = process_uploaded_video(
        '2.mp4',
        yolo_model,
        plate_model,
        model_ocr,
        min_conf_threshold=0.25
    )
except Exception as e:
    print(f"An error occurred: {str(e)}")


0: 384x640 1 License Plate, 97.0ms
Speed: 3.0ms preprocess, 97.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 320x640 1 4, 1 7, 2 8s, 2 saads, 53.7ms
Speed: 2.8ms preprocess, 53.7ms inference, 1.2ms postprocess per image at shape (1, 3, 320, 640)
Processing: 0.4%
0: 384x640 1 License Plate, 62.9ms
Speed: 2.9ms preprocess, 62.9ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 352x640 1 4, 1 7, 2 8s, 2 saads, 79.2ms
Speed: 2.1ms preprocess, 79.2ms inference, 1.2ms postprocess per image at shape (1, 3, 352, 640)
Processing: 0.8%
0: 384x640 1 License Plate, 64.5ms
Speed: 3.1ms preprocess, 64.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 320x640 1 4, 1 7, 2 8s, 2 saads, 61.1ms
Speed: 2.1ms preprocess, 61.1ms inference, 1.1ms postprocess per image at shape (1, 3, 320, 640)
Processing: 1.2%
0: 384x640 1 License Plate, 65.4ms
Speed: 2.8ms preprocess, 65.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 64