In [1]:
!pip install ultralytics opencv-python deep_sort_realtime
!pip install supervision inference
!pip install Pillow
!pip install python-bidi
import tensorflow as tf
import keras
from ultralytics import YOLO
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import supervision as sv
import numpy as np
from google.colab.patches import cv2_imshow
import re
from keras.models import load_model
import csv
import pandas as pd
import ast
from scipy import interpolate
import string
from PIL import Image, ImageDraw, ImageFont  # Add these imports
from bidi.algorithm import get_display  # Add this for proper Persian text direction




In [2]:
!wget https://filedn.eu/l1MYFwJMIh4Y60BIIrYyMiy/licence_plate_detection/new_cap.mp4
!wget https://filedn.eu/l1MYFwJMIh4Y60BIIrYyMiy/licence_plate_detection/OCR_inference.h5
!wget https://filedn.eu/l1MYFwJMIh4Y60BIIrYyMiy/licence_plate_detection/plate_charset%20_OCR.txt
!wget https://filedn.eu/l1MYFwJMIh4Y60BIIrYyMiy/licence_plate_detection/plate.pt
!wget https://filedn.eu/l1MYFwJMIh4Y60BIIrYyMiy/licence_plate_detection/Vazirmatn-Black.ttf
!wget https://filedn.eu/l1MYFwJMIh4Y60BIIrYyMiy/licence_plate_detection/vehicle.pt

--2025-07-18 09:54:54--  https://filedn.eu/l1MYFwJMIh4Y60BIIrYyMiy/licence_plate_detection/new_cap.mp4
Resolving filedn.eu (filedn.eu)... 185.62.236.186
Connecting to filedn.eu (filedn.eu)|185.62.236.186|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10819664 (10M) [video/mp4]
Saving to: ‘new_cap.mp4.1’


2025-07-18 09:54:58 (4.44 MB/s) - ‘new_cap.mp4.1’ saved [10819664/10819664]

--2025-07-18 09:54:58--  https://filedn.eu/l1MYFwJMIh4Y60BIIrYyMiy/licence_plate_detection/OCR_inference.h5
Resolving filedn.eu (filedn.eu)... 185.62.236.186
Connecting to filedn.eu (filedn.eu)|185.62.236.186|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 17463208 (17M) [application/octet-stream]
Saving to: ‘OCR_inference.h5.1’


2025-07-18 09:55:02 (6.14 MB/s) - ‘OCR_inference.h5.1’ saved [17463208/17463208]

--2025-07-18 09:55:02--  https://filedn.eu/l1MYFwJMIh4Y60BIIrYyMiy/licence_plate_detection/plate_charset%20_OCR.txt
Resolving filedn.eu (filedn.eu).

In [3]:
car_model = YOLO('/content/vehicle.pt')
license_plate_detector = YOLO('/content/plate.pt')
tracker = sv.ByteTrack()

# Load OCR model
def ctc_lambda(*args, **kwargs):
    return None

OCR = load_model('/content/OCR_inference.h5', compile=False)

# Load charset for OCR
with open('/content/plate_charset _OCR.txt', 'r', encoding='utf-8') as f:
    charset = [line.strip() for line in f.readlines()]
charset_with_blank = charset + ['']

# Load Persian font
persian_font = ImageFont.truetype("/content/Vazirmatn-Black.ttf", 40)

def license_complies_format(text):
    """
    Check if license plate complies with common format
    """
    if len(text) < 3 or len(text) > 10:
        return False

    # Check for minimum alphanumeric characters
    alphanumeric_count = sum(c.isalnum() for c in text)
    if alphanumeric_count < 3:
        return False

    # Check for too many special characters
    special_count = sum(not c.isalnum() for c in text)
    if special_count > 2:
        return False

    return True

def format_license(text):
    """
    Clean and format license plate text
    """
    # Remove extra spaces and special characters
    text = re.sub(r'[^\w\s-]', '', text)
    text = text.strip().upper()

    # Remove extra spaces
    text = re.sub(r'\s+', ' ', text)

    return text

def get_car(license_plate, vehicle_tracks):
    """
    Find which vehicle contains the license plate
    """
    x1, y1, x2, y2, score, class_id = license_plate

    for track in vehicle_tracks:
        xcar1, ycar1, xcar2, ycar2 = track['bbox']
        car_id = track['track_id']

        # Check if license plate is within car bounding box
        if x1 > xcar1 and y1 > ycar1 and x2 < xcar2 and y2 < ycar2:
            return xcar1, ycar1, xcar2, ycar2, car_id

    return -1, -1, -1, -1, -1

def draw_border(img, top_left, bottom_right, color=(0, 255, 0), thickness=3, line_length_x=50, line_length_y=50):
    """Draw corner borders around bounding box"""
    x1, y1 = top_left
    x2, y2 = bottom_right
    cv2.line(img, (x1, y1), (x1, y1 + line_length_y), color, thickness)
    cv2.line(img, (x1, y1), (x1 + line_length_x, y1), color, thickness)
    cv2.line(img, (x1, y2), (x1, y2 - line_length_y), color, thickness)
    cv2.line(img, (x1, y2), (x1 + line_length_x, y2), color, thickness)
    cv2.line(img, (x2, y1), (x2 - line_length_x, y1), color, thickness)
    cv2.line(img, (x2, y1), (x2, y1 + line_length_y), color, thickness)
    cv2.line(img, (x2, y2), (x2, y2 - line_length_y), color, thickness)
    cv2.line(img, (x2, y2), (x2 - line_length_x, y2), color, thickness)
    return img

def draw_persian_text(img, text, position, font, text_color=(0, 0, 0), bg_color=(255, 255, 255), border_color=(0, 0, 0)):
    """
    Draw Persian text on image using PIL with proper bidirectional text handling
    """
    # Handle bidirectional text properly (Persian + numbers)
    display_text = get_display(text)

    # Convert OpenCV image to PIL
    img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(img_pil)

    # Get text bounding box
    bbox = draw.textbbox((0, 0), display_text, font=font)
    text_width = bbox[2] - bbox[0]
    text_height = bbox[3] - bbox[1]

    x, y = position

    # Ensure text is within image bounds
    x = max(0, min(x, img.shape[1] - text_width))
    y = max(text_height, min(y, img.shape[0]))

    # Draw background rectangle
    bg_padding = 10
    bg_rect = [
        x - bg_padding,
        y - text_height - bg_padding,
        x + text_width + bg_padding,
        y + bg_padding
    ]

    # Draw white background
    draw.rectangle(bg_rect, fill=bg_color)

    # Draw border
    draw.rectangle(bg_rect, outline=border_color, width=2)

    # Draw text with proper bidirectional handling
    draw.text((x, y - text_height), display_text, font=font, fill=text_color)

    # Convert back to OpenCV
    img_cv = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
    return img_cv

def ctc_decode(preds, charset):
    """Decode CTC predictions to text"""
    out_best = list(np.argmax(preds, axis=1))
    outstr = ""
    prev_char = -1
    blank_idx = len(charset) - 1

    for c in out_best:
        if c == prev_char or c == blank_idx:
            prev_char = c
            continue
        if c < len(charset):
            outstr += charset[c]
        else:
            outstr += "?"
        prev_char = c
    return outstr

def read_license_plate_with_crnn(license_plate_crop, ocr_model, charset):
    """Read license plate text using CRNN model"""
    img = cv2.cvtColor(license_plate_crop, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, (160, 40))
    img = img.astype(np.float32) / 255.
    img = np.expand_dims(img, axis=0)
    img = np.expand_dims(img, axis=-1)

    preds = ocr_model.predict(img)
    preds = preds[0]
    text = ctc_decode(preds, charset)
    return text, float(np.max(preds))

def interpolate_bounding_boxes(data):
    """
    Interpolate missing bounding boxes for smoother tracking
    """
    # Group by car_id
    car_ids = data['car_id'].unique()

    interpolated_data = []

    for car_id in car_ids:
        car_data = data[data['car_id'] == car_id].copy()
        car_data = car_data.sort_values('frame_nmr')

        # Get frame range
        frames = car_data['frame_nmr'].values
        if len(frames) < 2:
            interpolated_data.append(car_data)
            continue

        # Create continuous frame range
        frame_range = np.arange(frames.min(), frames.max() + 1)

        # Interpolate car bounding boxes
        car_bbox_interp = []
        for i in range(4):  # x1, y1, x2, y2
            bbox_values = []
            for _, row in car_data.iterrows():
                bbox_str = str(row['car_bbox']).replace('[', '').replace(']', '')
                bbox = [float(x) for x in bbox_str.split()]
                bbox_values.append(bbox[i])

            # Interpolate
            f = interpolate.interp1d(frames, bbox_values, kind='linear',
                                   bounds_error=False, fill_value='extrapolate')
            car_bbox_interp.append(f(frame_range))

        # Interpolate license plate bounding boxes
        lp_bbox_interp = []
        for i in range(4):  # x1, y1, x2, y2
            bbox_values = []
            for _, row in car_data.iterrows():
                bbox_str = str(row['license_plate_bbox']).replace('[', '').replace(']', '')
                bbox = [float(x) for x in bbox_str.split()]
                bbox_values.append(bbox[i])

            f = interpolate.interp1d(frames, bbox_values, kind='linear',
                                   bounds_error=False, fill_value='extrapolate')
            lp_bbox_interp.append(f(frame_range))

        # Create interpolated rows
        for frame_idx, frame_num in enumerate(frame_range):
            if frame_num in frames:
                # Use original data
                original_row = car_data[car_data['frame_nmr'] == frame_num].iloc[0]
                interpolated_data.append(original_row.to_dict())
            else:
                # Use interpolated data
                car_bbox_str = ' '.join([str(int(car_bbox_interp[i][frame_idx])) for i in range(4)])
                lp_bbox_str = ' '.join([str(int(lp_bbox_interp[i][frame_idx])) for i in range(4)])

                # Get the best license plate text for this car
                best_row = car_data.loc[car_data['license_number_score'].idxmax()]

                interpolated_row = {
                    'frame_nmr': frame_num,
                    'car_id': car_id,
                    'car_bbox': car_bbox_str,
                    'license_plate_bbox': lp_bbox_str,
                    'license_plate_bbox_score': best_row['license_plate_bbox_score'],
                    'license_number': best_row['license_number'],
                    'license_number_score': best_row['license_number_score']
                }
                interpolated_data.append(interpolated_row)

    return pd.DataFrame(interpolated_data)


In [4]:
cap = cv2.VideoCapture('/content/new_cap.mp4')

total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

results = {}
vehicles = [0]
scale = 2
frame_nmr = -1
ret = True
num_processed = 0

print("Starting license plate detection with ByteTrack...")

while ret:
    frame_nmr += 1
    ret, frame = cap.read()
    if not ret:
        break

    results[frame_nmr] = {}

    # Vehicle detection
    vehicle_detections = car_model(frame)[0]
    detections = sv.Detections.from_ultralytics(vehicle_detections)
    vehicle_mask = detections.class_id == 0
    vehicle_detections_filtered = detections[vehicle_mask]
    vehicle_tracks = tracker.update_with_detections(vehicle_detections_filtered)

    # Convert tracks to format
    vehicle_track_list = []
    for i, track_id in enumerate(vehicle_tracks.tracker_id):
        if track_id is not None:
            bbox = vehicle_tracks.xyxy[i]
            vehicle_track_list.append({
                'bbox': bbox,
                'track_id': track_id
            })

    # License plate detection
    license_plates = license_plate_detector(frame)[0]

    for license_plate in license_plates.boxes.data.tolist():
        x1, y1, x2, y2, score, class_id = license_plate

        # Find corresponding vehicle
        xcar1, ycar1, xcar2, ycar2, car_id = get_car(license_plate, vehicle_track_list)

        if car_id == -1:
            continue

        # Extract license plate crop
        if int(y2) > int(y1) and int(x2) > int(x1):
            license_plate_crop = frame[int(y1):int(y2), int(x1):int(x2), :]
            license_plate_crop_big = cv2.resize(
                license_plate_crop,
                (license_plate_crop.shape[1] * scale, license_plate_crop.shape[0] * scale),
                interpolation=cv2.INTER_CUBIC  # Better interpolation
            )

            # OCR recognition
            license_plate_text, license_plate_text_score = read_license_plate_with_crnn(
                license_plate_crop_big, OCR, charset_with_blank
            )

            # Format and validate license plate
            license_plate_text = format_license(license_plate_text)

            if license_plate_text and license_complies_format(license_plate_text):
                print(f'Frame: {frame_nmr}, Car: {car_id}, Plate: {license_plate_text}, Score: {license_plate_text_score:.3f}')

                results[frame_nmr][car_id] = {
                    'car': {'bbox': [xcar1, ycar1, xcar2, ycar2]},
                    'license_plate': {
                        'bbox': [x1, y1, x2, y2],
                        'text': license_plate_text,
                        'bbox_score': score,
                        'text_score': license_plate_text_score
                    }
                }

    num_processed += 1

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
Frame: 189, Car: 14, Plate: س50, Score: 1.000

0: 640x384 3 Vehicles, 134.2ms
Speed: 3.9ms preprocess, 134.2ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2 کل ناحیه پلاکs, 138.4ms
Speed: 5.7ms preprocess, 138.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
Frame: 190, Car: 17, Plate: 44ص16334, Score: 1.000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
Frame: 190, Car: 14, Plate: ص65, Score: 1.000

0: 640x384 3 Vehicles, 150.0ms
Speed: 5.3ms preprocess, 150.0ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2 کل ناحیه پلاکs, 141.9ms
Speed: 4.0ms preprocess, 141.9ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 384)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [5]:
def write_csv(results, output_path):
    """Write results to CSV file"""
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['frame_nmr', 'car_id', 'car_bbox',
                         'license_plate_bbox', 'license_plate_bbox_score', 'license_number',
                         'license_number_score'])

        for frame_nmr in results.keys():
            for car_id in results[frame_nmr].keys():
                data = results[frame_nmr][car_id]
                if 'car' in data and 'license_plate' in data and 'text' in data['license_plate']:
                    writer.writerow([
                        frame_nmr,
                        car_id,
                        ' '.join(map(str, data['car']['bbox'])),
                        ' '.join(map(str, data['license_plate']['bbox'])),
                        data['license_plate']['bbox_score'],
                        data['license_plate']['text'],
                        data['license_plate']['text_score']
                    ])

# Save results
write_csv(results, '/content/results_bytetrack.csv')


In [6]:
results_df = pd.read_csv('/content/results_bytetrack.csv')

# Apply interpolation to smooth tracking
print("Applying data interpolation for smoother tracking...")
interpolated_df = interpolate_bounding_boxes(results_df)

# Get best license plate text for each car
license_plate_data = {}
for car_id in np.unique(interpolated_df['car_id']):
    car_data = interpolated_df[interpolated_df['car_id'] == car_id]

    # Get highest confidence detection
    max_score_idx = car_data['license_number_score'].idxmax()
    best_row = car_data.loc[max_score_idx]

    license_plate_data[car_id] = {
        'license_plate_number': best_row['license_number'],
        'confidence': best_row['license_number_score']
    }

# Video generation with improved visualization
video_path = '/content/new_cap.mp4'
cap = cv2.VideoCapture(video_path)

# Video writer setup
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('/content/output_video.mp4', fourcc, fps, (width, height))

print("Generating output video with Persian text support...")

# Process frames for video output
frame_numbers = sorted(interpolated_df['frame_nmr'].unique())
for frame_nmr in frame_numbers:
    cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_nmr))
    ret, frame = cap.read()

    if not ret or frame is None:
        continue

    # Get detections for this frame
    frame_data = interpolated_df[interpolated_df['frame_nmr'] == frame_nmr]

    for _, row in frame_data.iterrows():
        car_id = row['car_id']

        # Parse car bbox
        car_bbox_str = str(row['car_bbox']).replace('[', '').replace(']', '')
        car_x1, car_y1, car_x2, car_y2 = [int(float(x)) for x in car_bbox_str.split()]

        # Parse license plate bbox
        lp_bbox_str = str(row['license_plate_bbox']).replace('[', '').replace(']', '')
        x1, y1, x2, y2 = [int(float(x)) for x in lp_bbox_str.split()]

        # Draw car border (thinner, green)
        draw_border(frame, (car_x1, car_y1), (car_x2, car_y2), (0, 255, 0), 3,
                   line_length_x=50, line_length_y=50)

        # Draw license plate rectangle (thinner, red)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

        # Add license plate text with Persian font support
        if car_id in license_plate_data:
            text = license_plate_data[car_id]['license_plate_number']
            confidence = license_plate_data[car_id]['confidence']

            # Create text display
            display_text = f"{text}"

            # Calculate text position
            car_mid_x = int((car_x1 + car_x2) / 2)
            text_x = car_mid_x - 100  # Approximate center adjustment
            text_y = max(car_y1 - 20, 50)

            # Draw Persian text using PIL
            frame = draw_persian_text(
                frame,
                display_text,
                (text_x, text_y),
                persian_font,
                text_color=(0, 0, 0),
                bg_color=(255, 255, 255),
                border_color=(0, 0, 0)
            )

    out.write(frame)

out.release()
cap.release()

print("Video processing completed!")
print(f"Output saved as 'output_video.mp4'")
print(f"Total cars tracked: {len(license_plate_data)}")
for car_id, data in license_plate_data.items():
    print(f"Car {car_id}: {data['license_plate_number']} (confidence: {data['confidence']:.3f})")

Applying data interpolation for smoother tracking...
Generating output video with Persian text support...
Video processing completed!
Output saved as 'output_video.mp4'
Total cars tracked: 27
Car 1: 99م28299 (confidence: 1.000)
Car 2: 54ی99550 (confidence: 1.000)
Car 4: 86ص2263 (confidence: 1.000)
Car 5: 44ص76555 (confidence: 1.000)
Car 6: 47ن1494 (confidence: 1.000)
Car 8: 35ط24360 (confidence: 1.000)
Car 10: 5م8 (confidence: 1.000)
Car 14: 77س23650 (confidence: 1.000)
Car 17: 4س163 (confidence: 1.000)
Car 20: 85د6935 (confidence: 1.000)
Car 21: 74ج97148 (confidence: 1.000)
Car 22: 18ی82440 (confidence: 1.000)
Car 24: 12ب71210 (confidence: 1.000)
Car 26: 13ن61911 (confidence: 1.000)
Car 29: 87م7725 (confidence: 1.000)
Car 31: 49ب54688 (confidence: 1.000)
Car 34: 35ل31188 (confidence: 1.000)
Car 37: 41ص19560 (confidence: 1.000)
Car 38: 36د12311 (confidence: 1.000)
Car 40: 17ق876 (confidence: 1.000)
Car 42: 14د95244 (confidence: 1.000)
Car 44: 1و1520 (confidence: 1.000)
Car 47: 83س33144