In [1]:
import sys
# Show the path of the Python interpreter this kernel is running on.
print(sys.executable)

c:\Users\Administrator\deployment\deploy\Scripts\python.exe


In [60]:
import torch
import torch_directml  # If using DirectML
import cv2
import numpy as np
import gradio as gr
import easyocr
import os
from pathlib import Path
from ultralytics import YOLO
import csv

# Compute device preference: DirectML first, then CUDA, then CPU.
# NOTE(review): `device` is not referenced again in this cell - confirm whether
# the models are meant to be moved onto it.
if torch_directml.is_available():
    device = torch_directml.device()
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# YOLO detector: stop immediately when the weights file is absent.
yolo_model_path = r"C:\\Users\\Administrator\\deployment\\weights\\best.pt"  # Replace with your YOLO model path
if os.path.exists(yolo_model_path):
    model = YOLO(yolo_model_path)
    model.fuse()  # Optimize
else:
    raise FileNotFoundError(f"YOLO model file not found: {yolo_model_path}")

# Super-resolution network (ESPCN, scale factor 2) loaded through OpenCV's dnn_superres.
sr_model_path = r"C:\\Users\\Administrator\\deployment\\TF-ESPCN\\export\\ESPCN_x2.pb"  # Replace with your SR model path
if os.path.exists(sr_model_path):
    sr = cv2.dnn_superres.DnnSuperResImpl_create()
    sr.readModel(sr_model_path)
    sr.setModel("espcn", 2)
else:
    raise FileNotFoundError(f"Super-Resolution model file not found: {sr_model_path}")

# EasyOCR reader for the 'bn' language code; the GPU is requested only when CUDA is present.
reader = easyocr.Reader(['bn'], gpu=torch.cuda.is_available())

# Image Processing Functions (same as before)
def apply_super_resolution(image):
    """Return `image` upsampled by the module-level super-resolution model `sr`."""
    upsampled = sr.upsample(image)
    return upsampled

def apply_dilation(image):
    """Dilate `image` once using a 3x3 all-ones uint8 kernel."""
    structuring_element = np.ones((3, 3), dtype=np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated

def apply_erosion(image):
    """Erode `image` once using a 3x3 all-ones uint8 kernel."""
    structuring_element = np.ones((3, 3), dtype=np.uint8)
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded

def preprocess_upscale_only(image):
    """Convert `image` to grayscale (BGR2GRAY) and return its super-resolved version."""
    return apply_super_resolution(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))

def preprocess_upscale_morph(image):
    """Upscale `image` in grayscale, then dilate and erode the result (in that order)."""
    upscaled = preprocess_upscale_only(image)
    dilated = apply_dilation(upscaled)
    return apply_erosion(dilated)

def extract_text(image):
    """OCR a cropped region twice: on the upscaled crop and on its dilated-then-eroded copy.

    Args:
        image: Cropped image handed to ``preprocess_upscale_only``.

    Returns:
        tuple[str, str]: ``(text_upscale, text_upscale_morph)``. Each value is the
        space-joined result of ``reader.readtext(..., detail=0)``. Both are empty
        strings when the crop is ``None`` or contains no pixels.
    """
    # A zero-area crop would make the colour conversion fail, so bail out early.
    if image is None or image.size == 0:
        return "", ""

    # Run super-resolution once and reuse it for both OCR passes; dilation followed
    # by erosion is exactly what preprocess_upscale_morph applies on top of it.
    upscaled_image = preprocess_upscale_only(image)
    text_upscale = " ".join(reader.readtext(upscaled_image, detail=0))

    upscaled_morph_image = apply_erosion(apply_dilation(upscaled_image))
    text_upscale_morph = " ".join(reader.readtext(upscaled_morph_image, detail=0))
    return text_upscale, text_upscale_morph

# Process Single Image
def process_image(image):
    """Detect objects in `image`, OCR every detected box, and return the annotated result.

    NOTE: a second ``process_image`` is defined further down in this cell and
    replaces this one once the cell has run.

    Returns:
        tuple: ``(annotated_img, combined_ocr_text, combined_ocr_text_morph)`` where
        the two texts are the per-box OCR strings joined with single spaces.
    """
    detection = model(image)[0]
    annotated_img = detection.plot()  # detections drawn by the model's result object

    plain_texts, morph_texts = [], []
    for detected_box in detection.boxes:
        left, top, right, bottom = (int(value) for value in detected_box.xyxy[0].tolist())
        plain, morph = extract_text(image[top:bottom, left:right])
        plain_texts.append(plain)
        morph_texts.append(morph)

    return annotated_img, " ".join(plain_texts), " ".join(morph_texts)

# Process Single Image (Corrected)
def process_image(image, max_crops=5):
    """Detect objects in `image`, OCR each detected box, and return crops for display.

    Args:
        image: Image array passed to the detector and sliced per detected box.
        max_crops: Number of crop slots to return. The interface defined below
            declares five crop outputs, so the default is 5.

    Returns:
        tuple: ``(annotated_img, combined_ocr_text, combined_ocr_text_morph,
        crop_1, ..., crop_max_crops)``. The OCR texts cover every non-empty
        detection; the crop slots hold the first ``max_crops`` crops and are
        padded with ``None``, so the tuple length is always ``3 + max_crops``.
    """
    results = model(image)
    annotated_img = results[0].plot()

    ocr_texts = []
    ocr_texts_morph = []
    cropped_images = []

    for box in results[0].boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        cropped_image = image[y1:y2, x1:x2]
        # Integer truncation can collapse a box to zero area; there is nothing to OCR then.
        if cropped_image.size == 0:
            continue

        text_upscale, text_upscale_morph = extract_text(cropped_image)
        ocr_texts.append(text_upscale)
        ocr_texts_morph.append(text_upscale_morph)
        cropped_images.append(cropped_image)

    combined_ocr_text = " ".join(ocr_texts)
    combined_ocr_text_morph = " ".join(ocr_texts_morph)

    # Exactly `max_crops` slots: extra crops are dropped (returning more values than
    # the UI has outputs is an error), missing ones are filled with None.
    shown_crops = cropped_images[:max_crops]
    shown_crops += [None] * (max_crops - len(shown_crops))

    return (annotated_img, combined_ocr_text, combined_ocr_text_morph, *shown_crops)



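# Process Video: process_video() is NOT defined in this cell; it must already exist in the kernel (it is defined in later cells) before a video upload can work.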

# Gradio Interface (Corrected)
def _route_upload(img, vid):
    """Send an upload to the matching processor and shape the result for the UI.

    Args:
        img: Uploaded image (``None`` when no image was provided).
        vid: Uploaded video file path (``None`` when no video was provided).

    Returns:
        tuple: Exactly eight values, one per output component declared below:
        annotated image, two text values, and five crop slots.
    """
    n_outputs = 8  # 1 annotated image + 2 text boxes + 5 crop images

    if img is not None:
        values = tuple(process_image(img))
    elif vid is not None:
        # process_video is not defined in this cell; it has to exist in the kernel
        # already, otherwise this branch raises NameError.
        video_result = process_video(vid)
        if isinstance(video_result, tuple):
            # Some versions return ("Error: ...", None) on failure; keep the message.
            video_result = video_result[0]
        # There is no dedicated video output here, so the path/message goes in the first text box.
        values = (None, str(video_result), "")
    else:
        # Nothing was uploaded: return empty outputs instead of processing None.
        values = (None, "", "")

    # Trim or pad so the number of values always matches the number of outputs.
    return values[:n_outputs] + (None,) * (n_outputs - len(values))


iface = gr.Interface(
    fn=_route_upload,
    inputs=[
        gr.Image(type='numpy', label='Upload Image'),
        gr.File(label='Upload Video', type="filepath")
    ],
    outputs=[
        gr.Image(label='Processed Image'),
        gr.Textbox(label='OCR Text (Upscaled)'),
        gr.Textbox(label='OCR Text (Upscaled + Morph)'),
        gr.Image(label="Cropped 1"),  # Define each image output separately
        gr.Image(label="Cropped 2"),
        gr.Image(label="Cropped 3"),
        gr.Image(label="Cropped 4"),
        gr.Image(label="Cropped 5"),
    ]
)

if __name__ == "__main__":
    iface.launch()


YOLOv5s summary (fused): 193 layers, 9,111,923 parameters, 0 gradients, 23.8 GFLOPs


Using CPU. Note: This module is much faster with a GPU.


* Running on local URL:  http://127.0.0.1:7899

To create a public link, set `share=True` in `launch()`.



0: 640x640 1 LP, 184.0ms
Speed: 5.0ms preprocess, 184.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)


In [8]:
import os

# Location of the EDSR x2 super-resolution weights to look for.
model_path = r"C:\\Users\\Administrator\\deployment\\EDSR_Tensorflow-master\\models\\EDSR_x2.pb"

# Report whether the file is present on disk.
print("Model file exists ✅" if os.path.exists(model_path) else "Model file NOT found ❌")

Model file exists ✅


In [72]:
import torch
import torch_directml  # If using DirectML
import cv2
import numpy as np
import gradio as gr
import easyocr
import os
from pathlib import Path
from ultralytics import YOLO

# Compute device preference: DirectML first, then CUDA, then CPU.
# NOTE(review): `device` is not referenced again in this cell - confirm whether
# the models are meant to be moved onto it.
if torch_directml.is_available():
    device = torch_directml.device()
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# YOLO detector: stop immediately when the weights file is absent.
yolo_model_path = r"C:\\Users\\Administrator\\deployment\\weights\\best.pt"  # Replace with your YOLO model path
if os.path.exists(yolo_model_path):
    model = YOLO(yolo_model_path)
    model.fuse()  # Optimize
else:
    raise FileNotFoundError(f"YOLO model file not found: {yolo_model_path}")

# Super-resolution network (ESPCN, scale factor 2) loaded through OpenCV's dnn_superres.
sr_model_path = r"C:\\Users\\Administrator\\deployment\\TF-ESPCN\\export\\ESPCN_x2.pb"  # Replace with your SR model path
if os.path.exists(sr_model_path):
    sr = cv2.dnn_superres.DnnSuperResImpl_create()
    sr.readModel(sr_model_path)
    sr.setModel("espcn", 2)
else:
    raise FileNotFoundError(f"Super-Resolution model file not found: {sr_model_path}")

# EasyOCR reader for the 'bn' language code; the GPU is requested only when CUDA is present.
reader = easyocr.Reader(['bn'], gpu=torch.cuda.is_available())

# Image Processing Functions (same as before)
def apply_super_resolution(image):
    """Return `image` upsampled by the module-level super-resolution model `sr`."""
    upsampled = sr.upsample(image)
    return upsampled

def apply_dilation(image):
    """Dilate `image` once using a 3x3 all-ones uint8 kernel."""
    structuring_element = np.ones((3, 3), dtype=np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated

def apply_erosion(image):
    """Erode `image` once using a 3x3 all-ones uint8 kernel."""
    structuring_element = np.ones((3, 3), dtype=np.uint8)
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded

def preprocess_upscale_only(image):
    """Convert `image` to grayscale (BGR2GRAY) and return its super-resolved version."""
    return apply_super_resolution(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))

def preprocess_upscale_morph(image):
    """Upscale `image` in grayscale, then dilate and erode the result (in that order)."""
    upscaled = preprocess_upscale_only(image)
    dilated = apply_dilation(upscaled)
    return apply_erosion(dilated)

def extract_text(image):
    """OCR a cropped region twice: on the upscaled crop and on its dilated-then-eroded copy.

    Args:
        image: Cropped image handed to ``preprocess_upscale_only``.

    Returns:
        tuple[str, str]: ``(text_upscale, text_upscale_morph)``. Each value is the
        space-joined result of ``reader.readtext(..., detail=0)``. Both are empty
        strings when the crop is ``None`` or contains no pixels.
    """
    # A zero-area crop would make the colour conversion fail, so bail out early.
    if image is None or image.size == 0:
        return "", ""

    # Run super-resolution once and reuse it for both OCR passes; dilation followed
    # by erosion is exactly what preprocess_upscale_morph applies on top of it.
    upscaled_image = preprocess_upscale_only(image)
    text_upscale = " ".join(reader.readtext(upscaled_image, detail=0))

    upscaled_morph_image = apply_erosion(apply_dilation(upscaled_image))
    text_upscale_morph = " ".join(reader.readtext(upscaled_morph_image, detail=0))
    return text_upscale, text_upscale_morph

# Process Single Image
def process_image(image):
    """Detect objects in `image`, OCR every detected box, and return the annotated result.

    Returns:
        tuple: ``(annotated_img, combined_ocr_text, combined_ocr_text_morph)`` where
        the two texts are the per-box OCR strings joined with single spaces.
    """
    detection = model(image)[0]
    annotated_img = detection.plot()  # detections drawn by the model's result object

    plain_texts, morph_texts = [], []
    for detected_box in detection.boxes:
        left, top, right, bottom = (int(value) for value in detected_box.xyxy[0].tolist())
        plain, morph = extract_text(image[top:bottom, left:right])
        plain_texts.append(plain)
        morph_texts.append(morph)

    return annotated_img, " ".join(plain_texts), " ".join(morph_texts)

# Process Video (Only Object Detection)
def process_video(video_path):
    """Run detection on every frame of a video and write an annotated copy.

    No OCR is performed on video frames; only the detector's annotations are drawn.

    Args:
        video_path: Path of the input video file.

    Returns:
        str: Path of the annotated ``<stem>_output.mp4`` written under ``outputs/``,
        or the message ``"Error: Could not open video."`` when the input cannot
        be opened. The result is always a single string, because the caller puts
        it into one text output.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return "Error: Could not open video."

    out = None
    try:
        # Output location: outputs/<input stem>_output.mp4 (directory created on demand).
        output_dir = Path("outputs")
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / (Path(video_path).stem + '_output.mp4')

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        # Some containers report an FPS of 0; fall back to 30 so the writer gets a usable rate.
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Detect on the frame and write the annotated frame to the output video.
            results = model(frame)
            out.write(results[0].plot())
    finally:
        # Release both handles even when reading or inference raises.
        cap.release()
        if out is not None:
            out.release()

    return str(output_path)

# Gradio Interface (Updated for Video and Image)
def gradio_interface(img, vid):
    """Route an upload to image or video processing and fill the four UI outputs.

    Args:
        img: Uploaded image, or ``None``. An image takes priority over a video.
        vid: Uploaded video, or ``None``. May be a plain path string or an
            object exposing the path as ``.name``.

    Returns:
        tuple: ``(processed_image, ocr_text, ocr_text_morph, video_path)``;
        positions that do not apply to the chosen input are ``None``.
    """
    if img is not None:
        # Image branch: annotated image plus both OCR texts, no video path.
        processed_image, ocr_text, ocr_text_morph = process_image(img)
        return processed_image, ocr_text, ocr_text_morph, None

    if vid is not None:
        # Accept either a path string or a file-like wrapper carrying `.name`;
        # a plain `str` has no `.name`, which previously raised AttributeError.
        video_path = getattr(vid, "name", vid)
        output_video = process_video(video_path)
        return None, None, None, output_video

    return None, None, None, None  # nothing was uploaded

# Build the Gradio UI around gradio_interface: two inputs (an image as a numpy
# array, and a video file delivered as a file path) and four outputs that line
# up positionally with the 4-tuple gradio_interface returns.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Image(type='numpy', label='Upload Image'),
        gr.File(label='Upload Video', type="filepath")  # Video file input
    ],
    outputs=[
        gr.Image(label='Processed Image'),
        gr.Textbox(label='OCR Text (Upscaled)'),
        gr.Textbox(label='OCR Text (Upscaled + Morph)'),
        gr.Textbox(label="Annotated Video Path")  # Display video path as text box
    ]
)

# Start the local web server only when this code runs as the main module
# (which is the case for a notebook cell).
if __name__ == "__main__":
    iface.launch()


YOLOv5s summary (fused): 193 layers, 9,111,923 parameters, 0 gradients, 23.8 GFLOPs


Using CPU. Note: This module is much faster with a GPU.


* Running on local URL:  http://127.0.0.1:7910

To create a public link, set `share=True` in `launch()`.



0: 384x640 1 LP, 156.0ms
Speed: 5.0ms preprocess, 156.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LP, 86.5ms
Speed: 3.0ms preprocess, 86.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LP, 120.5ms
Speed: 3.0ms preprocess, 120.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LP, 84.5ms
Speed: 3.0ms preprocess, 84.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LP, 89.5ms
Speed: 2.0ms preprocess, 89.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LP, 87.1ms
Speed: 3.0ms preprocess, 87.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LP, 114.5ms
Speed: 6.0ms preprocess, 114.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LP, 86.1ms
Speed: 3.0ms preprocess, 86.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LP, 

In [73]:
import torch
import torch_directml  # If using DirectML
import cv2
import numpy as np
import gradio as gr
import easyocr
import os
from pathlib import Path
from ultralytics import YOLO
import random
from PIL import Image

# Compute device preference: DirectML first, then CUDA, then CPU.
# NOTE(review): `device` is not referenced again in this cell - confirm whether
# the models are meant to be moved onto it.
if torch_directml.is_available():
    device = torch_directml.device()
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# YOLO detector: stop immediately when the weights file is absent.
yolo_model_path = r"C:\\Users\\Administrator\\deployment\\weights\\best.pt"  # Replace with your YOLO model path
if os.path.exists(yolo_model_path):
    model = YOLO(yolo_model_path)
    model.fuse()  # Optimize
else:
    raise FileNotFoundError(f"YOLO model file not found: {yolo_model_path}")

# Super-resolution network (ESPCN, scale factor 2) loaded through OpenCV's dnn_superres.
sr_model_path = r"C:\\Users\\Administrator\\deployment\\TF-ESPCN\\export\\ESPCN_x2.pb"  # Replace with your SR model path
if os.path.exists(sr_model_path):
    sr = cv2.dnn_superres.DnnSuperResImpl_create()
    sr.readModel(sr_model_path)
    sr.setModel("espcn", 2)
else:
    raise FileNotFoundError(f"Super-Resolution model file not found: {sr_model_path}")

# EasyOCR reader for the 'bn' language code; the GPU is requested only when CUDA is present.
reader = easyocr.Reader(['bn'], gpu=torch.cuda.is_available())

# Image Processing Functions (same as before)
def apply_super_resolution(image):
    """Return `image` upsampled by the module-level super-resolution model `sr`."""
    upsampled = sr.upsample(image)
    return upsampled

def apply_dilation(image):
    """Dilate `image` once using a 3x3 all-ones uint8 kernel."""
    structuring_element = np.ones((3, 3), dtype=np.uint8)
    dilated = cv2.dilate(image, structuring_element, iterations=1)
    return dilated

def apply_erosion(image):
    """Erode `image` once using a 3x3 all-ones uint8 kernel."""
    structuring_element = np.ones((3, 3), dtype=np.uint8)
    eroded = cv2.erode(image, structuring_element, iterations=1)
    return eroded

def preprocess_upscale_only(image):
    """Convert `image` to grayscale (BGR2GRAY) and return its super-resolved version."""
    return apply_super_resolution(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))

def preprocess_upscale_morph(image):
    """Upscale `image` in grayscale, then dilate and erode the result (in that order)."""
    upscaled = preprocess_upscale_only(image)
    dilated = apply_dilation(upscaled)
    return apply_erosion(dilated)

def extract_text(image):
    """OCR a cropped region twice: on the upscaled crop and on its dilated-then-eroded copy.

    Args:
        image: Cropped image handed to ``preprocess_upscale_only``.

    Returns:
        tuple[str, str]: ``(text_upscale, text_upscale_morph)``. Each value is the
        space-joined result of ``reader.readtext(..., detail=0)``. Both are empty
        strings when the crop is ``None`` or contains no pixels.
    """
    # A zero-area crop would make the colour conversion fail, so bail out early.
    if image is None or image.size == 0:
        return "", ""

    # Run super-resolution once and reuse it for both OCR passes; dilation followed
    # by erosion is exactly what preprocess_upscale_morph applies on top of it.
    upscaled_image = preprocess_upscale_only(image)
    text_upscale = " ".join(reader.readtext(upscaled_image, detail=0))

    upscaled_morph_image = apply_erosion(apply_dilation(upscaled_image))
    text_upscale_morph = " ".join(reader.readtext(upscaled_morph_image, detail=0))
    return text_upscale, text_upscale_morph

# Function to get a random image from error_img directory
def get_random_error_image(error_img_dir=None):
    """Pick a random image file from the error-image directory.

    Args:
        error_img_dir: Directory to search. When omitted, the deployment's
            ``error_img`` folder is used.

    Returns:
        pathlib.Path | None: Path of a randomly chosen ``.png``/``.jpeg``/``.jpg``/
        ``.webp`` file (suffix compared case-insensitively), or ``None`` when the
        directory does not exist or holds no such file.
    """
    if error_img_dir is None:
        error_img_dir = r"C:\\Users\\Administrator\\deployment\\error_img"
    error_img_dir = Path(error_img_dir)

    # A missing directory means "no error image available", not a crash.
    if not error_img_dir.is_dir():
        return None

    valid_image_extensions = {'.png', '.jpeg', '.jpg', '.webp'}
    images = [
        f for f in error_img_dir.iterdir()
        if f.is_file() and f.suffix.lower() in valid_image_extensions
    ]
    return random.choice(images) if images else None

# Process Single Image
def process_image(image):
    """Detect objects in `image` and OCR each detection, with a fallback image when nothing is found.

    Args:
        image: Image array passed to the detector and sliced per detected box.

    Returns:
        tuple: ``(image_out, text_upscale, text_upscale_morph)``.
        With detections, ``image_out`` is the annotated image and the texts are the
        per-box OCR strings joined with spaces. Without detections, ``image_out``
        is a random error image (or a 256x256 red placeholder) as an RGB array and
        both texts are the fixed "judging cat" message.
    """
    results = model(image)

    # No bounding boxes: show an error image and the canned message instead.
    if len(results[0].boxes) == 0:
        random_error_image_path = get_random_error_image()
        if random_error_image_path:
            # Close the file handle once read, and normalise palette/RGBA/grayscale
            # files to 3-channel RGB so the output always has the same layout.
            with Image.open(random_error_image_path) as opened:
                error_image = np.array(opened.convert("RGB"))
        else:
            # Placeholder when no error image is available.
            error_image = np.array(Image.new('RGB', (256, 256), color='red'))

        return error_image, "I'm not offense, but the judging cat judge you!", "I'm not offense, but the judging cat judge you!"

    annotated_img = results[0].plot()  # detections drawn by the model's result object

    ocr_texts = []
    ocr_texts_morph = []
    for box in results[0].boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        cropped_image = image[y1:y2, x1:x2]
        # Integer truncation can collapse a box to zero area; there is nothing to OCR then.
        if cropped_image.size == 0:
            continue

        text_upscale, text_upscale_morph = extract_text(cropped_image)
        ocr_texts.append(text_upscale)
        ocr_texts_morph.append(text_upscale_morph)

    combined_ocr_text = " ".join(ocr_texts)
    combined_ocr_text_morph = " ".join(ocr_texts_morph)

    return annotated_img, combined_ocr_text, combined_ocr_text_morph

# Process Video (Only Object Detection)
def process_video(video_path):
    """Run detection on every frame of a video and write an annotated copy.

    No OCR is performed on video frames; only the detector's annotations are drawn.

    Args:
        video_path: Path of the input video file.

    Returns:
        str: Path of the annotated ``<stem>_output.mp4`` written under ``outputs/``,
        or the message ``"Error: Could not open video."`` when the input cannot
        be opened. The result is always a single string, because the caller puts
        it into one text output.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return "Error: Could not open video."

    out = None
    try:
        # Output location: outputs/<input stem>_output.mp4 (directory created on demand).
        output_dir = Path("outputs")
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / (Path(video_path).stem + '_output.mp4')

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        # Some containers report an FPS of 0; fall back to 30 so the writer gets a usable rate.
        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Detect on the frame and write the annotated frame to the output video.
            results = model(frame)
            out.write(results[0].plot())
    finally:
        # Release both handles even when reading or inference raises.
        cap.release()
        if out is not None:
            out.release()

    return str(output_path)

# Gradio Interface (Updated for Video and Image)
def gradio_interface(img, vid):
    """Route an upload to image or video processing and fill the four UI outputs.

    Args:
        img: Uploaded image, or ``None``. An image takes priority over a video.
        vid: Uploaded video, or ``None``. May be a plain path string or an
            object exposing the path as ``.name``.

    Returns:
        tuple: ``(processed_image, ocr_text, ocr_text_morph, video_path)``;
        positions that do not apply to the chosen input are ``None``.
    """
    if img is not None:
        # Image branch: annotated image plus both OCR texts, no video path.
        processed_image, ocr_text, ocr_text_morph = process_image(img)
        return processed_image, ocr_text, ocr_text_morph, None

    if vid is not None:
        # Accept either a path string or a file-like wrapper carrying `.name`;
        # a plain `str` has no `.name`, which previously raised AttributeError.
        video_path = getattr(vid, "name", vid)
        output_video = process_video(video_path)
        return None, None, None, output_video

    return None, None, None, None  # nothing was uploaded

# Build the Gradio UI around gradio_interface: two inputs (an image as a numpy
# array, and a video file delivered as a file path) and four outputs that line
# up positionally with the 4-tuple gradio_interface returns.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Image(type='numpy', label='Upload Image'),
        gr.File(label='Upload Video', type="filepath")  # Video file input
    ],
    outputs=[
        gr.Image(label='Processed Image'),
        gr.Textbox(label='OCR Text (Upscaled)'),
        gr.Textbox(label='OCR Text (Upscaled + Morph)'),
        gr.Textbox(label="Annotated Video Path")  # Display video path as text box
    ]
)

# Start the local web server only when this code runs as the main module
# (which is the case for a notebook cell).
if __name__ == "__main__":
    iface.launch()


YOLOv5s summary (fused): 193 layers, 9,111,923 parameters, 0 gradients, 23.8 GFLOPs


Using CPU. Note: This module is much faster with a GPU.


* Running on local URL:  http://127.0.0.1:7911

To create a public link, set `share=True` in `launch()`.



0: 640x640 1 LP, 195.9ms
Speed: 30.6ms preprocess, 195.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 608x640 (no detections), 177.5ms
Speed: 14.6ms preprocess, 177.5ms inference, 2.0ms postprocess per image at shape (1, 3, 608, 640)

0: 640x640 (no detections), 187.1ms
Speed: 31.0ms preprocess, 187.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 207.1ms
Speed: 16.5ms preprocess, 207.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 640)

0: 288x640 (no detections), 133.0ms
Speed: 6.0ms preprocess, 133.0ms inference, 0.0ms postprocess per image at shape (1, 3, 288, 640)

0: 640x640 1 LP, 204.0ms
Speed: 23.0ms preprocess, 204.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 207.1ms
Speed: 14.0ms preprocess, 207.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)


In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Confusion-matrix counts and the class names shown on both axes
cm = np.array([[1011, 39], [8, 100]])
labels = ["LP", "background"]

# 12x8 inch figure with an explicit Axes to draw on
fig, ax = plt.subplots(figsize=(12, 8))

# Annotated heatmap: integer cell labels, blue colour map, black cell borders
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    linewidths=1,
    linecolor="black",
    xticklabels=labels,
    yticklabels=labels,
    annot_kws={"size": 14},
    ax=ax,
)

# Axis labels and title
ax.set_xlabel("True", fontsize=14)
ax.set_ylabel("Predicted", fontsize=14)
ax.set_title("Confusion Matrix", fontsize=16)

# Write a 300-dpi copy to disk, then display the figure.
# NOTE(review): the file name starts with "trainconfusion_matrix" - confirm
# whether a path separator after "train" was intended.
fig.savefig(r"E:\\backup\\back_zaif1\\back_zaif\\Documents\\thesis\\number_plate_bangla_recog\\models\\yolov8_80\\detect\\trainconfusion_matrix_high_quality.png", dpi=300, bbox_inches="tight")
plt.show()


KeyboardInterrupt: 