# Run Inference on New Images and Preprocess Detected Regions

In [2]:
import cv2
import numpy as np
import os
import pytesseract
import pandas as pd

# Configuration and Path Setup

In [4]:
MODEL_CONFIG = r"C:\Users\hp\project 10\yolo_model\yolov3.cfg"
MODEL_WEIGHTS = r"C:\Users\hp\project 10\yolo_model\yolov3.weights"
CLASS_NAMES = r"C:\Users\hp\project 10\yolo_model\coco.names"
EAST_MODEL = r"C:\Users\hp\project 10\yolo_model\frozen_east_text_detection.pb"

INPUT_IMAGES_FOLDER = r"C:\Users\hp\project 10\raw_images"
OUTPUT_DETECTED_FOLDER = r"C:\Users\hp\project 10\detected_regions"
OUTPUT_PREPROCESSED_FOLDER = r"C:\Users\hp\project 10\preprocessed_images"
OUTPUT_CSV = r"C:\Users\hp\project 10\output.csv"

# Ensure output directories exist
os.makedirs(OUTPUT_DETECTED_FOLDER, exist_ok=True)
os.makedirs(OUTPUT_PREPROCESSED_FOLDER, exist_ok=True)

# Load EAST Text Detector model
net = cv2.dnn.readNet(EAST_MODEL)

# Load class names
with open(CLASS_NAMES, "r") as f:
    classes = f.read().strip().split("\n")

# Setup Tesseract OCR
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Function: Detect Text Regions Using EAST Model

In [6]:
def detect_text_regions(image_path):
    """
    Detect text regions in an image using the EAST model.
    :param image_path: Path to the input image.
    :return: List of detected text region image paths.
    """
    image = cv2.imread(image_path)
    orig = image.copy()
    (H, W) = image.shape[:2]

    # Resize image for EAST model (must be multiple of 32)
    newW, newH = (320, 320)
    rW, rH = W / float(newW), H / float(newH)
    image = cv2.resize(image, (newW, newH))
    blob = cv2.dnn.blobFromImage(image, 1.0, (newW, newH), (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"])

    # Decode detected text regions
    boxes = []
    confidences = []
    for y in range(geometry.shape[2]):
        for x in range(geometry.shape[3]):
            if scores[0, 0, y, x] < 0.5:
                continue
            
            offsetX, offsetY = x * 4.0, y * 4.0
            angle = geometry[0, 4, y, x]
            cosA, sinA = np.cos(angle), np.sin(angle)
            h, w = geometry[0, 0, y, x], geometry[0, 1, y, x]
            endX, endY = int(offsetX + (cosA * w) + (sinA * h)), int(offsetY - (sinA * w) + (cosA * h))
            startX, startY = int(endX - w), int(endY - h)
            
            startX, startY, endX, endY = int(startX * rW), int(startY * rH), int(endX * rW), int(endY * rH)
            boxes.append((startX, startY, endX, endY))
            confidences.append(scores[0, 0, y, x])
    
    # Apply Non-Maximum Suppression (NMS)
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    detected_images = []

    if len(indices) > 0:
        for i in indices.flatten():
            x1, y1, x2, y2 = boxes[i]
            cropped_region = orig[y1:y2, x1:x2]
            output_path = os.path.join(OUTPUT_DETECTED_FOLDER, f"region_{i}.jpg")
            cv2.imwrite(output_path, cropped_region)
            detected_images.append(output_path)
    
    return detected_images


# Function: Extract Text Using Tesseract OCR

In [8]:
def extract_text(image_path):
    """
    Extract text from an image using Tesseract OCR.
    :param image_path: Path to the input image.
    :return: Extracted text.
    """
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    text = pytesseract.image_to_string(gray, config='--psm 6')
    return text.strip()

# Main OCR Pipeline Execution

In [10]:
ocr_results = []
for filename in os.listdir(INPUT_IMAGES_FOLDER):
    input_path = os.path.join(INPUT_IMAGES_FOLDER, filename)
    
    # Step 1: Detect text regions
    detected_regions = detect_text_regions(input_path)
    
    # Step 2: Extract text from detected text regions
    for detected_img in detected_regions:
        extracted_text = extract_text(detected_img)
        ocr_results.append((detected_img, extracted_text))

# Save OCR results to CSV
if ocr_results:
    df = pd.DataFrame(ocr_results, columns=["Image Path", "Extracted Text"])
    df.to_csv(OUTPUT_CSV, index=False)
    print("OCR Process Complete! Extracted text saved to CSV.")
else:
    print("No text detected in any image.")

OCR Process Complete! Extracted text saved to CSV.


# Extracting Text Using Tesseract

In [12]:
import cv2
import numpy as np
import os
import pytesseract
import csv

# Define paths
MODEL_CONFIG = r"C:\Users\hp\project 10\yolo_model\yolov3.cfg"
MODEL_WEIGHTS = r"C:\Users\hp\project 10\yolo_model\yolov3.weights"
CLASS_NAMES = r"C:\Users\hp\project 10\yolo_model\coco.names"
INPUT_IMAGES_FOLDER = r"C:\Users\hp\project 10\raw_images"
OUTPUT_DETECTED_FOLDER = r"C:\Users\hp\project 10\detected_regions"
OUTPUT_PREPROCESSED_FOLDER = r"C:\Users\hp\project 10\preprocessed_images"
OUTPUT_CSV = r"C:\Users\hp\project 10\output.csv"

# Ensure output folders exist
os.makedirs(OUTPUT_DETECTED_FOLDER, exist_ok=True)
os.makedirs(OUTPUT_PREPROCESSED_FOLDER, exist_ok=True)

# Load YOLO model
net = cv2.dnn.readNet(MODEL_WEIGHTS, MODEL_CONFIG)
with open(CLASS_NAMES, "r") as f:
    classes = f.read().strip().split("\n")
layer_names = net.getLayerNames()
out_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

def detect_text_regions(image_path):
    image = cv2.imread(image_path)
    height, width = image.shape[:2]
    
    # Prepare image for YOLO
    blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(out_layers)
    
    boxes, confidences = [], []
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5 and classes[class_id] == "text":
                box = detection[0:4] * np.array([width, height, width, height])
                (centerX, centerY, w, h) = box.astype("int")
                x, y = int(centerX - w / 2), int(centerY - h / 2)
                boxes.append([x, y, int(w), int(h)])
                confidences.append(float(confidence))
    
    # Apply Non-Maximum Suppression
    indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    detected_images = []
    if len(indices) > 0:
        for i in indices.flatten():
            x, y, w, h = boxes[i]
            cropped_region = image[y:y+h, x:x+w]
            output_path = os.path.join(OUTPUT_DETECTED_FOLDER, f"detected_{os.path.basename(image_path)}")
            cv2.imwrite(output_path, cropped_region)
            detected_images.append(output_path)
    return detected_images

# Run inference on new images
for filename in os.listdir(INPUT_IMAGES_FOLDER):
    input_path = os.path.join(INPUT_IMAGES_FOLDER, filename)
    detect_text_regions(input_path)

print("Inference complete! Detected regions saved.")

# --- PREPROCESS DETECTED REGIONS ---
def preprocess_image(image_path, output_path):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not read {image_path}")
        return
    
    # Resize the image (scale 3x)
    image_resized = cv2.resize(image, None, fx=3, fy=3, interpolation=cv2.INTER_LINEAR)
    
    # Convert to grayscale
    gray = cv2.cvtColor(image_resized, cv2.COLOR_BGR2GRAY)
    
    # Apply Gaussian blur
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    
    # Apply thresholding (Otsu's method)
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    
    # Invert colors for better OCR accuracy
    processed = cv2.bitwise_not(thresh)
    
    # Save the preprocessed image
    cv2.imwrite(output_path, processed)
    print(f"Processed image saved: {output_path}")

# Process all detected region images
for filename in os.listdir(OUTPUT_DETECTED_FOLDER):
    input_path = os.path.join(OUTPUT_DETECTED_FOLDER, filename)
    output_path = os.path.join(OUTPUT_PREPROCESSED_FOLDER, filename)
    preprocess_image(input_path, output_path)

print("Preprocessing complete!")

# --- EXTRACT TEXT USING TESSERACT ---
def extract_text(image_path):
    image = cv2.imread(image_path)
    if image is None:
        return ""
    text = pytesseract.image_to_string(image, config='--psm 6')
    return text.strip()

# Open CSV file to write extracted text
with open(OUTPUT_CSV, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Filename", "Extracted Text"])
    
    for filename in os.listdir(OUTPUT_PREPROCESSED_FOLDER):
        input_path = os.path.join(OUTPUT_PREPROCESSED_FOLDER, filename)
        extracted_text = extract_text(input_path)
        writer.writerow([filename, extracted_text])
        print(f"Text extracted from {filename} and saved.")

print("Text extraction complete! CSV file saved.")


Inference complete! Detected regions saved.
Processed image saved: C:\Users\hp\project 10\preprocessed_images\region_0.jpg
Processed image saved: C:\Users\hp\project 10\preprocessed_images\region_1.jpg
Processed image saved: C:\Users\hp\project 10\preprocessed_images\region_10.jpg
Processed image saved: C:\Users\hp\project 10\preprocessed_images\region_100.jpg
Processed image saved: C:\Users\hp\project 10\preprocessed_images\region_101.jpg
Processed image saved: C:\Users\hp\project 10\preprocessed_images\region_102.jpg
Processed image saved: C:\Users\hp\project 10\preprocessed_images\region_103.jpg
Processed image saved: C:\Users\hp\project 10\preprocessed_images\region_104.jpg
Processed image saved: C:\Users\hp\project 10\preprocessed_images\region_105.jpg
Processed image saved: C:\Users\hp\project 10\preprocessed_images\region_106.jpg
Processed image saved: C:\Users\hp\project 10\preprocessed_images\region_107.jpg
Processed image saved: C:\Users\hp\project 10\preprocessed_images\regi