## Text Recognition

Now that we've extracted our images, we'll perform text recognition on each image's caption. This process involves checking that the caption text is readable (rotating the image if necessary), enhancing the image quality, and then using OCR (Optical Character Recognition) to extract the text.

In [None]:
import os
import time
import pandas as pd
from PIL import Image
from ultralytics import YOLO 
import torch  
import numpy as np
import glob
import pytesseract
from PIL import Image, ImageEnhance
from concurrent.futures import ThreadPoolExecutor

In [None]:
# Load the per-image metadata table; each row is later handed to
# process_image, which reads its 'cropped_filename' column.
metadata_csv_path = 'cropped_images_metadata.csv'
df = pd.read_csv(metadata_csv_path)

# Function to check text readability and correct orientation for a specific area of the image
def check_text_readability(img):
    """Return ``img`` in the first orientation whose caption strip contains letters.

    The image is tried at 0, 90, 180 and 270 degrees (PIL rotates
    counter-clockwise). For each orientation the bottom 5% of the *rotated*
    image is cropped and passed to Tesseract; the first orientation whose
    OCR output contains at least one alphabetic character is returned.

    The strip is cropped after rotating the whole image so that the region
    tested here is the same region a caller gets when it crops the bottom 5%
    of the returned image.

    Args:
        img: A PIL image (anything exposing ``size``, ``rotate`` and ``crop``).

    Returns:
        ``img`` itself when the unrotated caption strip is readable or when no
        orientation yields letters; otherwise a rotated copy of ``img``.
    """
    for angle in (0, 90, 180, 270):
        # Skip the no-op rotation so the original object is returned as-is.
        candidate = img if angle == 0 else img.rotate(angle, expand=True)
        width, height = candidate.size
        caption_area = candidate.crop((0, int(height * 0.95), width, height))

        # '--psm 7' asks Tesseract to treat the strip as a single text line.
        test_text = pytesseract.image_to_string(caption_area, config='--psm 7')
        if any(char.isalpha() for char in test_text):
            return candidate

    # No orientation produced letters: leave the image untouched.
    return img

# Function to process each image and measure processing time
def process_image(row):
    """OCR the caption strip of one cropped image and time the work.

    The image is opened, passed through ``check_text_readability`` to fix its
    orientation, and its bottom 5% is cropped, converted to grayscale,
    contrast-boosted, binarized and sent to Tesseract.

    Args:
        row: Mapping with a ``'cropped_filename'`` entry naming the image file.

    Returns:
        A ``(text, processing_time)`` tuple: the stripped OCR text (``""`` if
        processing failed) and the elapsed time in seconds.
    """
    # NOTE(review): output_folder is not defined in this section; it is
    # presumably set earlier in the notebook -- confirm.
    cropped_img_path = os.path.join(output_folder, row['cropped_filename'])
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted mid-run.
    start_time = time.perf_counter()

    try:
        with Image.open(cropped_img_path) as img:
            img = check_text_readability(img)
            img_width, img_height = img.size
            caption_img = img.crop((0, int(img_height * 0.95), img_width, img_height))

            # Grayscale + doubled contrast before thresholding.
            gray_img = caption_img.convert('L')
            enhanced_img = ImageEnhance.Contrast(gray_img).enhance(2)

            # Binarize: pixels brighter than the threshold become white,
            # everything else black.
            threshold = 128
            binarized_img = enhanced_img.point(lambda p: 255 if p > threshold else 0)
            text = pytesseract.image_to_string(binarized_img, config='--psm 6')
    except Exception as e:
        # Best-effort: a single unreadable image must not abort the batch.
        print(f"Failed to process {cropped_img_path}: {e}")
        text = ""

    processing_time = time.perf_counter() - start_time
    return text.strip(), processing_time

# Parallel processing and collect times
# executor.map yields results in input order, so they line up with df's rows.
with ThreadPoolExecutor(max_workers=4) as executor:
    results = list(executor.map(process_image, df.to_dict('records')))

# Unpack results and processing times
# zip(*[]) yields nothing to unpack, so an empty DataFrame is handled explicitly.
if results:
    text_data, times = zip(*results)
else:
    text_data, times = (), ()

# Add extracted text to the DataFrame
df['extracted_text'] = list(text_data)
df.to_csv('final_output_with_text.csv', index=False)

# Calculate average time per image
# total_time is the sum of per-image durations across all worker threads,
# not the wall-clock duration of the parallel run.
total_time = sum(times)
print(f"Total processing time: {total_time:.2f} seconds")
if times:
    average_time = total_time / len(times)
    print(f"Average processing time per image: {average_time:.2f} seconds")
else:
    average_time = 0.0
    print("No images were processed; average processing time is undefined.")