In [None]:
#FilterImages

import os
import cv2
import numpy as np
from PIL import Image, ImageStat
from shutil import copyfile

# Folder scanned by the quality filter
input_folder = "train_images"
os.makedirs(input_folder, exist_ok=True)

# Folder that receives copies of the images that pass the filter
output_folder = "test_images"
os.makedirs(output_folder, exist_ok=True)

# Calculate brightness
def calculate_brightness(image):
    """Return the mean perceived brightness of an image on the 0-255 scale.

    Args:
        image: Pixel array, either 2-D grayscale or 3-D with the channels in
            OpenCV's BGR order (what ``cv2.imread`` returns).

    Returns:
        float: Mean luminance of all pixels.
    """
    pixels = np.asarray(image, dtype=np.float64)

    # Single-channel input is already an intensity map.
    if pixels.ndim == 2 or pixels.shape[-1] == 1:
        return float(pixels.mean())

    # Weight the channels the same way a BGR -> gray conversion does. Taking
    # only the first band (as a plain per-band mean would) measures the blue
    # channel alone, which badly misjudges warm or cool tinted images.
    blue, green, red = pixels[..., 0], pixels[..., 1], pixels[..., 2]
    luminance = 0.114 * blue + 0.587 * green + 0.299 * red
    return float(luminance.mean())

# Contrast metric
def calculate_contrast(image):
    """Return the standard deviation of the image's grayscale intensities.

    The image is converted with ``cv2.COLOR_BGR2GRAY`` first, so it has to be
    a colour array in BGR channel order.
    """
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).std()

# Sharpness metric
def calculate_sharpness(image):
    """Return the variance of the Laplacian of the grayscale image.

    The image is converted with ``cv2.COLOR_BGR2GRAY`` and the Laplacian is
    computed in 64-bit float (``cv2.CV_64F``) before taking the variance.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edge_response = cv2.Laplacian(grayscale, cv2.CV_64F)
    return edge_response.var()

# Quality thresholds for OCR readiness.
# process_images() flags an image as OCR ready only when every metric is
# greater than or equal to its threshold below.
BRIGHTNESS_THRESHOLD = 50  # lower bound for the calculate_brightness() result
CONTRAST_THRESHOLD = 20  # lower bound for the calculate_contrast() result
SHARPNESS_THRESHOLD = 100  # lower bound for the calculate_sharpness() result

# Compute the three quality metrics for a single image file
def evaluate_image_quality(image_path):
    """Load an image and measure its brightness, contrast and sharpness.

    Args:
        image_path: Path of the file handed to ``cv2.imread``.

    Returns:
        tuple: ``(brightness, contrast, sharpness)``.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for the path.
    """
    loaded = cv2.imread(image_path)
    if loaded is not None:
        return (
            calculate_brightness(loaded),
            calculate_contrast(loaded),
            calculate_sharpness(loaded),
        )
    raise ValueError(f"Could not read the image at {image_path}. Ensure it is a valid image file.")

# Processing images
def process_images(input_folder, output_folder):
    """Score every file in ``input_folder`` and copy the OCR-ready ones.

    An image is OCR ready when its brightness, contrast and sharpness are all
    greater than or equal to the module-level thresholds. Ready images are
    copied, under the same file name, into ``output_folder``.

    Args:
        input_folder: Directory whose regular files are evaluated
            (sub-directories are ignored).
        output_folder: Existing directory that receives the accepted images.

    Returns:
        list[tuple]: One ``(image_name, brightness, contrast, sharpness,
        ocr_ready)`` tuple per readable image, ordered by file name.
        Files that cannot be decoded are reported on stdout and left out.
    """
    results = []
    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # report identical from run to run and across platforms.
    for image_name in sorted(os.listdir(input_folder)):
        image_path = os.path.join(input_folder, image_name)
        if not os.path.isfile(image_path):
            continue

        try:
            brightness, contrast, sharpness = evaluate_image_quality(image_path)
        except ValueError as e:
            # Log and skip files that are not valid images
            print(e)
            continue

        ocr_ready = (brightness >= BRIGHTNESS_THRESHOLD and
                     contrast >= CONTRAST_THRESHOLD and
                     sharpness >= SHARPNESS_THRESHOLD)
        results.append((image_name, brightness, contrast, sharpness, ocr_ready))

        # Keep a copy of every image that passed all three checks
        if ocr_ready:
            copyfile(image_path, os.path.join(output_folder, image_name))
    return results

# Run the quality filter over the input folder
image_quality_results = process_images(input_folder, output_folder)

# Print one summary line per evaluated image
for result in image_quality_results:
    report_line = f"Image: {result[0]}, Brightness: {result[1]:.2f}, Contrast: {result[2]:.2f}, Sharpness: {result[3]:.2f}, OCR Ready: {result[4]}"
    print(report_line)


In [1]:
#PreProcess Images

import os
import cv2
import numpy as np
from scipy.ndimage import interpolation as inter

# Folders used by the preprocessing stage
output_folder = 'output_images'
input_folder = 'test_images'

# Make sure the destination exists before any image is written
os.makedirs(output_folder, exist_ok=True)

def normalize_skew_angle(rect_angle):
    """Turn a ``cv2.minAreaRect`` angle into the rotation that levels the text.

    Older OpenCV releases report the rectangle angle in ``[-90, 0)`` while
    newer ones report it in ``(0, 90]``. Both are folded into a correction
    between -45 and +45 degrees, so an upright page (reported as -90 or 90)
    is left alone instead of being turned on its side.

    Args:
        rect_angle: The angle component returned by ``cv2.minAreaRect``.

    Returns:
        Rotation in degrees to pass to ``cv2.getRotationMatrix2D``.
    """
    if rect_angle < -45:
        return -(90 + rect_angle)
    if rect_angle >= 45:
        return 90 - rect_angle
    return -rect_angle


def preprocess_image(image_path, output_path):
    """Binarize, deskew and sharpen one image, then write it to disk.

    Steps: grayscale conversion, 5x5 Gaussian blur, Otsu binarization,
    rotation by the estimated skew angle, 3x3 sharpening filter.

    Args:
        image_path: Path of the image to read.
        output_path: Path the processed image is written to.

    Raises:
        ValueError: If the file at ``image_path`` cannot be decoded.
        OSError: If ``cv2.imwrite`` reports that the output was not written.
    """
    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read the image at {image_path}. Ensure it is a valid image file.")

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian Blur for noise reduction
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Apply Otsu's Thresholding for binarization
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Estimate the skew from the text pixels. After THRESH_BINARY the text of
    # a dark-on-light page is black, so treat whichever class is the minority
    # as text; fitting the rectangle to the page background would always
    # yield the image frame and therefore no usable skew.
    white_pixels = cv2.countNonZero(binary)
    if white_pixels * 2 > binary.size:
        text_mask = binary == 0
    else:
        text_mask = binary > 0
    coords = np.column_stack(np.where(text_mask)).astype(np.float32)

    if len(coords) == 0:
        # Blank page: nothing to fit a rectangle to, so do not rotate.
        angle = 0.0
    else:
        angle = normalize_skew_angle(cv2.minAreaRect(coords)[-1])

    # Rotate the image to deskew
    (h, w) = binary.shape[:2]
    center = (w // 2, h // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    deskewed = cv2.warpAffine(binary, rotation_matrix, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    # Sharpen the image
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened = cv2.filter2D(deskewed, -1, kernel)

    # Save preprocessed image; imwrite signals failure through its return
    # value, so surface it instead of silently losing the output.
    if not cv2.imwrite(output_path, sharpened):
        raise OSError(f"Could not write the preprocessed image to {output_path}.")

# File extensions treated as images ('.tif' is the common short form of '.tiff')
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')

# Iterate over all images in the input folder, in a reproducible order
# (os.listdir() returns entries in arbitrary order).
for filename in sorted(os.listdir(input_folder)):
    if not filename.lower().endswith(IMAGE_EXTENSIONS):
        continue

    input_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder, filename)

    try:
        preprocess_image(input_path, output_path)
    except (ValueError, cv2.error) as e:
        # Log and skip unreadable images so one bad file does not abort the batch
        print(f"Skipping {filename}: {e}")

print(f"Preprocessing complete. Preprocessed images are saved in '{output_folder}'.")


Preprocessing complete. Preprocessed images are saved in 'output_images'.
