In [1]:
import cv2
import os
import numpy as np
import easyocr
import matplotlib.pyplot as plt

In [2]:
def load_image_by_name(folder_path, target_filename):
    """
    Loads a specific image from a folder using OpenCV.

    Args:
        folder_path (str): Path to the folder containing images.
        target_filename (str): Name of the image file to load.

    Returns:
        tuple: (filename, image_array) if the image was loaded, else None.
        Whenever None is returned, a message naming the reason (missing file,
        unsupported extension, or unreadable image) is printed first.
    """
    supported_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    img_path = os.path.join(folder_path, target_filename)

    # Report a missing file and an unsupported extension separately, so a
    # file that exists is never described as "not found".
    if not os.path.exists(img_path):
        print(f"Error: '{target_filename}' not found in '{folder_path}'.")
        return None

    if not target_filename.lower().endswith(supported_extensions):
        print(f"Error: '{target_filename}' does not have a supported image extension {supported_extensions}.")
        return None

    # cv2.imread signals a decoding failure by returning None instead of raising.
    image = cv2.imread(img_path)
    if image is None:
        print(f"Warning: Could not load {target_filename}")
        return None

    return (target_filename, image)

In [3]:
def slice_image_horizontally(image, percentage=20):
    """
    Cuts a strip off the top and a strip off the bottom of an image.

    Args:
        image (numpy.ndarray): Image array whose first axis is the height.
        percentage (float): Height of each strip as a percentage of the image
            height (0-100). The strip height is truncated to whole rows.

    Returns:
        tuple: (top_slice, bottom_slice), each `int(percentage / 100 * height)`
        rows tall. Both are empty (0 rows) when that value is 0.

    Raises:
        ValueError: If `percentage` is outside the range 0-100.
    """
    if not 0 <= percentage <= 100:
        raise ValueError(f"percentage must be between 0 and 100, got {percentage}")

    height = image.shape[0]
    slice_height = int((percentage / 100) * height)

    top_slice = image[:slice_height, :]
    # Use an explicit start row: `image[-slice_height:]` would select the
    # WHOLE image when slice_height is 0, because -0 == 0.
    bottom_slice = image[height - slice_height:, :]

    return top_slice, bottom_slice

In [4]:
def display_slices(top_slice, bottom_slice):
    """
    Shows the top and bottom slices next to each other in one Matplotlib figure.

    Both slices are converted from BGR to RGB before plotting so that the
    colours are rendered correctly.

    Args:
        top_slice (numpy.ndarray): The top slice of the image (BGR).
        bottom_slice (numpy.ndarray): The bottom slice of the image (BGR).

    Raises:
        ValueError: If either slice is None.
    """
    if any(candidate is None for candidate in (top_slice, bottom_slice)):
        raise ValueError("Error: One or both image slices are None.")

    # Pair each panel title with its RGB-converted image, left to right.
    panels = [
        ("Top Slice", cv2.cvtColor(top_slice, cv2.COLOR_BGR2RGB)),
        ("Bottom Slice", cv2.cvtColor(bottom_slice, cv2.COLOR_BGR2RGB)),
    ]

    plt.figure(figsize=(24, 18))
    for position, (panel_title, rgb_image) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(rgb_image)
        plt.title(panel_title)
        plt.axis("off")

    plt.show()

In [5]:
def rejoin_slices(top_slice, bottom_slice):
    """
    Stacks a top slice on a bottom slice to form one image.

    Args:
    - top_slice (numpy.ndarray): Slice placed at the top of the result.
    - bottom_slice (numpy.ndarray): Slice placed underneath the top slice.

    Returns:
    - numpy.ndarray: The two slices joined along the height axis.

    Raises:
    - ValueError: If the two slices differ in width.
    """
    top_width = top_slice.shape[1]
    bottom_width = bottom_slice.shape[1]

    if top_width == bottom_width:
        # Vertical stack: rows of the bottom slice follow rows of the top slice.
        return np.vstack([top_slice, bottom_slice])

    raise ValueError("Top and bottom slices must have the same width")

In [6]:
def slice_image_width(image_array, slice_width=500):
    """
    Slices an image (as a NumPy array) into consecutive chunks along its width.

    The full height is kept in every chunk. The last chunk is narrower than
    `slice_width` when the image width is not an exact multiple of it.

    Args:
    - image_array (numpy.ndarray): Image as a NumPy array, either
      (height, width) or (height, width, channels).
    - slice_width (int): Width in pixels of each chunk. Must be positive.

    Returns:
    - list of numpy.ndarray: The chunks in left-to-right order.

    Raises:
    - ValueError: If `slice_width` is not positive.
    """
    if slice_width <= 0:
        raise ValueError(f"slice_width must be a positive integer, got {slice_width}")

    # Only the width is needed; reading shape[1] works for 2-D and 3-D images.
    img_width = image_array.shape[1]

    # NumPy clamps the right bound of a slice, so the last chunk needs no special case.
    return [
        image_array[:, left:left + slice_width]
        for left in range(0, img_width, slice_width)
    ]

In [7]:
def visualize_slices(sliced_images):
    """
    Displays a list of image slices stacked vertically, one subplot per slice.

    Three-channel slices are treated as BGR (the channel order the other
    display helpers in this notebook assume) and converted to RGB before
    plotting. `None` entries are shown as empty panels so that slice numbering
    stays aligned with the input list.

    Args:
    - sliced_images (list of numpy.ndarray): List of sliced images; entries
      may be None.

    Raises:
    - ValueError: If `sliced_images` is None or empty.
    """
    if sliced_images is None or len(sliced_images) == 0:
        raise ValueError("Error: No image slices provided.")

    num_slices = len(sliced_images)

    # Figure height grows with the number of slices. squeeze=False keeps
    # `axes` two-dimensional even when there is only one slice.
    _, axes = plt.subplots(num_slices, 1, figsize=(10, num_slices * 2), squeeze=False)

    for i, (slice_img, ax) in enumerate(zip(sliced_images, axes[:, 0])):
        if slice_img is not None:
            if slice_img.ndim == 3 and slice_img.shape[2] == 3:
                # Matplotlib expects RGB; without this, red and blue are swapped.
                slice_img = cv2.cvtColor(slice_img, cv2.COLOR_BGR2RGB)
            ax.imshow(slice_img)
            ax.set_title(f"Slice {i + 1}")
        ax.axis('off')

    plt.tight_layout()
    plt.show()

In [8]:
def rotate_slices(image_slices, angle=90):
    """
    Rotates a list of image slices by a specified angle.

    Each output canvas is enlarged to the bounding box of the rotated image
    so that no content is cropped.

    Args:
        image_slices (list of numpy.ndarray): List of image slices. `None`
            entries are passed through unchanged.
        angle (float): Rotation angle in degrees, passed unchanged to
            `cv2.getRotationMatrix2D`. In OpenCV a positive angle rotates
            COUNTER-clockwise, so the default of 90 turns each slice 90°
            counter-clockwise; pass a negative angle for a clockwise rotation.

    Returns:
        list of numpy.ndarray: List of rotated image slices, in input order.
    """
    rotated_slices = []

    for img in image_slices:
        if img is None:
            rotated_slices.append(None)
            continue

        # Get image dimensions
        (h, w) = img.shape[:2]
        center = (w // 2, h // 2)

        # Compute rotation matrix (scale 1.0: rotate only, no resizing)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)

        # Compute the new bounding dimensions from |cos| and |sin| of the angle
        cos = np.abs(M[0, 0])
        sin = np.abs(M[0, 1])
        new_w = int((h * sin) + (w * cos))
        new_h = int((h * cos) + (w * sin))

        # Shift the translation terms so the rotated image is centred in the
        # enlarged canvas rather than around the original centre
        M[0, 2] += (new_w / 2) - center[0]
        M[1, 2] += (new_h / 2) - center[1]

        # Rotate image; pixels not covered by the source repeat the nearest edge pixel
        rotated = cv2.warpAffine(img, M, (new_w, new_h), borderMode=cv2.BORDER_REPLICATE)
        rotated_slices.append(rotated)

    return rotated_slices

In [9]:
def detect_and_display_text(image_slices):
    """
    Detects text in multiple image slices using EasyOCR and displays them in a grid.

    Each slice is drawn in its own subplot (at most 4 per row), titled with
    its 1-based index and the detected strings joined by ' | ', or
    'No text detected' when OCR returns nothing. `None` slices leave their
    subplot blank.

    Args:
        image_slices (list): List of image slices (NumPy arrays, BGR channel
            order); entries may be None.

    Raises:
        ValueError: If `image_slices` is empty.
    """
    if not image_slices:
        raise ValueError("Error: No image slices provided.")

    reader = easyocr.Reader(['en'], gpu=True)

    def detect_text(image):
        """Detects text in a single image slice."""
        # detail=0 returns only the recognised strings (no bounding boxes)
        return reader.readtext(image, detail=0)

    num_slices = len(image_slices)

    # Define grid layout (adjust the number of columns as needed)
    cols = min(num_slices, 4)  # Max 4 images per row
    rows = (num_slices + cols - 1) // cols  # Ceiling division: rows needed

    # squeeze=False makes plt.subplots always return a 2-D array of Axes.
    # Without it a 1x1 grid yields a bare Axes object, which has no .ravel(),
    # so calling this function with a single slice used to fail.
    fig, axes = plt.subplots(rows, cols, figsize=(cols * 5, rows * 5), squeeze=False)
    axes = axes.ravel()

    for i, (slice_img, ax) in enumerate(zip(image_slices, axes)):
        if slice_img is None:
            ax.axis("off")
            continue

        # Convert BGR to RGB for Matplotlib display
        slice_rgb = cv2.cvtColor(slice_img, cv2.COLOR_BGR2RGB)

        # Detect text in the slice
        detected_text = detect_text(slice_img)
        text_display = ' | '.join(detected_text) if detected_text else 'No text detected'

        # Display image slice
        ax.imshow(slice_rgb)
        ax.set_title(f"Slice {i+1}\n{text_display}", fontsize=10)
        ax.axis("off")  # Hide axes

    # Hide any unused subplots when the grid has more cells than slices
    for unused_ax in axes[num_slices:]:
        unused_ax.axis("off")

    plt.tight_layout()
    plt.show()

In [10]:
def extract_label_half(image_slices):
    """
    Keeps, for every slice, the vertical half (left or right) with more Canny edges.

    The edge count is used as a stand-in for "contains the label", on the
    assumption that text produces many strong edges. When both halves have
    the same count, the left half is kept.

    Args:
        image_slices (list of numpy.ndarray): List of image slices (BGR).
            `None` entries are passed through unchanged.

    Returns:
        list of numpy.ndarray: For each input slice, the half of the original
        (colour) slice that has the higher edge count.
    """

    def _edge_pixel_count(gray_region):
        """Counts the pixels that Canny (thresholds 50/150) marks as edges."""
        edge_map = cv2.Canny(gray_region, 50, 150)
        return np.count_nonzero(edge_map > 0)

    label_halves = []

    for slice_img in image_slices:
        if slice_img is None:
            label_halves.append(None)
            continue

        # Edge detection runs on a grayscale copy; the returned half is cut
        # from the original colour slice.
        gray = cv2.cvtColor(slice_img, cv2.COLOR_BGR2GRAY)
        _, width = gray.shape[:2]
        midpoint = width // 2

        left_score = _edge_pixel_count(gray[:, :midpoint])
        right_score = _edge_pixel_count(gray[:, midpoint:])

        # The right half is kept only when its count is strictly higher.
        if right_score > left_score:
            chosen_half = slice_img[:, midpoint:]
        else:
            chosen_half = slice_img[:, :midpoint]
        label_halves.append(chosen_half)

    return label_halves

/mnt/shared/eric/Full_Set_Processed/PARA2_BGSUB_wavelet/

In [11]:
# Load the processed and the binary version of the same scan from this
# dataset's PROCESSED and BINARIES folders. Each result is a (filename, image)
# tuple, or None if loading failed (later cells index [1] to get the image).
PARA2_BGSUB_Wavelet_processed_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/PARA2_BGSUB_wavelet/PROCESSED' , '1968_3_19396513.jpeg_processed.tiff')
PARA2_BGSUB_Wavelet_binary_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/PARA2_BGSUB_wavelet/BINARIES' , '1968_3_19396513.jpeg_binary.tiff' )

In [12]:
# Keep only the top and bottom strips of the processed image (default 20% each),
# stack them into one image, then cut it into fixed-width chunks (default 500 px).
first_top_processed, first_bottom_processed = slice_image_horizontally(PARA2_BGSUB_Wavelet_processed_img[1])
first_rejoined_processed = rejoin_slices(first_top_processed, first_bottom_processed)
first_processed_slices = slice_image_width(first_rejoined_processed)


In [None]:
# Rotate every chunk (rotate_slices default angle=90), then run EasyOCR on
# each one and plot the chunks with their detected text.
detect_and_display_text(rotate_slices(first_processed_slices))

In [15]:
# Same strip / rejoin / chunk pipeline as for the processed image, applied to
# the binary image of this dataset.
first_top_binary, first_bottom_binary = slice_image_horizontally(PARA2_BGSUB_Wavelet_binary_img[1])
first_rejoined_binary = rejoin_slices(first_top_binary, first_bottom_binary)
first_binary_slices = slice_image_width(first_rejoined_binary)

In [None]:
# Rotate the binary chunks (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(first_binary_slices))

/mnt/shared/eric/Full_Set_Processed/Full_set_PARA2_NoBGSub_wavelet/

In [17]:
# Load the processed and the binary version of the same scan from this
# dataset's PROCESSED and BINARIES folders. Each result is a (filename, image)
# tuple, or None if loading failed (later cells index [1] to get the image).
PARA2_NoBGSub_wavelet_processed_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/Full_set_PARA2_NoBGSub_wavelet/PROCESSED', '1968_3_19396513.jpeg_processed.tiff')
PARA2_NoBGSub_wavelet_binary_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/Full_set_PARA2_NoBGSub_wavelet/BINARIES','1968_3_19396513.jpeg_binary.tiff')

In [18]:
# Keep only the top and bottom strips of the processed image (default 20% each),
# stack them into one image, then cut it into fixed-width chunks (default 500 px).
second_top_processed, second_bottom_processed= slice_image_horizontally(PARA2_NoBGSub_wavelet_processed_img[1])
second_rejoined_processed = rejoin_slices(second_top_processed, second_bottom_processed)
second_processed_slices = slice_image_width(second_rejoined_processed)

In [None]:
# Rotate the processed chunks (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(second_processed_slices))

In [20]:
# Same strip / rejoin / chunk pipeline as for the processed image, applied to
# the binary image of this dataset.
second_top_binary, second_bottom_binary = slice_image_horizontally(PARA2_NoBGSub_wavelet_binary_img[1])
second_rejoined_binary = rejoin_slices(second_top_binary, second_bottom_binary)
second_binary_slices = slice_image_width(second_rejoined_binary)

In [None]:
# Rotate the binary chunks (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(second_binary_slices))

/mnt/shared/eric/Full_Set_Processed/PARA2_BGSUB-nLMean/

In [28]:
# Load the processed and the binary version of the same scan from this
# dataset's PROCESSED and BINARIES folders. Each result is a (filename, image)
# tuple, or None if loading failed (later cells index [1] to get the image).
PARA2_BGSUB_nLMean_processed_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/PARA2_BGSUB-nLMean/PROCESSED', '1968_3_19396513.jpeg_processed.tiff')
PARA2_BGSUB_nLMean_binary_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/PARA2_BGSUB-nLMean/BINARIES','1968_3_19396513.jpeg_binary.tiff')

In [29]:
# Keep only the top and bottom strips of the processed image (default 20% each),
# stack them into one image, then cut it into fixed-width chunks (default 500 px).
third_top_processed, third_bottom_processed = slice_image_horizontally(PARA2_BGSUB_nLMean_processed_img[1])
third_rejoined_processed = rejoin_slices(third_top_processed, third_bottom_processed)
third_processed_slices = slice_image_width(third_rejoined_processed)

In [None]:
# Rotate the processed chunks (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(third_processed_slices))

In [30]:
# Same strip / rejoin / chunk pipeline as for the processed image, applied to
# the binary image of this dataset.
third_top_binary, third_bottom_binary = slice_image_horizontally(PARA2_BGSUB_nLMean_binary_img[1])
third_rejoined_binary = rejoin_slices(third_top_binary, third_bottom_binary)
third_binary_slices = slice_image_width(third_rejoined_binary)

In [None]:
# Rotate the binary chunks (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(third_binary_slices))

/mnt/shared/eric/Full_Set_Processed/Full_Set_PARA2_noBGSub_nLMeans/


In [32]:
# Load the processed and the binary version of the same scan from this
# dataset's PROCESSED and BINARIES folders. Each result is a (filename, image)
# tuple, or None if loading failed (later cells index [1] to get the image).
PARA2_noBGSub_nLMeans_processed_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/Full_Set_PARA2_noBGSub_nLMeans/PROCESSED', '1968_3_19396513.jpeg_processed.tiff')
PARA2_noBGSub_nLMeans_binary_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/Full_Set_PARA2_noBGSub_nLMeans/BINARIES','1968_3_19396513.jpeg_binary.tiff')

In [33]:
# Keep only the top and bottom strips of the processed image (default 20% each),
# stack them into one image, then cut it into fixed-width chunks (default 500 px).
fourth_top_processed, fourth_bottom_processed = slice_image_horizontally(PARA2_noBGSub_nLMeans_processed_img[1])
fourth_rejoined_processed = rejoin_slices(fourth_top_processed, fourth_bottom_processed)
fourth_processed_slices = slice_image_width(fourth_rejoined_processed)

In [None]:
# Rotate the processed chunks (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(fourth_processed_slices))

In [34]:
# Same strip / rejoin / chunk pipeline as for the processed image, applied to
# the binary image of this dataset.
fourth_top_binary, fourth_bottom_binary = slice_image_horizontally(PARA2_noBGSub_nLMeans_binary_img[1])
fourth_rejoined_binary = rejoin_slices(fourth_top_binary, fourth_bottom_binary)
fourth_binary_slices = slice_image_width(fourth_rejoined_binary)

In [None]:
# Rotate the binary chunks (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(fourth_binary_slices))

/mnt/shared/eric/Full_Set_Processed/Full_Set_No_BG_Subtract_Wavelet/

In [39]:
# Load the processed and the binary version of the same scan from this
# dataset's PROCESSED and BINARIES folders. Each result is a (filename, image)
# tuple, or None if loading failed (later cells index [1] to get the image).
No_BG_Subtract_Wavelet_processed_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/Full_Set_No_BG_Subtract_Wavelet/PROCESSED', '1968_3_19396513.jpeg_processed.tiff')
No_BG_Subtract_Wavelet_binary_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/Full_Set_No_BG_Subtract_Wavelet/BINARIES','1968_3_19396513.jpeg_binary.tiff')

In [40]:
# Keep only the top and bottom strips of the processed image (default 20% each),
# stack them into one image, then cut it into fixed-width chunks (default 500 px).
fifth_top_processed, fifth_bottom_processed = slice_image_horizontally(No_BG_Subtract_Wavelet_processed_img[1])
fifth_rejoined_processed = rejoin_slices(fifth_top_processed, fifth_bottom_processed)
fifth_processed_slices = slice_image_width(fifth_rejoined_processed)

In [None]:
# Rotate the processed chunks (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(fifth_processed_slices))

In [42]:
# Same strip / rejoin / chunk pipeline as for the processed image, applied to
# the binary image of this dataset.
fifth_top_binary, fifth_bottom_binary = slice_image_horizontally(No_BG_Subtract_Wavelet_binary_img[1])
fifth_rejoined_binary = rejoin_slices(fifth_top_binary, fifth_bottom_binary)
fifth_binary_slices = slice_image_width(fifth_rejoined_binary)

In [None]:
# Rotate the binary chunks (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(fifth_binary_slices))

/mnt/shared/eric/Full_Set_Processed/Full_Set_No_BG_Subtract_nLMeans/

In [44]:
# Load the processed and the binary version of the same scan from this
# dataset's PROCESSED and BINARIES folders. Each result is a (filename, image)
# tuple, or None if loading failed (later cells index [1] to get the image).
No_BG_Subtract_nLMeans_processed_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/Full_Set_No_BG_Subtract_nLMeans/PROCESSED', '1968_3_19396513.jpeg_processed.tiff')
No_BG_Subtract_nLMeans_binary_img = load_image_by_name('/mnt/shared/eric/Full_Set_Processed/Full_Set_No_BG_Subtract_nLMeans/BINARIES','1968_3_19396513.jpeg_binary.tiff')

In [45]:
# Keep only the top and bottom strips of the processed image (default 20% each),
# stack them into one image, then cut it into fixed-width chunks (default 500 px).
sixth_top_processed, sixth_bottom_processed = slice_image_horizontally(No_BG_Subtract_nLMeans_processed_img[1])
sixth_rejoined_processed = rejoin_slices(sixth_top_processed, sixth_bottom_processed)
sixth_processed_slices = slice_image_width(sixth_rejoined_processed)

In [None]:
# Rotate the processed chunks (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(sixth_processed_slices))

In [47]:
# Strip / rejoin / chunk pipeline applied to the binary image of this dataset.
# The top strip must be rejoined with the bottom strip of the SAME image
# (sixth_bottom_binary), not with a strip left over from another dataset.
sixth_top_binary, sixth_bottom_binary = slice_image_horizontally(No_BG_Subtract_nLMeans_binary_img[1])
sixth_rejoined_binary = rejoin_slices(sixth_top_binary, sixth_bottom_binary)
sixth_binary_slices = slice_image_width(sixth_rejoined_binary)

In [None]:
# Rotate the binary chunks (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(sixth_binary_slices))

ORIGINALS

In [57]:
# Load the original JPEG of the same scan as a baseline for the OCR comparison.
# The result is a (filename, image) tuple, or None if loading failed.
original_img = load_image_by_name('/mnt/input/Images', '1968_3_19396513.jpeg')

In [58]:
# Same strip / rejoin / chunk pipeline, applied to the original image.
# NOTE: despite the `_binary` suffix in these names, the data comes from
# `original_img` (the original JPEG), not from a binary image.
original_top_binary, original_bottom_binary = slice_image_horizontally(original_img[1])
original_rejoined_binary = rejoin_slices(original_top_binary, original_bottom_binary)
original_binary_slices = slice_image_width(original_rejoined_binary)

In [None]:
# Rotate the chunks of the original image (default angle=90) and show the OCR result for each.
detect_and_display_text(rotate_slices(original_binary_slices))

We noticed that, in the sliced images, EasyOCR detects the written text at the edges of the image more accurately because it is written in a bigger font than the targeted labels on the image itself. To make the image fed to EasyOCR more zoomed in on the label (in other words, to make the labels appear in a bigger font for more accurate detection), we tried slicing the rotated images vertically in half and extracting the halves that contain the labels without relying on EasyOCR. Since text usually has strong edges, we attempted to detect the labels using Canny edge detection and, on that basis, to extract the half containing the labels and apply EasyOCR to it. However, this approach failed to identify the labeled half in the majority of the images.

In [None]:
# Canny-based attempt: rotate the chunks of the sixth processed image, keep
# the half of each chunk with more detected edges, and plot the kept halves.
sixth_rotated_processed_slices = rotate_slices(sixth_processed_slices)
halves = extract_label_half(sixth_rotated_processed_slices)
visualize_slices(halves)

In [53]:
def extract_label_half_ocr(image_slices):
    """
    Keeps, for every slice, the vertical half (left or right) in which
    EasyOCR finds more text entries.

    Each slice is split down the middle and OCR is run on both halves. The
    right half is kept only when it yields strictly more entries than the
    left; on a tie (including no text in either half) the left half is kept.

    Args:
        image_slices (list of numpy.ndarray): List of image slices. `None`
            entries are passed through unchanged.

    Returns:
        list of numpy.ndarray: For each input slice, the selected half.
    """
    ocr_reader = easyocr.Reader(['en'], gpu=True)

    def _select_half(slice_img):
        """Returns the half of one slice with more OCR results (None stays None)."""
        if slice_img is None:
            return None

        # The slice is passed to OCR as-is; no grayscale conversion is done.
        _, width = slice_img.shape[:2]
        midpoint = width // 2
        left_half = slice_img[:, :midpoint]
        right_half = slice_img[:, midpoint:]

        # detail=0 makes readtext return only the recognised strings.
        left_text = ocr_reader.readtext(left_half, detail=0)
        right_text = ocr_reader.readtext(right_half, detail=0)

        return right_half if len(right_text) > len(left_text) else left_half

    return [_select_half(slice_img) for slice_img in image_slices]

In [None]:
# OCR-based attempt: rotate the chunks of the sixth processed image again,
# keep the half of each chunk in which EasyOCR finds more text, and plot the
# kept halves. This reassigns `halves`.
sixth_rotated_processed_slices = rotate_slices(sixth_processed_slices)
halves = extract_label_half_ocr(sixth_rotated_processed_slices)
visualize_slices(halves)

In [None]:
# Run EasyOCR on the extracted halves and plot them with the detected text.
# `halves` holds whatever the most recently executed extraction cell assigned.
detect_and_display_text(halves)