## **PREPROCESSING: BORDER REMOVAL**
 
This segment crops a fixed pixel margin from each edge of the image to remove the scanner/input frame, preventing it from interfering with contour detection and slip segmentation.


In [1]:
import cv2

# 1. Load the original image
# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded, so the result has to be checked explicitly.
img = cv2.imread('input_image.jpeg')

if img is not None:
    # Get dimensions (Height, Width)
    h, w = img.shape[:2]

    # 2. Define your specific pixel offsets (number of pixels removed per side)
    top = 22
    bottom = 33
    right = 16
    left = 26

    if top + bottom >= h or left + right >= w:
        # The margins would consume the whole image: the slice below would be
        # empty and cv2.imwrite would fail on it, so stop with a clear message.
        print(f"Error: Crop margins exceed the image size ({w}w x {h}h).")
    else:
        # 3. Apply the crop
        # Vertical (Y): Start at 'top', end at 'total height - bottom'
        # Horizontal (X): Start at 'left', end at 'total width - right'
        cropped_img = img[top : h - bottom, left : w - right]

        # 4. Save the result
        # cv2.imwrite reports failure through its boolean return value, so
        # only claim success when the file was actually written.
        if cv2.imwrite('preprocessed_input_image.jpg', cropped_img):
            print(f"Original dimensions: {w}w x {h}h")
            print(f"New dimensions: {cropped_img.shape[1]}w x {cropped_img.shape[0]}h")
            print("Successfully removed the specified pixels from all four sides.")
        else:
            print("Error: Could not write 'preprocessed_input_image.jpg'.")
else:
    print("Error: Could not find 'input_image.jpeg'.")

Original dimensions: 1085w x 1542h
New dimensions: 1043w x 1487h
Successfully removed the specified pixels from all four sides.


## Recursive Sobel Splitting Module
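This segment handles "Mega-Slips" (tall blocks containing multiple receipts) by computing the vertical intensity gradient (a Sobel filter in the y-direction, which responds to horizontal edges), summing it along each row, and cutting at the row with the least edge activity.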

In [2]:
import cv2
import os
import numpy as np

def recursive_sobel_split(img, x, y, w, h, output_folder, annotated_img):
    """
    Save the region (x, y, w, h) of `img` as a slip, splitting it first if it
    is tall enough to hold more than one slip.

    Regions taller than 1050 px are scanned for a horizontal gap: the
    y-direction Sobel response is summed per row, and the quietest row in the
    middle 30%-70% of the region is used as the cut when its response is below
    half of the region's mean row response. Both halves are then processed
    recursively. Regions that are not split are written to
    `output_folder` as ``slip_<n>.jpg`` and outlined on `annotated_img`.

    Args:
        img: Source image (converted with COLOR_BGR2GRAY, so a BGR image).
        x, y: Top-left corner of the region inside `img`.
        w, h: Width and height of the region in pixels.
        output_folder: Directory the slip images are written to.
        annotated_img: Image that is drawn on in place (rectangle + label).

    Returns:
        None. Results are the files written and the drawing on `annotated_img`.
    """
    # Counter stored on the function object so numbering continues across
    # recursive calls and can be reset/shared by the caller.
    if not hasattr(recursive_sobel_split, "count"):
        recursive_sobel_split.count = 1

    # Extract the ROI (the mega-slip)
    roi = img[y:y+h, x:x+w]

    # We only want to split if the block is tall enough to potentially be 2 slips
    if h > 1050:
        gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # 1. Sobel derivative along y (dx=0, dy=1): responds to horizontal
        # edges such as text baselines and slip borders.
        # The magnitudes are kept as floats. A 3x3 Sobel on 8-bit data can
        # exceed 255, and casting to uint8 does not saturate, so strong edges
        # would wrap around to small values and could look like a gap.
        sobel_y = np.absolute(cv2.Sobel(gray_roi, cv2.CV_64F, 0, 1, ksize=3))

        # 2. Row-wise projection of the edge magnitudes.
        # We look for a 'valley' where there are very few horizontal edges (the gap)
        projection = np.sum(sobel_y, axis=1)

        # Search for a gap in the middle 30% to 70% of the block to avoid edge noise
        search_start, search_end = int(h * 0.3), int(h * 0.7)
        # Row with the minimum edge response (the cleanest gap). Cast to a
        # plain int so the coordinates derived from it are not NumPy scalars.
        split_row = search_start + int(np.argmin(projection[search_start:search_end]))

        # Check if this "valley" is actually a gap (not just a quiet line of text)
        if projection[split_row] < np.mean(projection) * 0.5:
            # Split into Top and Bottom; both parts are strictly smaller than
            # the current block, so the recursion terminates.
            recursive_sobel_split(img, x, y, w, split_row, output_folder, annotated_img)
            recursive_sobel_split(img, x, y + split_row, w, h - split_row, output_folder, annotated_img)
            return

    # 3. Base Case: If no split is needed or found, save the slip
    file_path = os.path.join(output_folder, f"slip_{recursive_sobel_split.count}.jpg")
    cv2.imwrite(file_path, roi)

    # Annotate with a unique color (Cyan) to show it was a split result
    cv2.rectangle(annotated_img, (x, y), (x + w, y + h), (255, 255, 0), 4)
    cv2.putText(annotated_img, f"Split {recursive_sobel_split.count}: {w}x{h}",
                (x + 5, y + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)

    print(f"‚úÖ Saved Recursive Slip {recursive_sobel_split.count}: {w}x{h}")
    recursive_sobel_split.count += 1


## Main Detection & Segmentation Pipeline
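This segment preprocesses the whole image (grayscale conversion, Gaussian blur, Canny edge detection, a vertical morphological closing, and dilation), extracts the external contours, and classifies each contour's bounding box by size as either a standard slip or a mega-slip.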

In [3]:
def final_split_slips(image_path, output_folder):
    """
    Detect slips in the image at `image_path` and save each one as a file.

    The image is reduced to an edge map (grayscale, Gaussian blur, Canny),
    closed with a tall 120x1 kernel and dilated, and the external contours of
    the result are taken as slip candidates. A candidate whose bounding box is
    250-500 px wide and 500-1100 px high is saved directly as a standard slip;
    a candidate taller than 1100 px is handed to `recursive_sobel_split`.
    Other candidates are ignored. An annotated overview is written to
    ``annotated_evaluation.jpg`` in the working directory.

    Args:
        image_path: Path of the image to segment.
        output_folder: Directory for the ``slip_<n>.jpg`` files; created if
            it does not exist.

    Returns:
        None. If the image cannot be read, an error is printed and nothing
        is written.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure with None; report it instead of
        # returning silently so the caller's output is not misleading.
        print(f"Error: Could not read image '{image_path}'.")
        return
    annotated_img = img.copy()
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    # Preprocessing: edge map, then a tall (120x1) closing that bridges
    # vertical breaks in the edges, then a small dilation to thicken them.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (9, 9), 0)
    edged = cv2.Canny(blurred, 30, 150)
    kernel = np.ones((120, 1), np.uint8)
    closed = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel)
    dilated = cv2.dilate(closed, np.ones((3, 3), np.uint8), iterations=2)

    contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Reset the shared slip counter so numbering starts at 1 for every run;
    # standard slips and recursively split slips draw from the same sequence.
    recursive_sobel_split.count = 1

    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)

        # Standard slips: bounding box within the expected single-slip size
        if (250 <= w <= 500) and (500 <= h <= 1100):
            roi = img[y:y+h, x:x+w]
            cv2.imwrite(os.path.join(output_folder, f"slip_{recursive_sobel_split.count}.jpg"), roi)
            cv2.rectangle(annotated_img, (x, y), (x + w, y + h), (0, 255, 0), 3)
            cv2.putText(annotated_img, f"Slip {recursive_sobel_split.count}: {w}x{h}",
                        (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            print(f"‚úÖ Found Standard Slip {recursive_sobel_split.count}: {w}x{h}")
            recursive_sobel_split.count += 1

        # Mega slips: too tall for one slip, so split recursively
        elif h > 1100:
            print(f"üîç Mega-Slip detected ({w}x{h}). Starting Recursive Sobel Split...")
            recursive_sobel_split(img, x, y, w, h, output_folder, annotated_img)

    cv2.imwrite("annotated_evaluation.jpg", annotated_img)

## Driver Code / Execution
This segment initializes the process by defining the input file and output destination.

In [4]:
# Run the segmentation on the border-cropped image.
# 'preprocessed_input_image.jpg' must already exist in the working directory.
final_split_slips(
    image_path='preprocessed_input_image.jpg',
    output_folder='output_slips',
)

print("\nFinished. Check 'output_slips' and 'annotated_evaluation.jpg' for results.")

‚úÖ Found Standard Slip 1: 354x644
‚úÖ Found Standard Slip 2: 262x559
‚úÖ Found Standard Slip 3: 284x598
üîç Mega-Slip detected (397x1426). Starting Recursive Sobel Split...
‚úÖ Saved Recursive Slip 4: 397x544
‚úÖ Saved Recursive Slip 5: 397x882

Finished. Check 'output_slips' and 'annotated_evaluation.jpg' for results.
