# OCR Prototypes

Rerun the first code cell to reload the file list whenever new files are added.

In [2]:
# One-time setup: uncomment the next line to install the `ipynb` package,
# which is what makes the notebook-to-notebook import below possible.
#%pip install ipynb

# Base code for all notebooks in the same folder (defined in the `common` notebook)
from ipynb.fs.full.common import setup, reload_packages # type: ignore

# `files` is reset first, so if setup() raises on a re-run the name holds an
# empty list rather than the value left over from an earlier execution.
# NOTE(review): presumably setup() returns the sample image paths — confirm in the common notebook.
files = []
files = setup()

print(f"Number of files: {len(files)}")

import cv2
import numpy as np

# Print the library versions so the cell output records the environment used.
print(np.__version__)
print(cv2.__version__)

/Users/tedbouskill/Repos/MyGitHub/wordscape-score-scraper/.venv/bin/python
3.11.11 | packaged by conda-forge | (main, Dec  5 2024, 14:24:23) [Clang 18.1.8 ]
Common Files Setup()
Looking for files in /Users/tedbouskill/Repos/MyGitHub/wordscape-score-scraper/prototypes/../png_samples
Number of files found: 8
Common Files Setup()
Looking for files in /Users/tedbouskill/Repos/MyGitHub/wordscape-score-scraper/prototypes/../png_samples
Number of files: 8
1.26.4
4.10.0


## Reload Packages()

Rerun the cell below to reload the common packages in `repo_packages` or `workspace_packages` whenever they change.

In [3]:
# Reload the shared packages so that changes made to them are picked up
# without restarting the kernel (reload_packages is imported from the
# `common` notebook in the first code cell).
reload_packages()

# NOTE(review): ImageTools is not referenced by any cell shown in this notebook — confirm it is still needed.
from cls_img_tools import ImageTools

def no_op():
    """Do nothing and return ``None``; usable wherever a placeholder callable is needed."""
    return None

def scale_image(img, scale_factor, interpolation=None):
    """Resize ``img`` uniformly by ``scale_factor``.

    Args:
        img: Image array whose first two dimensions are (height, width).
        scale_factor: Positive multiplier applied to both width and height.
        interpolation: Optional OpenCV interpolation flag. When omitted,
            ``cv2.INTER_AREA`` is used for shrinking and ``cv2.INTER_CUBIC``
            for enlarging, following the guidance in the OpenCV ``resize``
            documentation. Pass ``cv2.INTER_AREA`` explicitly to force
            area interpolation in both directions.

    Returns:
        The resized image produced by ``cv2.resize``.

    Raises:
        ValueError: If ``scale_factor`` is not strictly positive.
    """
    if scale_factor <= 0:
        raise ValueError(f"scale_factor must be positive, got {scale_factor}")

    img_height, img_width = img.shape[:2]
    # Clamp each side to at least one pixel: cv2.resize rejects an empty target size.
    new_size = (
        max(1, int(img_width * scale_factor)),
        max(1, int(img_height * scale_factor)),
    )

    if interpolation is None:
        # INTER_AREA is meant for decimation; it gives blocky results when enlarging.
        interpolation = cv2.INTER_AREA if scale_factor < 1 else cv2.INTER_CUBIC

    return cv2.resize(img, new_size, interpolation=interpolation)


Reloading packages


## Prototypes

In [None]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt

from cls_env_tools import EnvTools

# Function to convert transparent background to white
def convert_transparent_to_white(image):
    """Composite an image onto a white background and return a 3-channel result.

    Args:
        image: NumPy image array. A 4-channel array is treated as three colour
            channels followed by an alpha channel scaled 0-255; a 3-channel
            array is returned as is; any other layout is passed to OpenCV as
            a grayscale image.

    Returns:
        For 4-channel input, a new ``uint8`` array of shape (height, width, 3)
        with the colour blended over white according to alpha. For 3-channel
        input, the same array object. Otherwise the result of
        ``cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)``.
    """
    if image.ndim == 3 and image.shape[2] == 4:  # Colour + alpha
        colour = image[:, :, :3]
        # Keep a trailing axis on alpha so it broadcasts across the colour channels.
        opacity = image[:, :, 3:4] / 255.0

        # Standard "over" compositing against a constant white (255) background.
        blended = colour * opacity + 255.0 * (1.0 - opacity)

        # Round to nearest before the cast: a plain astype() truncates, so a
        # result such as 254.99999999999997 would be stored as 254.
        return np.rint(blended).astype(np.uint8)

    if image.ndim == 3 and image.shape[2] == 3:  # Already 3-channel
        return image

    # Anything else is handed to OpenCV as a grayscale image.
    return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

# Function to visualize the result using matplotlib
def visualize_result(image, title="Result"):
    """Show an OpenCV image in a 10x10-inch matplotlib figure with the axes hidden.

    Args:
        image: Image array in OpenCV layout: either single-channel grayscale
            (2-D, or 3-D with one channel) or BGR-ordered colour.
        title: Text placed above the figure.
    """
    # Choose the conversion by channel count: COLOR_BGR2RGB raises a cv2.error
    # when it is applied to a single-channel image.
    if image.ndim == 2 or image.shape[2] == 1:
        rgb_image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    else:
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(10, 10))
    plt.imshow(rgb_image)
    plt.title(title)
    plt.axis('off')
    plt.show()

# Find the root directory and construct the path to the sample image
star_img_path = os.path.join(EnvTools.find_repo_root(), "star.png")
star_img = cv2.imread(star_img_path, cv2.IMREAD_UNCHANGED)

# cv2.findHomography cannot estimate a homography from fewer than four point pairs.
MIN_HOMOGRAPHY_MATCHES = 4

if star_img is None:
    print(f"Error: Could not load sample image from {star_img_path}")
else:
    # Convert transparent background to white
    star_img = convert_transparent_to_white(star_img)
    star_img_gray = cv2.cvtColor(star_img, cv2.COLOR_BGR2GRAY)

    # Initialize SIFT detector
    sift = cv2.SIFT_create()

    # Detect and compute keypoints and descriptors for the sample image
    kp1, des1 = sift.detectAndCompute(star_img_gray, None)

    if des1 is None:
        print("Error: No descriptors found in the sample image.")
        # Show the 3-channel sample image, which is safe for the BGR-to-RGB
        # conversion done when displaying (the grayscale copy is single-channel).
        visualize_result(star_img, title="Sample Image Keypoints")
    else:
        # Visualize keypoints in the sample image (drawKeypoints returns a colour image)
        img_with_keypoints = cv2.drawKeypoints(star_img_gray, kp1, None, color=(0, 255, 0))
        visualize_result(img_with_keypoints, title="Sample Image Keypoints")

        # Initialize the BFMatcher
        bf = cv2.BFMatcher()

        # Assuming 'files' is a list of image file paths
        for img_file in files:
            print(f"Processing {img_file}")

            image = cv2.imread(img_file, cv2.IMREAD_UNCHANGED)

            if image is None:
                print(f"Error: Could not load image from {img_file}")
                continue

            # Convert transparent background to white
            image = convert_transparent_to_white(image)
            image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Detect and compute keypoints and descriptors for the current image
            kp2, des2 = sift.detectAndCompute(image_gray, None)

            if des2 is None:
                print(f"No descriptors found in image {img_file}.")
                img_with_keypoints = cv2.drawKeypoints(image_gray, kp2, None, color=(0, 255, 0))
                visualize_result(img_with_keypoints, title=f"Keypoints in {img_file}")
                continue

            # Match descriptors (best match per sample descriptor; no filtering is applied)
            matches = bf.match(des1, des2)

            if not matches:
                print(f"No matches found between sample image and {img_file}.")
                continue

            # Sort matches by distance (lower distance is better)
            matches = sorted(matches, key=lambda x: x.distance)

            M = None
            if len(matches) >= MIN_HOMOGRAPHY_MATCHES:
                # Extract the matched keypoint locations in both images
                src_pts = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 2)
                dst_pts = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 2)

                # Find homography (RANSAC with a 5-pixel reprojection threshold)
                M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
            else:
                print(f"Too few matches ({len(matches)}) to estimate a homography for {img_file}.")

            if M is not None:
                # Project the sample image's corners into the current image
                h, w = star_img_gray.shape
                pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
                dst = cv2.perspectiveTransform(pts, M)

                # Draw bounding box around the detected region
                image = cv2.polylines(image, [np.int32(dst)], True, (0, 255, 0), 3, cv2.LINE_AA)

            # Draw matches
            img_matches = cv2.drawMatches(star_img, kp1, image, kp2, matches, None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

            # Visualize the result
            visualize_result(img_matches, title=f"Matching Result for {img_file}")

            print(f"Matching Image: {img_file}")
            print(f"Good Matches: {len(matches)}")

            # NOTE(review): the loop stops after the first image that reaches this
            # point — presumably deliberate while prototyping; confirm before reuse.
            break

print("Done")


In [None]:
import dis
from math import e
import cv2 # type: ignore
import numpy as np # type: ignore
import matplotlib.pyplot as plt # type: ignore

from PIL import Image # type: ignore

# Boxes should be ~1218 x ~208 pixels
# Or ~1220 x ~256 pixels

def preprocess_image(img_path):
    """Load an image file and return its pixels as an OpenCV-style BGR array.

    The file is decoded with Pillow and converted to RGB mode. Only the pixel
    values are carried into the returned array, so nothing from the file's
    metadata is part of the result.

    Args:
        img_path: Path passed to ``PIL.Image.open``.

    Returns:
        A NumPy array holding the image's pixels in BGR channel order.
    """
    with Image.open(img_path) as img:
        # np.array() copies just the pixel data, so a metadata-free result is
        # obtained directly; a per-pixel getdata()/putdata() round trip through
        # a Python list yields the same array but is far slower on large images.
        rgb_pixels = np.array(img.convert("RGB"))

    # Convert to OpenCV channel order (BGR)
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)

def grayscale_normalize_image(img):
    """Convert a BGR image to grayscale and min-max normalize it to the 0-255 range."""
    return cv2.normalize(
        cv2.cvtColor(img, cv2.COLOR_BGR2GRAY),
        None,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
    )

def transform_image(img):
    """Return the histogram-equalized form of the normalized grayscale version of ``img``."""
    return cv2.equalizeHist(grayscale_normalize_image(img))

def enhance_lines(img, original_img):
    """Blend thickened Canny edges of ``img`` into ``original_img``.

    Args:
        img: Image passed to ``cv2.Canny`` (thresholds 50 and 150).
        original_img: Image the edge overlay is mixed into; it has to be
            compatible with a 3-channel overlay of the same size for
            ``cv2.addWeighted`` to accept it.

    Returns:
        The weighted sum of 80% ``original_img`` and 20% edge overlay.
    """
    edge_map = cv2.Canny(img, 50, 150)

    # One pass of 3x3 dilation thickens thin edge lines.
    thick_edges = cv2.dilate(edge_map, np.ones((3, 3), np.uint8), iterations=1)

    # The edge map is single-channel; expand it so it can be blended with a colour image.
    edge_overlay = cv2.cvtColor(thick_edges, cv2.COLOR_GRAY2BGR)

    return cv2.addWeighted(original_img, 0.8, edge_overlay, 0.2, 0)

def enhance_contrast(img):
    """Apply CLAHE (clip limit 2.0, 8x8 tiles) followed by gamma correction with gamma 1.5.

    Args:
        img: Image passed to ``clahe.apply``.

    Returns:
        The CLAHE output remapped through a 256-entry gamma lookup table.
    """
    def adjust_gamma(img, gamma=1.5):
        # Lookup table mapping each 8-bit level v to 255 * (v / 255) ** (1 / gamma).
        exponent = 1.0 / gamma
        levels = np.arange(0, 256)
        lut = np.array(
            [((level / 255.0) ** exponent) * 255 for level in levels]
        ).astype("uint8")
        return cv2.LUT(img, lut)

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return adjust_gamma(equalizer.apply(img), gamma=1.5)

def display_image(img, title="Image"):
    """Show an OpenCV image with matplotlib, rotated 90 degrees clockwise.

    Args:
        img: Image array in OpenCV layout: either single-channel grayscale
            (2-D, or 3-D with one channel) or BGR-ordered colour.
        title: Text placed above the image.
    """
    # Convert to RGB for matplotlib. Grayscale input needs its own conversion
    # code: COLOR_BGR2RGB raises a cv2.error on a single-channel image.
    if img.ndim == 2 or img.shape[2] == 1:
        img_rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    else:
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Rotate the image 90 degrees clockwise for display
    img_rgb = cv2.rotate(img_rgb, cv2.ROTATE_90_CLOCKWISE)

    # Display the image using matplotlib
    plt.imshow(img_rgb)
    plt.title(title)
    plt.axis('off')  # Hide the axis
    plt.show()

# Tunable parameters for the box-detection prototype
SMALL_IMAGE_WIDTH = 296          # images exactly this wide are upscaled before processing
SMALL_IMAGE_SCALE = 4
APPLY_BLUR = False               # True: threshold a 3x3 Gaussian-blurred copy instead
THRESHOLD_MODE = "fixed"         # "fixed", "mean_relative" or "adaptive"
FIXED_THRESHOLD = 190
MIN_BOX_AREA = 5000
MIN_BOX_ASPECT = 4.0
MAX_BOX_ASPECT = 6.0
BOX_COLOR_BGR = (0, 140, 255)    # bright orange; OpenCV drawing colours are in BGR order
BOX_THICKNESS = 5

for img_file in files:
    print(f"Processing {img_file}")

    img = preprocess_image(img_file)

    img_height, img_width, channels = img.shape
    print(f"\tOriginal image width: {img_width}, height: {img_height}, channels: {channels}")

    if img_width == SMALL_IMAGE_WIDTH:
        print("\tResizing image")
        img = scale_image(img, SMALL_IMAGE_SCALE)

    t_img = transform_image(img)

    # Enhance thin white lines, then return to grayscale for thresholding
    enhanced = cv2.cvtColor(enhance_lines(t_img, img), cv2.COLOR_BGR2GRAY)

    if APPLY_BLUR:
        # Blur to reduce noise (experiment with different blur types and kernel sizes)
        enhanced = cv2.GaussianBlur(enhanced, (3, 3), 0)

    if THRESHOLD_MODE == "fixed":
        # Threshold to create a binary image (white lines on black background)
        _, binary = cv2.threshold(enhanced, FIXED_THRESHOLD, 255, cv2.THRESH_BINARY)
    elif THRESHOLD_MODE == "mean_relative":
        # Calculate a value relative to the image's mean intensity
        mean_intensity = np.mean(enhanced)
        threshold_value = int(mean_intensity * 0.9)  # Adjust the factor (0.9) as needed
        # NOTE(review): the second argument of adaptiveThreshold is maxValue (the
        # value written for "on" pixels), not a cut-off — confirm this is intended.
        binary = cv2.adaptiveThreshold(
            enhanced,
            threshold_value,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2
        )
    else:
        # Use adaptive thresholding for binarization
        binary = cv2.adaptiveThreshold(
            enhanced,
            200,  # Value assigned to pixels that pass the local threshold
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2
        )

    # The binary image is single-channel; expand it to three channels so the
    # BGR-to-RGB conversion performed for display accepts it.
    display_image(cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR), title="Binary Image")

    # Find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        area = cv2.contourArea(contour)
        if area <= MIN_BOX_AREA:
            # Too small to be a box; skip the polygon approximation entirely.
            continue

        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)

        # Check for rectangular-like shapes with rounded corners (more than four vertices)
        if len(approx) <= 4:
            continue

        # Get bounding rectangle and aspect ratio
        x, y, w, h = cv2.boundingRect(approx)
        aspect_ratio = w / float(h)
        if MIN_BOX_ASPECT < aspect_ratio < MAX_BOX_ASPECT:
            print(f"area({area})")
            # Draw the contour in bright orange with a thick line
            cv2.drawContours(img, [contour], -1, BOX_COLOR_BGR, BOX_THICKNESS)

    display_image(img, title="Detected Boxes")

    print()