In [None]:
%load_ext autoreload
%autoreload 3
%load_ext nb_black
%load_ext dotenv
%dotenv
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

In [None]:
import os
import glob
import pathlib
import subprocess

root = pathlib.Path(os.environ["DATA_ROOT"]).expanduser()

In [None]:
import cv2
import numpy as np

from matplotlib import pyplot as plt
import plotnine as p9
import pandas as pd

import sklearn.cluster

from ipywidgets import interact, IntSlider


%matplotlib inline

In [None]:
!ls

In [None]:
# Load every 100th image. The (sorted) path list is sub-sampled *before*
# decoding so that only the frames that are actually used are read from disk.
image_files = sorted(glob.glob("/tmp/tmpfpfmyvtl/*.png/*.png"))
decoded = (cv2.imread(file) for file in image_files[::100])
# cv2.imread returns None (it does not raise) when a file cannot be read.
images = [frame for frame in decoded if frame is not None]
if not images:
    raise FileNotFoundError("No readable images matched /tmp/tmpfpfmyvtl/*.png/*.png")

# Resize images to common size (optional)
# size = (100, 100)
# images = [cv2.resize(img, size) for img in images]

# Flatten every image to an (n_pixels, 3) array and stack them all
pixels = np.concatenate([img.reshape(-1, 3) for img in images])

# Train K-means model (seeded so cluster labels are stable between runs)
kmeans = sklearn.cluster.KMeans(n_clusters=2, random_state=0)
kmeans.fit(pixels)

# Apply K-means model to each image: one cluster label per pixel.
# A plain loop is kept on purpose: `img` stays bound after the loop and a
# later cell reads it.
segmented_images = []
for img in images:
    labels = kmeans.predict(img.reshape(-1, 3))
    segmented_images.append(labels.reshape(img.shape[:2]))


In [None]:
def crop_to_content(image, threshold=10):
    """
    Crops away the outer columns and rows that contain pixels darker than a threshold.

    Working inwards from each edge, whole columns (left, right) and whole rows
    (top, bottom) are discarded for as long as they contain at least one pixel
    below ``threshold``. Every line is judged over its full length in the
    uncropped image.

    Parameters:
    - image: A 2-D grayscale/binary array; an array with more than two
      dimensions is first converted with cv2.COLOR_BGR2GRAY.
    - threshold: The pixel value below which a pixel is considered "black".

    Returns:
    - The cropped (grayscale) image. An empty array is returned when no column
      or no row is free of dark pixels, instead of running off the image.
    """

    # Convert the image to grayscale if it's not already
    if image.ndim > 2:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    dark = image < threshold

    # Indices of the columns / rows that contain no dark pixel at all
    clean_cols = np.flatnonzero(~dark.any(axis=0))
    clean_rows = np.flatnonzero(~dark.any(axis=1))

    # Nothing survives the crop: every column (or every row) has a dark pixel
    if clean_cols.size == 0 or clean_rows.size == 0:
        return image[:0, :0]

    # The first/last clean line from each side is where the edge scan stops
    left, right = clean_cols[0], clean_cols[-1]
    top, bottom = clean_rows[0], clean_rows[-1]

    return image[top:bottom + 1, left:right + 1]


In [None]:
def crop_to_content(image, threshold=0.5):
    """
    Greedily peels border rows/columns off a 2-D image until no border has dark pixels.

    On every pass the number of pixels below ``threshold`` is counted on each of
    the four borders of the current crop, and the border with the most dark
    pixels is removed. Ties are resolved in the order top, bottom, left, right.

    Parameters:
    - image: A 2-D array (e.g. a label map or grayscale image).
    - threshold: The pixel value below which a pixel is considered dark.

    Returns:
    - The cropped image. If everything gets peeled away the (empty) remainder
      is returned rather than indexing into an empty array.
    """
    # `image.size` becomes 0 once the last row or column has been removed
    while image.size:
        dark_counts = (
            np.count_nonzero(image[0, :] < threshold),   # top row
            np.count_nonzero(image[-1, :] < threshold),  # bottom row
            np.count_nonzero(image[:, 0] < threshold),   # left column
            np.count_nonzero(image[:, -1] < threshold),  # right column
        )
        worst = max(dark_counts)

        if worst == 0:
            # No more dark pixels on the edges, stop cropping
            break

        # tuple.index returns the first match, which gives the tie-break order
        edge = dark_counts.index(worst)
        if edge == 0:
            image = image[1:, :]
        elif edge == 1:
            image = image[:-1, :]
        elif edge == 2:
            image = image[:, 1:]
        else:
            image = image[:, :-1]

    return image


In [None]:
pd.Series(segmented_images[0].ravel()).value_counts()

In [None]:
plt.imshow(segmented_images[0])

In [None]:
# Crop the first segmented label map and preview the result.
pimg = crop_to_content(segmented_images[0], 0.5)
plt.imshow(pimg)
# Labels are scaled by 255 before writing; with the two-cluster model above
# they are presumably 0/1, giving a black/white PNG.
cv2.imwrite("foo.png", (pimg * 255).astype(np.uint8))

In [None]:


# Define the image and the ssocr options to try on it.
# Keys are passed to run_ssocr as `--key=value`; commented-out entries are
# earlier experiments that are currently disabled.
image = "/tmp/tmpnzivpd0q/cutouts/cadence-output_001.png"
params = {
    #'threshold': 200,
    'number-digits': -1,
    "one-ratio": 7,
    #'foreground': 'black',
    #'background': 'white',
    #'iter-threshold': None,
    #'number-pixels': 5,
    
}

# Extra command words appended after the options (all disabled at the moment).
commands = [
    #"make_mono",
    #"grayscale",
    #"rgb_threshold"
    
]

# Run the function
# NOTE(review): run_ssocr is defined in the cell *below* this one, so on a
# fresh kernel this cell fails with NameError unless that cell is run first.
run_ssocr(image, params, commands)




In [None]:
import subprocess
import cv2
import tempfile
import os

def run_ssocr(image, params, commands, ssocr_path='./ssocr-2.22.2/ssocr'):
    """
    Runs the ssocr command-line tool on one image and returns what it printed.

    Parameters:
    - image: Either a path (str or os.PathLike) to an image file, or pixel data
      that cv2.imwrite can save; pixel data is written to a temporary PNG that
      is removed again afterwards.
    - params: Mapping of option name to value. A value of None yields a bare
      `--name` flag, anything else yields `--name=value`.
    - commands: Iterable of strings; each is split on whitespace and appended
      after the options (an empty string therefore adds nothing).
    - ssocr_path: Executable to run; defaults to the bundled ssocr build.

    Returns:
    - The decoded, stripped standard output of the process. Anything written to
      standard error is printed with an "Error: " prefix.
    """
    # Define the command and parameters
    cmd = [ssocr_path]

    # Add the parameters to the command
    for key, value in params.items():
        cmd.append(f'--{key}' if value is None else f'--{key}={value}')

    for _cmd in commands:
        cmd.extend(_cmd.split())

    temp_file_path = None
    try:
        if isinstance(image, (str, os.PathLike)):
            cmd.append(os.fspath(image))
        else:
            # Reserve a file name, then close the handle right away so that
            # cv2.imwrite can (re)open the path and no descriptor is leaked.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
                temp_file_path = temp_file.name
            cv2.imwrite(temp_file_path, image)
            cmd.append(temp_file_path)

        # Run the command and get the output
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    finally:
        # Delete the temporary file even if writing or running failed
        if temp_file_path is not None:
            os.remove(temp_file_path)

    # Check for errors
    if result.stderr:
        print(f'Error: {result.stderr.decode()}')

    # Return the output
    return result.stdout.decode().strip()


In [None]:
import ipywidgets as widgets
from IPython.display import display

# Paths of the cut-out images to run through ssocr.
# NOTE(review): this rebinds `images`, which an earlier cell uses for the list
# of decoded image arrays; re-running earlier cells after this one will see
# paths instead of arrays.
images = sorted(glob.glob("/tmp/tmpnzivpd0q/cutouts/watts*.png"))


# Default values used to initialise the widgets below.
# Presumably the keys mirror ssocr's long option names; verify against the
# ssocr manual before passing them through unchanged.
params = {
    'threshold': 50,
    'number-digits': -1,
    'foreground': 'black',
    'background': 'white',
    'iter-threshold': False,
    'number-pixels': 0,
    'ignore-pixels': 0,
    'one-ratio': 1.0,
    'minus-ratio': 1.0,
    'dec-h-ratio': 1.0,
    'dec-w-ratio': 1.0,
    'process-only': False,
    'print-info': False,
    'adjust-gray': False,
    'luminance': 'default',
    'print-spaces': False,
    'space-factor': 1.0,
    'space-average': False,
    'ascii-art-segments': False,
    'print-as-hex': False,
    'omit-decimal-point': False,
    'charset': 'default',
}

# Create widgets for the parameters, each starting at its default from `params`
threshold_widget = widgets.IntSlider(min=0, max=100, step=1, value=params['threshold'], description='Threshold')
number_digits_widget = widgets.IntSlider(min=-1, max=6, step=1, value=params['number-digits'], description='Number of Digits')
foreground_widget = widgets.Dropdown(options=['black', 'white'], value=params['foreground'], description='Foreground')
background_widget = widgets.Dropdown(options=['black', 'white'], value=params['background'], description='Background')

def text_table(xs, ncol=8, width=5):
    """
    Lays out a sequence as right-aligned, fixed-width columns of text.

    Parameters:
    - xs: Sliceable sequence of values to format (e.g. a list of strings).
    - ncol: Number of cells per line; must be at least 1.
    - width: Minimum width of each cell; shorter values are padded on the left.

    Returns:
    - The lines joined with newlines; an empty string for an empty sequence.

    Raises:
    - ValueError: if ``ncol`` is smaller than 1 (chunking could never finish).
    """
    if ncol < 1:
        raise ValueError(f"ncol must be a positive integer, got {ncol!r}")
    if not xs:
        return ""

    rows = (xs[start:start + ncol] for start in range(0, len(xs), ncol))
    return "\n".join("".join(f"{e:>{width}}" for e in row) for row in rows)

def update_and_run(threshold, number_digits, white_border):
    """
    Widget callback: runs ssocr over every entry of the global ``images`` and prints the results.

    Parameters:
    - threshold: Forwarded as the "threshold" option.
    - number_digits: Forwarded as the "number-digits" option.
    - white_border: When greater than 0, the command "white_border <n>" is
      added; otherwise an empty command string is passed.
    """
    ssocr_options = {"threshold": threshold, "number-digits": number_digits}
    border_command = f"white_border {white_border}" if (white_border > 0) else ""

    outputs = []
    for path in images:
        outputs.append(run_ssocr(path, ssocr_options, [border_command]))

    # One recognised string per image, laid out as a fixed-width table
    print(text_table(outputs))



# Create widgets for the additional parameters, initialised from `params`.
# NOTE(review): of all the widgets created here and above, only threshold,
# number_digits and white_border are passed to `interact` below; the rest are
# constructed but not connected to update_and_run.
iter_threshold_widget = widgets.Checkbox(value=params['iter-threshold'], description='Iterative Threshold')
number_pixels_widget = widgets.IntSlider(min=0, max=100, step=1, value=params['number-pixels'], description='Number of Pixels')
ignore_pixels_widget = widgets.IntSlider(min=0, max=100, step=1, value=params['ignore-pixels'], description='Ignore Pixels')
one_ratio_widget = widgets.FloatSlider(min=0.0, max=2.0, step=0.1, value=params['one-ratio'], description='One Ratio')
minus_ratio_widget = widgets.FloatSlider(min=0.0, max=2.0, step=0.1, value=params['minus-ratio'], description='Minus Ratio')
dec_h_ratio_widget = widgets.FloatSlider(min=0.0, max=2.0, step=0.1, value=params['dec-h-ratio'], description='Dec H Ratio')
dec_w_ratio_widget = widgets.FloatSlider(min=0.0, max=2.0, step=0.1, value=params['dec-w-ratio'], description='Dec W Ratio')
process_only_widget = widgets.Checkbox(value=params['process-only'], description='Process Only')
print_info_widget = widgets.Checkbox(value=params['print-info'], description='Print Info')
adjust_gray_widget = widgets.Checkbox(value=params['adjust-gray'], description='Adjust Gray')
luminance_widget = widgets.Text(value=params['luminance'], description='Luminance')
print_spaces_widget = widgets.Checkbox(value=params['print-spaces'], description='Print Spaces')
space_factor_widget = widgets.FloatSlider(min=0.0, max=2.0, step=0.1, value=params['space-factor'], description='Space Factor')
space_average_widget = widgets.Checkbox(value=params['space-average'], description='Space Average')
ascii_art_segments_widget = widgets.Checkbox(value=params['ascii-art-segments'], description='ASCII Art Segments')
print_as_hex_widget = widgets.Checkbox(value=params['print-as-hex'], description='Print as Hex')
omit_decimal_point_widget = widgets.Checkbox(value=params['omit-decimal-point'], description='Omit Decimal Point')
charset_widget = widgets.Text(value=params['charset'], description='Charset')
white_border_widget = widgets.IntSlider(min=0, max=10, step=1, value=0, description='White Border')

# Interactive view: re-runs update_and_run whenever one of the three connected
# sliders changes.
widgets.interact(
    update_and_run,
    threshold=threshold_widget,
    number_digits=number_digits_widget,
    white_border=white_border_widget,
)


In [None]:
images

In [None]:
4

In [None]:
[e for e in dir(cv2) if "OTSU" in e]

In [None]:
# NOTE(review): `img` is not assigned in this cell. Presumably it is the loop
# variable left over from the k-means cell, or the image loaded further down
# with load_image — confirm which one is intended before Restart & Run All.
pimg = crop_to_content(img)
plt.imshow(pimg)

In [None]:
def preprocess_image(image, blur_kernel_size, dilation_size, edge_size):
    """
    Blurs, binarises and dilates an image, then whitens a frame around its border.

    Parameters:
    - image: Input image passed straight to cv2.GaussianBlur and then to an
      Otsu threshold (assumes a single-channel 8-bit image — TODO confirm).
    - blur_kernel_size: Side length of the square Gaussian kernel.
    - dilation_size: Side length of the square all-ones dilation kernel.
    - edge_size: Width in pixels of the border frame that is set to 255;
      0 leaves the border untouched.

    Returns:
    - The processed binary image.
    """
    # 1. Blurring
    image = cv2.GaussianBlur(image, (blur_kernel_size, blur_kernel_size), 0)

    # 2. Thresholding (the 0 is ignored; Otsu picks the threshold)
    _, image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # 3. Dilation
    kernel = np.ones((dilation_size, dilation_size), np.uint8)
    image = cv2.dilate(image, kernel, iterations=1)

    # 4. Edge removal: paint the outer frame white. Guarded because a slice
    #    such as image[-0:] would select the whole image rather than nothing.
    if edge_size > 0:
        image[:edge_size, :] = 255
        image[-edge_size:, :] = 255
        image[:, :edge_size] = 255
        image[:, -edge_size:] = 255

    return image

def interactive_preprocessing(image):
    """
    Shows sliders for the preprocessing settings and redraws the processed image on change.

    Parameters:
    - image: The image handed to preprocess_image on every slider update.
    """

    def render(blur_kernel_size, dilation_size, edge_size):
        processed = preprocess_image(image, blur_kernel_size, dilation_size, edge_size)
        return plt.imshow(processed, cmap='gray')

    # Kernel sizes step by 2 starting from 1, so they are always odd
    sliders = dict(
        blur_kernel_size=IntSlider(min=1, max=11, step=2, value=3),
        dilation_size=IntSlider(min=1, max=11, step=2, value=3),
        edge_size=IntSlider(min=0, max=50, step=1, value=0),
    )
    interact(render, **sliders)

# Example usage:
# img = cv2.imread('image.png', cv2.IMREAD_GRAYSCALE)
# Here the cropped map `pimg` is inverted and scaled to 8-bit first
# (assumes pimg holds 0/1 values — TODO confirm).
interactive_preprocessing(((1-pimg)*255).astype(np.uint8))


In [None]:
def run_length_score(binary_image):
    """
    Scores a binary image by summing pixels that are not smaller than their predecessor.

    Along each row, and again along each column, every pixel is compared with
    the previous pixel on that line (wrapping around at the start). The values
    of all pixels that are greater than or equal to that neighbour are summed,
    and the row total and column total are added together.

    Parameters:
    - binary_image: Array with at most two distinct values.

    Returns:
    - The combined row + column score.

    Raises:
    - AssertionError: if the image holds more than two distinct values.
    """
    binary_image = np.asarray(binary_image)

    # Validate the argument itself (not some notebook-level global)
    distinct_values = np.unique(binary_image)
    assert len(distinct_values) <= 2, "Should be a binary image."

    def _score_along(axis):
        # np.roll by one position gives each pixel's (circular) predecessor
        previous = np.roll(binary_image, 1, axis=axis)
        return np.sum(binary_image * (binary_image >= previous))

    # axis=1 walks along rows, axis=0 walks down columns
    return _score_along(1) + _score_along(0)

def rotate_image(image, angle, border_value=0):
    """
    Rotates a 2-D image about its centre without changing the canvas size.

    Parameters:
    - image: 2-D array to rotate.
    - angle: Rotation angle handed to cv2.getRotationMatrix2D (scale 1).
    - border_value: Value used for pixels that fall outside the source image.

    Returns:
    - The rotated image, same width and height as the input.
    """
    height, width = image.shape
    centre = (width / 2, height / 2)
    transform = cv2.getRotationMatrix2D(centre, angle, 1)
    return cv2.warpAffine(image, transform, (width, height), borderValue=border_value)

def straighten_image_maximize_run_length(binary_image, angle_range=(-10, 10), num_angles=100):
    """
    Tries evenly spaced rotation angles and keeps the one with the highest run_length_score.

    Parameters:
    - binary_image: Image to straighten.
    - angle_range: (first, last) angle of the search interval, both inclusive.
    - num_angles: Number of evenly spaced candidate angles.

    Returns:
    - The input rotated by the best-scoring angle (angle 0 if no candidate
      scores above -1).
    """
    # (score, angle) of the best candidate seen so far
    winner = (-1, 0)

    for candidate in np.linspace(angle_range[0], angle_range[1], num_angles):
        candidate_score = run_length_score(rotate_image(binary_image, candidate))
        # Strict comparison: the earliest angle wins a tie
        if candidate_score > winner[0]:
            winner = (candidate_score, candidate)

    # Apply the best rotation angle to the image
    return rotate_image(binary_image, winner[1])


In [None]:
import cv2
import numpy as np
from skimage.transform import hough_line, hough_line_peaks


def correct_rotation(image):
    """
    Rotates an image by the mean angle of its Hough-transform line peaks.

    Parameters:
    - image: 2-D image; edges are extracted with cv2.Canny(image, 50, 150).

    Returns:
    - The rotated image (borders replicated), or the input unchanged when the
      Hough transform yields no peaks.
    """
    edges = cv2.Canny(image, 50, 150)
    hspace, angles, distances = hough_line(edges)

    _, peak_angles, _ = hough_line_peaks(hspace, angles, distances)
    if len(peak_angles) == 0:
        # Averaging an empty array would give NaN and a NaN rotation matrix
        return image
    mean_angle = np.mean(peak_angles)

    # Map the angle into the interval (-90, 90] degrees
    rotation_angle = np.rad2deg(mean_angle) % 180
    if rotation_angle > 90:
        rotation_angle -= 180

    print(f"{rotation_angle=}")

    rows, cols = image.shape
    rotation_matrix = cv2.getRotationMatrix2D((cols / 2, rows / 2), rotation_angle, 1)
    rotated_image = cv2.warpAffine(
        image, rotation_matrix, (cols, rows), borderMode=cv2.BORDER_REPLICATE
    )

    return rotated_image


def rotate_image(image, angle):
    """
    Rotates a 2-D image about its centre, replicating edge pixels into the exposed corners.

    Parameters:
    - image: 2-D array to rotate.
    - angle: Rotation angle handed to cv2.getRotationMatrix2D (scale 1).

    Returns:
    - The rotated image, same width and height as the input.
    """
    height, width = image.shape
    transform = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
    return cv2.warpAffine(
        image, transform, (width, height), borderMode=cv2.BORDER_REPLICATE
    )


def correct_rotation(image, angle_range=(-10, 10), num_angles=100):
    """
    Searches evenly spaced angles for the rotation that minimises mu02 + mu20.

    Parameters:
    - image: 2-D image to straighten.
    - angle_range: (first, last) angle of the search interval, both inclusive.
    - num_angles: Number of evenly spaced candidate angles.

    Returns:
    - The input rotated by the lowest-scoring angle; the angle is also printed.
    """
    # NOTE(review): mu20 + mu02 is the total second central moment, which in
    # theory does not change under rotation; differences between candidates
    # presumably come only from border/interpolation effects — confirm that
    # this score is what was intended.
    best_angle = 0
    lowest = float("inf")

    for candidate in np.linspace(angle_range[0], angle_range[1], num_angles):
        central = cv2.moments(rotate_image(image, candidate))
        spread = central["mu02"] + central["mu20"]
        # Strict comparison: the earliest angle wins a tie
        if spread < lowest:
            lowest, best_angle = spread, candidate

    print(f"{best_angle=}")

    return rotate_image(image, best_angle)


def correct_rotation(image):
    """
    Rotates an image by the median min-area-rectangle angle of its tall, narrow contours.

    Only external contours whose upright bounding box has a width/height ratio
    strictly between 0.2 and 0.8 contribute an angle.

    Parameters:
    - image: Image passed to cv2.findContours.

    Returns:
    - The rotated image, or the input unchanged when no contour qualifies.
    """
    contours, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    angles = []
    for contour in contours:
        _, _, box_w, box_h = cv2.boundingRect(contour)
        if not (box_w > 0 and box_h > 0):
            continue
        if not 0.2 < float(box_w) / box_h < 0.8:
            continue

        _, (rect_w, rect_h), angle = cv2.minAreaRect(contour)
        # Add 90 degrees when the fitted rectangle is taller than wide
        angles.append(angle + 90 if rect_w < rect_h else angle)

    if not angles:
        return image

    return rotate_image(image, np.median(angles))


def correct_rotation(image, angle_range=(-10, 10)):
    """
    Rotates an image by the median contour angle, clamped to ``angle_range``.

    Only external contours whose upright bounding box has a width/height ratio
    strictly between 0.2 and 0.8 contribute an angle. Each angle comes from
    cv2.minAreaRect and has 90 added when that rectangle is taller than wide.

    Parameters:
    - image: Image passed to cv2.findContours.
    - angle_range: (lowest, highest) rotation angle that may be applied.

    Returns:
    - The rotated image, or the input unchanged when no contour qualifies.
    """
    contours, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    angles = []
    for contour in contours:
        _, _, w, h = cv2.boundingRect(contour)
        if w > 0 and h > 0:
            aspect_ratio = float(w) / h
            if 0.2 < aspect_ratio < 0.8:
                _, (w, h), angle = cv2.minAreaRect(contour)
                if w < h:
                    angle += 90
                angles.append(angle)

    if not angles:
        return image

    median_angle = np.median(angles)

    # Limit the rotation angle to the specified range
    median_angle = max(angle_range[0], min(median_angle, angle_range[1]))

    corrected_image = rotate_image(image, median_angle)
    return corrected_image


def preprocess_seven_segment_image(image_path):
    """
    Loads an image, binarises it with an inverted Otsu threshold and straightens it.

    Parameters:
    - image_path: Path of the image file to read.

    Returns:
    - The binarised image after correct_rotation has been applied.

    Raises:
    - FileNotFoundError: if the file cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising
        raise FileNotFoundError(f"Could not read image: {image_path}")
    grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # The 0 is ignored; Otsu picks the threshold. _INV swaps the two output levels.
    _, binary_image = cv2.threshold(
        grayscale_image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    rotated_image = correct_rotation(binary_image)
    # rotated_image = binary_image

    # Optional clean-up step, currently disabled:
    # kernel = np.ones((2, 2), np.uint8)
    # cleaned_image = cv2.morphologyEx(rotated_image, cv2.MORPH_CLOSE, kernel)
    cleaned_image = rotated_image

    return cleaned_image


# Example usage: preprocess a sample image (path is relative to the notebook's
# working directory) and display the result.
image_path = "test-number.png"
preprocessed_image = preprocess_seven_segment_image(image_path)
plt.imshow(preprocessed_image)

In [None]:
!ls /tmp/tmpnzivpd0q/cutouts/speed*.png

In [None]:
# NOTE(review): `ocr` is not imported or defined in any visible cell —
# presumably a project-local module; add the import before relying on this.
im1, im2 = ocr.process_image("test-number.png")

In [None]:
np.unique(rotate_image(im1, 1).ravel())

In [None]:
plt.imshow(im2)

In [None]:
def load_image(image_path):
    """
    Reads a colour image and returns it flipped both vertically and horizontally.

    Flipping on both axes is the same as rotating the picture by 180 degrees.

    Parameters:
    - image_path: Path of the image file (str or path-like; converted with str()).

    Returns:
    - The flipped image array.

    Raises:
    - FileNotFoundError: if the file cannot be read.
    """
    # Load the image
    # image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
    image = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return np.fliplr(np.flipud(image))


def deskew_image(image):
    """
    Rotates an image by the angle of the minimum-area rectangle around its bright pixels.

    Parameters:
    - image: 3-dimensional arrays are converted with cv2.COLOR_BGR2GRAY;
      anything else is used as the grayscale image directly.

    Returns:
    - The rotated grayscale image (cubic interpolation, replicated borders).
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image

    # Otsu threshold; pixels above it count as foreground
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # (row, col) coordinates of every foreground pixel
    coords = np.column_stack(np.where(thresh > 0))

    angle = cv2.minAreaRect(coords)[-1]
    print(f"{angle=}")

    # NOTE(review): this correction presumably assumes minAreaRect reports
    # angles in [-90, 0); verify against the installed OpenCV version.
    rotation = -(90 + angle) if angle < -45 else -angle

    h, w = gray.shape[:2]
    M = cv2.getRotationMatrix2D((w // 2, h // 2), rotation, 1.0)
    return cv2.warpAffine(
        gray, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
    )


def preprocess_image(image):
    """
    Upscales an image to 150 %, binarises it with Otsu, and applies opening, closing and denoising.

    Parameters:
    - image: Image array (assumes single-channel 8-bit so that the Otsu
      threshold accepts it — TODO confirm).

    Returns:
    - The denoised binary image.
    """
    scale_percent = 150
    # cv2.resize wants (width, height), i.e. shape[1] before shape[0]
    new_size = tuple(int(image.shape[axis] * scale_percent / 100) for axis in (1, 0))
    resized = cv2.resize(image, new_size, interpolation=cv2.INTER_LINEAR)

    _, thresholded = cv2.threshold(
        resized, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )

    # Opening followed by closing with a 1x1 structuring element
    kernel = np.ones((1, 1), np.uint8)
    closing = cv2.morphologyEx(
        cv2.morphologyEx(thresholded, cv2.MORPH_OPEN, kernel), cv2.MORPH_CLOSE, kernel
    )

    return cv2.fastNlMeansDenoising(
        closing, h=20, templateWindowSize=7, searchWindowSize=21
    )


def segment_image(image, k=3):
    """
    Quantises an image to ``k`` colours using OpenCV k-means on its pixels.

    Parameters:
    - image: Array whose values can be viewed as rows of three channels.
    - k: Number of clusters (colours) to keep.

    Returns:
    - An array of the input's shape in which every pixel is replaced by its
      cluster centre, converted to 8-bit.
    """
    # One float32 row of three channel values per pixel
    samples = np.float32(image.reshape((-1, 3)))

    # Stop after 100 iterations or once the centres move by less than 0.2
    stop = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)
    _, labels, centers = cv2.kmeans(
        samples, k, None, stop, 10, cv2.KMEANS_RANDOM_CENTERS
    )

    # Look up each pixel's centre colour and restore the original layout
    palette = np.uint8(centers)
    return palette[labels.flatten()].reshape(image.shape)

In [None]:
# p9.ggplot(pd.DataFrame(dict(x=np.ravel(img))), p9.aes(x="x")) + p9.geom_histogram()

In [None]:
# Tall figures for this cell and everything after it.
plt.rcParams["figure.figsize"] = (6, 12)
# Load one frame from the data root and keep only the band between row 900
# and 400 rows above the bottom edge.
img = load_image(root / "0117.png")
img = img[900:-400, :, :]
plt.imshow(img)

# Draw the 10-colour segmentation on the same axes, on top of the plain image.
plt.imshow(segment_image(img, k=10))
# plt.imshow(img)

# plt.imshow(deskew_image(img))

# plt.imshow(np.where(img < 100, img, 255))


# (img, dpi=600)

In [None]:
from pytesseract import pytesseract

# Set the path to the Tesseract executable
pytesseract.tesseract_cmd = r'/path/to/tesseract'

# Load the image first: preprocess_image works on pixel data, not on a path.
# It is read as grayscale because preprocess_image applies an Otsu threshold.
image_path = 'path/to/your/image.jpg'
source_image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
if source_image is None:
    # cv2.imread signals failure by returning None instead of raising
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Preprocess the image
preprocessed_image = preprocess_image(source_image)

# Run Tesseract OCR on the preprocessed image
text = pytesseract.image_to_string(preprocessed_image)
print(text)
