# In [59]:
import numpy as np
import cv2
from imutils.perspective import four_point_transform
from imutils import contours
from crossword_utils import *

def get_rows(centers, row_amt, row_h):
    """
    Group points into horizontal bands and yield each band sorted left to right.

    The height row_h is split into row_amt bands of equal height
    d = row_h / row_amt. Band i holds the points whose y coordinate lies in
    the half-open interval [d * i, d * (i + 1)), so a point sitting exactly on
    a band boundary (including y == 0) belongs to exactly one band instead of
    being silently dropped.

    Args:
        centers: sequence of (x, y) points; column 0 is x, column 1 is y.
            May be empty.
        row_amt: number of bands to split the height into.
        row_h: total height covered by the bands.

    Yields:
        One array per band, top to bottom, holding that band's points ordered
        by ascending x. A band without points yields an empty array.
    """
    centers = np.asarray(centers)
    if centers.size == 0:
        # np.asarray([]) is 1-D and cannot be indexed by column; give it the
        # shape of an empty point list so every band simply comes out empty.
        centers = centers.reshape(0, 2)

    d = row_h / row_amt
    for i in range(row_amt):
        # vertical offset of every point from the top edge of band i
        f = centers[:, 1] - d * i
        band = centers[(f >= 0) & (f < d)]
        # stable sort: points sharing an x keep their incoming order
        yield band[band[:, 0].argsort(kind="stable")]

def label_contour(image, c, i, color=(0, 255, 0), thickness=2):
    """
    Draw a contour on an image together with its 1-based index label.

    Args:
        image: image to draw on; it is modified in place.
        c: contour to draw (a point array as produced by cv2.findContours).
        i: zero-based index of the contour; the label drawn is "#<i + 1>".
        color: colour of the contour outline.
        thickness: thickness of the contour outline.

    Returns:
        The same image object, with the contour and its label drawn on it.
    """
    # compute the centroid of the contour; it anchors the text label
    M = cv2.moments(c)
    if M["m00"] != 0:
        cX = int(M["m10"] / M["m00"])
        cY = int(M["m01"] / M["m00"])
    else:
        # a zero-area contour (single point, straight line) has m00 == 0;
        # use the mean of its points instead of dividing by zero
        cX, cY = (int(v) for v in c.reshape(-1, 2).mean(axis=0))

    # draw the contour and label number on the image
    cv2.drawContours(image, [c], -1, color, thickness)
    cv2.putText(image, "#{}".format(i + 1), (cX - 20, cY), cv2.FONT_HERSHEY_PLAIN,
                1, (255, 0, 0), 1)

    # return the image with the contour number drawn on it
    return image

def find_cells(img, drawImage, Debug = False):
    """
    Find the cells of a grid.

    Every contour found in img is approximated by a polygon. A contour is kept
    as a cell when the polygon has exactly four vertices and its area lies
    between 0.01% and 2% of the area of img.

    Args:
        img: binary image handed to cv2.findContours (the caller in this file
            passes the inverted adaptive threshold of the source image).
        drawImage: image the cell crops are cut from and, when Debug is set,
            the image whose copies the debug overlays are drawn on. It is not
            modified. Assumed to have the same height and width as img.
        Debug: when true, show the intermediate results with
            show_wait_destroy.

    Returns:
        A list with one dict per cell:
            "img":  copy of the cell's upright bounding box cut from drawImage
            "pos":  (cX, cY) centroid of the contour
            "cnt":  the contour itself
            "rect": cv2.minAreaRect of the approximated polygon
            "box":  integer corner points of that rotated rectangle
    """
    img_area = img.shape[0] * img.shape[1]

    # findContours returns (contours, hierarchy) in OpenCV 4.x but
    # (image, contours, hierarchy) in 3.x; the contours are second-to-last
    # in both layouts.
    cnts = cv2.findContours(img, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if Debug:
        show_wait_destroy('all contours', cv2.drawContours(drawImage.copy(), cnts, -1, (0, 255, 0), 1))

    # One entry per accepted cell: its cropped image and its position in the grid
    cells = []
    for c in cnts:
        # Approximate the contour in order to determine whether it is a quadrilateral
        peri = cv2.arcLength(c, True)
        epsilon = 0.02 * peri # originally 0.017
        approx = cv2.approxPolyDP(c, epsilon, True)
        if len(approx) != 4:
            continue

        # Reject areas that are too small or too large in relation to the whole image
        area = cv2.contourArea(approx)
        percentage = (area * 100) / img_area
        if not 0.01 < percentage < 2:
            continue

        # https://docs.opencv.org/3.4/dd/d49/tutorial_py_contour_features.html
        # Rotated (minimum-area) rectangle and its four corner points.
        # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
        rect = cv2.minAreaRect(approx)
        box = cv2.boxPoints(rect).astype(np.intp)

        # Straight bounding rectangle of the contour; used to crop the cell.
        # The crop comes from the drawImage argument rather than a module-level
        # global, so the function works for any caller.
        # (resizing the crop to 28 x 28 is currently disabled)
        x, y, w, h = cv2.boundingRect(c)
        cell = drawImage[y : y + h, x : x + w].copy()

        # Centroid of the cell in relation to the grid
        M = cv2.moments(c)
        if M["m00"] != 0:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        else:
            # degenerate contour: fall back to the bounding box centre
            cX, cY = x + w // 2, y + h // 2

        cells.append({"img": cell, "pos": (cX, cY), "cnt": c, "rect": rect, "box": box})

    if Debug:
        # select the cnt column
        selected = [o["cnt"] for o in cells]

        # draw the selected contours
        show_wait_destroy('selected contours', cv2.drawContours(drawImage.copy(), selected, -1, (255, 0, 0), 1))

        # label the selected contours in detection order
        imageCopyUnsorted = drawImage.copy()
        for (i, c) in enumerate(selected):
            label_contour(imageCopyUnsorted, c, i, color=(240, 0, 159), thickness=1)

        show_wait_destroy('labelled contours', imageCopyUnsorted)

        # select the position column
        centroids = [o["pos"] for o in cells]

        # colours are given in OpenCV's BGR channel order
        textColor = (255, 0, 0)   # blue text
        pointColor = (0, 255, 0)  # green points

        labeled = drawImage.copy()
        for index, pt in enumerate(centroids):
            cv2.putText(labeled, str(index), (int(pt[0]), int(pt[1])), cv2.FONT_HERSHEY_PLAIN, 1, textColor, 1)
            cv2.circle(labeled, (int(pt[0]), int(pt[1])), 3, pointColor, -1)

        show_wait_destroy("centroids", labeled)

    return cells

# https://gist.githubusercontent.com/qgolsteyn/7da376ced650a2894c2432b131485f5d/raw/5a7b2e0150dfce942cc3cd1e28c3e2c8c0783936/main.py
def get_grid(cells, drawImage, Debug = False, row_amt = 25):
    """
    Given a list of cells and their position, return a 2D array representing
    a grid, where each element of this 2D array contains the value of the grid
    at that position.

    NOTE: building the grid is not implemented yet. The function currently
    always returns an empty list; only the debug visualisation of the row
    grouping exists.

    Args:
        cells: list of dicts carrying a "pos" entry with the (x, y) centroid
            of each cell, as produced by find_cells.
        drawImage: image whose copy the debug overlay is drawn on; only read
            when Debug is true.
        Debug: when true, group the centroids into rows with get_rows and show
            them, numbered in reading order, with show_wait_destroy.
        row_amt: number of horizontal bands the image height is split into
            when grouping centroids into rows.

    Returns:
        The grid as a list (currently always empty).
    """
    grid = []

    if Debug:
        # select the position column
        centroids = [o["pos"] for o in cells]

        overlay = drawImage.copy()
        # shape is (h, w) for grayscale and (h, w, channels) for colour images
        h = overlay.shape[0]

        # with no cells there is nothing to group; just show the plain copy
        rows = get_rows(centroids, row_amt, h) if centroids else ()

        count = 0
        for row in rows:
            if len(row) == 0:
                continue
            # cv2.polylines only accepts 32-bit integer point arrays
            cv2.polylines(overlay, [row.astype(np.int32)], False, (255, 0, 255), 1)
            for x, y in row:
                # plain ints, as the other drawing calls in this file pass
                x, y = int(x), int(y)
                count += 1
                cv2.circle(overlay, (x, y), 5, (0, 0, 255), -1)
                # argument order is (img, text, org, fontFace, fontScale, color, thickness)
                cv2.putText(overlay, str(count), (x - 10, y + 5), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 255), 1)

        show_wait_destroy("sorted", overlay)

    return grid

image = cv2.imread('crossword1.png')
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; fail here with a clear message.
    raise FileNotFoundError("could not read image 'crossword1.png'")

show_wait_destroy("raw image", image)

# Transform source image to gray if it is not already
if len(image.shape) != 2:
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
    gray = image

show_wait_destroy("gray image", gray)

# untouched copy of the grayscale image, kept for later use
grayOriginal = gray.copy()

# A large block size seems to work well
# (blockSize = 51, C = 11 was tried earlier; currently 71 and 19).
# THRESH_BINARY_INV makes the dark grid lines the white foreground.
thresh = cv2.adaptiveThreshold(
    gray,
    maxValue=255.0,
    adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    thresholdType=cv2.THRESH_BINARY_INV,
    blockSize=71,
    C=19
)

# Show binary image
show_wait_destroy("thresh", thresh)

# removeNoise(thresh, 0.5)
# show_wait_destroy("thresh2", thresh)

# Fix horizontal and vertical lines (thickening)
# vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,2))
# thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, vertical_kernel, iterations=1)

# horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,1))
# thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, horizontal_kernel, iterations=1)

# show_wait_destroy("thresh3", thresh)

# Detect the cells on the binary image; crops and debug overlays use the
# original colour image.
cells = find_cells(thresh, image, True)
# for cell in cells:
#     print('pre %s' % str(cell["pos"]))

# sort the contours
# (sorted_cnts, boundingBoxes) = contours.sort_contours(cnts, method="left-to-right")
# loop over the sorted contours and label them
# imageCopySorted = image.copy()
# for (i, c) in enumerate(sorted_cnts):
    # label_contour(imageCopySorted, c, i, color=(240, 0, 159), thickness=1)
# show_wait_destroy('sorted_cnts', imageCopySorted)

# sort cells by centroids
# cells = sorted(cells , key=lambda cell: [cell["pos"][1], cell["pos"][0]])
# for c in cells:
#     print('post %s' % str(c["pos"]))

# Group the cell centroids into rows (debug view only for now).
grid = get_grid(cells, image, True)

# cells = [o["img"] for o in cells]
# for index, c in enumerate(cells):
#     show_wait_destroy("cell", c)
