Document Detection comprises:
1. Read image
2. Preprocessing
3. Locate document boundaries
4. Deskew and extract

In [1]:
import os
import sys
import cv2

sys.path.insert(1, os.path.join(sys.path[0], '..'))
from preprocessors import RotationCorrector, \
                            Resizer, \
                            OtsuThresholder, \
                            FastDenoiser, \
                            Closer, \
                            Opener, \
                            Grayscaler, \
                            ReverseGrayscaler, \
                            GaussianBlur, \
                            EdgeDetector
from document_edge_detector import DocumentEdgeDetector
from page_extractor import PageExtractor

# for showing images in notebook
from matplotlib import pyplot as plt
import numpy as np

In [11]:
# Input photo fed to cv2.imread in the main workflow; the path is relative,
# so it is resolved against the kernel's current working directory.
test_path = './data/img2.jpg'

In [3]:
class DocumentWorkflow:
    """Run preprocessing, document edge detection and page extraction on one image.

    Args:
        preprocessor_pipeline: callable ``image -> image`` applied before
            edge detection.
        document_edge_detector: callable ``image -> quad`` run on the
            preprocessed image.
        page_extractor: callable ``(image, quad) -> warped`` run on an
            untouched copy of the input image.
        force_output_process: when true, ``set_force_output_process()`` is
            called on all three components at the start of every call and
            the result of each stage is written to ``output_dir``.
        output_dir: directory receiving the ``stage_*.jpg`` debug images.
            It is created on demand. Defaults to ``'output'``.
    """

    def __init__(self, 
                 preprocessor_pipeline,
                 document_edge_detector,
                 page_extractor,
                 force_output_process=False,
                 output_dir='output'):
        self.preprocessors = preprocessor_pipeline
        self.document_edge_detector = document_edge_detector
        self.page_extractor = page_extractor
        self.force_output_process = force_output_process
        self.output_dir = output_dir

    def _write_stage(self, filename, image):
        """Write one debug image into ``output_dir``, creating the directory first."""
        # cv2.imwrite reports failure through its return value instead of
        # raising, so a missing directory would otherwise lose the image
        # without any visible error.
        os.makedirs(self.output_dir, exist_ok=True)
        cv2.imwrite(os.path.join(self.output_dir, filename), image)

    def __call__(self, image):
        """Process ``image`` and return the value produced by the page extractor."""
        if self.force_output_process:
            self.preprocessors.set_force_output_process()
            self.document_edge_detector.set_force_output_process()
            self.page_extractor.set_force_output_process()

        # Keep an unprocessed copy: the overlay and the final extraction both
        # work on the original pixels, not on the preprocessed image.
        colored_image = image.copy()
        image = self.preprocessors(image)
        if self.force_output_process:
            self._write_stage('stage_1_preprocess.jpg', image)

        quad = self.document_edge_detector(image)

        if self.force_output_process:
            bounded = DocumentEdgeDetector.overlay_document_bounding_boxes(colored_image, quad)
            self._write_stage('stage_2_bounding_boxes.jpg', bounded)

        warped = self.page_extractor(colored_image, quad)

        if self.force_output_process:
            self._write_stage('stage_3_warped.jpg', warped)

        return warped

Common Preprocessing Steps:
1. Threshold/binarize to black and white
2. De-noise
3. Sharpen
4. Clean up image using Morphological Transforms
5. Increase contrast of image
6. Invert image (swap black and white pixels)

This work takes inspiration from @Shakleen: https://github.com/Shakleen/Python-Document-Detector and https://medium.com/intelligentmachines/document-detection-in-python-2f9ffd26bf65


In [4]:
class PreprocessorPipeline:
    """Ordered chain of image filters, applied one after another."""

    def __init__(self):
        # Starts empty; callers append filter callables to this list.
        self.filters = []

    def set_force_output_process(self):
        """Set ``output_process = True`` on every filter currently registered."""
        for stage in self.filters:
            setattr(stage, 'output_process', True)

    def __call__(self, image):
        """Pass ``image`` through each filter in list order and return the result."""
        result = image
        for stage in self.filters:
            result = stage(result)
        return result

Main Workflow

In [13]:
# cv2.imread signals a missing or unreadable file by returning None rather
# than raising, so check here instead of failing later on image.copy().
image = cv2.imread(test_path)
if image is None:
    raise FileNotFoundError('Could not read image at {!r}'.format(test_path))

# Filters run in list order on the image before edge detection.
preprocessors = PreprocessorPipeline()
preprocessors.filters += [GaussianBlur(),
                          FastDenoiser(),
                          OtsuThresholder(),
                          Closer(),
                          EdgeDetector()]

document_edge_detector = DocumentEdgeDetector()

page_extractor = PageExtractor()

# force_output_process=True makes the workflow write the stage_*.jpg
# intermediate images for each stage.
document_workflow = DocumentWorkflow(preprocessors,
                                     document_edge_detector,
                                     page_extractor,
                                     force_output_process=True)
output = document_workflow(image)

# cv2.imwrite('output/output.jpg', output)
# plt.imshow(output)

Make Drawing Brush

In [14]:
import pygame

In [21]:
# Interactive brush demo: the resized source photo is shown on the left and
# the extracted page on the right. Dragging with the left mouse button draws
# on the extracted page, and the same strokes are mapped back onto the source
# photo through page_extractor.reverse_skew.

# colours as (R, G, B)
BLUE = (0,0,255)
BLACK = (0,0,0)

# sizes of src and dst
src = page_extractor.src
dst = page_extractor.dst

# perspective transform
m = page_extractor.m

# dimensions of dst
# format: (height, width)
src_dims = image.shape[:2]
dst_dims = (page_extractor.max_height, page_extractor.max_width)

### get images
# scale source to the same height
# resized = resized source
resizer = Resizer(height = dst_dims[0], output_process = True)
resized = resizer(image)
resized_dims = resizer.get_new_dims(image)

# setup image planes
# dimensions are of the form (height, width)
# cv2.imread delivers BGR channel order, while the pygame buffers below are
# declared as 'RGB'; convert first so red and blue are not swapped on screen.
# NOTE(review): assumes Resizer and PageExtractor keep the BGR order - confirm.
original = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB) # shape: resized_dims
skewed = cv2.cvtColor(output, cv2.COLOR_BGR2RGB) # shape: dst

# pygame accepts images with dimension format (width, height)
original_surface = pygame.image.frombuffer(original.flatten(), resized_dims[::-1], 'RGB')
skewed_surface = pygame.image.frombuffer(skewed.flatten(), dst_dims[::-1], 'RGB')

# create game window
image_gap_size = 20
gameDisplay = pygame.display.set_mode((original.shape[1]+skewed.shape[1]+image_gap_size, original.shape[0]))
pygame.display.set_caption('i require healing')

brush = None
# SRCALPHA already gives the surface per-pixel alpha. The former bare
# convert_alpha() call returned a new surface that was discarded, so it
# had no effect and has been dropped.
brush_surface = pygame.surface.Surface((dst_dims[1], dst_dims[0]), pygame.SRCALPHA)
# translates window coordinates into brush_surface coordinates
brush_offset = (-(original.shape[1]+image_gap_size), 0)
unskewed_resizer = Resizer(height = dst_dims[0])

game_exit = False
prev_brush = None

# try/finally guarantees the window is closed even when a frame raises;
# the exception itself still propagates to the notebook.
try:
    # clear buffer
    gameDisplay.fill(BLACK)
    # draw images
    gameDisplay.blit(original_surface, (0, 0))
    gameDisplay.blit(skewed_surface, (resized_dims[1]+image_gap_size, 0))
    gameDisplay.blit(brush_surface, (resized_dims[1]+image_gap_size, 0))

    while not game_exit:

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                game_exit = True
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE:
                    game_exit = True
            elif event.type == pygame.MOUSEBUTTONDOWN:
                if event.button == 1: # left button pressed
                    brush = event.pos
            elif event.type == pygame.MOUSEBUTTONUP:
                if event.button == 1: # left button released
                    brush = None
                    prev_brush = None
            elif event.type == pygame.MOUSEMOTION:
                if brush: # left button still pressed
                    brush = event.pos

        # draw brush in buffer
        if brush:
            brush_loc = [sum(c) for c in zip([brush[0], brush[1]], brush_offset)]
            # connect the previous and current positions so fast strokes stay continuous
            if prev_brush:
                pygame.draw.line(brush_surface, BLUE, brush_loc, prev_brush, 4)
            gameDisplay.blit(brush_surface, (resized_dims[1]+image_gap_size, 0))

            # surfarray arrays are indexed (x, y); swap to (row, col) for OpenCV
            brush_data = pygame.surfarray.array3d(brush_surface).swapaxes(0,1)
            brush_data_alpha = pygame.surfarray.array_alpha(brush_surface).swapaxes(0,1)

            brush_img = cv2.cvtColor(brush_data, cv2.COLOR_RGB2RGBA)
            brush_img[:,:,3] = brush_data_alpha

            # unskew brush_img
            unskewed_img = page_extractor.reverse_skew(brush_img)
            unskewed_img = unskewed_resizer(unskewed_img)
            unskewed_brush_surface = pygame.image.frombuffer(unskewed_img , resized_dims[::-1], 'RGBA')
            gameDisplay.blit(unskewed_brush_surface, (0, 0))

            prev_brush = brush_loc
        # draw buffer
        pygame.display.update()
finally:
    pygame.quit()

(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960, 3)
(1280, 960