In [1]:
import math
import cv2
import numpy as np
import os
from glob import glob
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
# Debug switch; none of the cells visible in this section read it.
DEBUG = True

In [2]:
# Locate the positions at which a sequence changes value.
def consecutive_groups(x, stepsize=1):
    """Return the indices ``i`` at which ``x[i]`` differs from ``x[i - 1]``.

    Candidate indices start at 1 and advance by ``stepsize`` up to
    ``len(x) - 1``; each candidate is compared with its immediate
    predecessor. The matching indices are returned in ascending order as a
    list, which is empty when ``x`` has fewer than two elements or never
    changes at a sampled position.
    """
    boundaries = []
    for idx in range(1, len(x), stepsize):
        if x[idx] != x[idx - 1]:
            boundaries.append(idx)
    return boundaries

In [3]:
def adjust_gamma(image, gamma=1.0):
    """Apply gamma correction to ``image`` through a 256-entry lookup table.

    Table entry ``i`` is ``((i / 255) ** (1 / gamma)) * 255`` converted to
    ``uint8``. With this formula ``gamma > 1`` raises mid-tones and
    ``gamma < 1`` lowers them.

    Parameters
    ----------
    image : array passed straight to ``cv2.LUT``.
    gamma : float, must be greater than 0.

    Returns
    -------
    The result of ``cv2.LUT(image, table)``.

    Raises
    ------
    ValueError
        If ``gamma`` is not a positive number (zero, negative or NaN).
    """
    # 1 / gamma is undefined at 0, and a negative exponent sends level 0 to
    # infinity, so reject those values before building the table.
    if not gamma > 0:
        raise ValueError("gamma must be > 0, got {!r}".format(gamma))

    invGamma = 1.0 / gamma
    levels = np.arange(0, 256)
    table = np.array(
        [((level / 255.0) ** invGamma) * 255 for level in levels]
    ).astype("uint8")
    return cv2.LUT(image, table)

In [8]:
def get_horz_vert(inpath, ksize, erosion_iterations, gamma=1.0):
    """Find where the first row and first column of a binarized image change.

    The image at ``inpath`` is read as grayscale, gamma-adjusted when
    ``gamma`` is not 1.0, thresholded at 127 (binary, max value 255) and
    eroded with a ``ksize`` x ``ksize`` rectangular structuring element.

    Parameters
    ----------
    inpath : path of the image to read.
    ksize : side length of the square erosion kernel.
    erosion_iterations : number of erosion passes.
    gamma : gamma value forwarded to ``adjust_gamma``; 1.0 skips the step.

    Returns
    -------
    (horizontal, vertical)
        ``horizontal`` lists the column indices along row 0 of the eroded
        image whose value differs from the column to the left;
        ``vertical`` lists the row indices along column 0 whose value
        differs from the row above.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``inpath`` (``cv2.imread`` returned ``None``).
    """
    img = cv2.imread(inpath, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError('could not read image: {}'.format(inpath))

    if gamma != 1.0:
        img = adjust_gamma(img, gamma)

    _, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

    # Build the kernel from the ksize argument so the function does not
    # depend on a module-level `kernel` having been defined by another cell.
    structuring = cv2.getStructuringElement(cv2.MORPH_RECT, (ksize, ksize))
    eroded = cv2.erode(thresh, structuring, iterations=erosion_iterations)

    horizontal = consecutive_groups(eroded[0])
    vertical = consecutive_groups(eroded[:, 0])

    return horizontal, vertical

In [5]:
def save_cropped(inpath, outpath, y1, y2, x1, x2):
    """Crop the image at ``inpath`` to rows ``y1:y2``, columns ``x1:x2`` and save it.

    The image is read with OpenCV, converted from BGR to RGB, cropped and
    written to ``outpath`` through PIL.

    Parameters
    ----------
    inpath : path of the image to read.
    outpath : destination path handed to ``PIL.Image.save``.
    y1, y2 : top (inclusive) and bottom (exclusive) row of the crop.
    x1, x2 : left (inclusive) and right (exclusive) column of the crop.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``inpath``.
    ValueError
        If the requested window contains no pixels.
    """
    img = cv2.imread(inpath)
    if img is None:
        raise FileNotFoundError('could not read image: {}'.format(inpath))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # A negative bound would be read by NumPy as an offset from the far edge
    # and silently select the wrong region, so pin the window to the image
    # origin. Bounds past the far edge are already truncated by slicing.
    y1, y2, x1, x2 = (max(0, bound) for bound in (y1, y2, x1, x2))

    framed = img[y1:y2, x1:x2]
    if framed.size == 0:
        raise ValueError(
            'empty crop for {}: rows {}:{}, cols {}:{}'.format(inpath, y1, y2, x1, x2)
        )

    Image.fromarray(framed).save(outpath)

In [9]:
# --- Frame-detection settings -------------------------------------------
ksize = 5
kernel_size = (ksize, ksize)
# Stays at module level: other cells may refer to `kernel` by name.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
erosion_iterations = 10

# --- Crop geometry, relative to the detected (x, y) origin ---------------
moveup = 170       # rows above y at which the crop starts
padding = 15       # inset applied to the left, right and bottom edges
docwidth = 1712    # crop spans x + padding .. x + docwidth - padding
docheight = 2832   # crop ends at y + docheight - padding

# Largest x / y accepted for the origin found by the gamma-adjusted retry.
max_origin = 500

indir = '4.rotated'
outdir = '7.final'
# Image.save does not create missing directories.
os.makedirs(outdir, exist_ok=True)

for f in range(1, 70):
    filename = 'john{:04d}.jpg'.format(f)
    inpath = '{}/{}'.format(indir, filename)
    outpath = '{}/{}'.format(outdir, filename)

    horizontal, vertical = get_horz_vert(inpath, ksize, erosion_iterations)
    retried = not (len(horizontal) >= 4 and len(vertical) >= 4)

    if retried:
        # Too few transitions on the plain image: try again on a
        # gamma-adjusted copy and sanity-check the origin it yields.
        horizontal, vertical = get_horz_vert(inpath, ksize, erosion_iterations, gamma=0.5)
        # The retry can come back with no transitions at all; indexing [0]
        # would then raise IndexError, so report the page and move on.
        origin_ok = (
            len(horizontal) > 0
            and len(vertical) > 0
            and horizontal[0] < max_origin
            and vertical[0] < max_origin
        )
        if not origin_ok:
            print(filename, horizontal, vertical)
            continue

    x, y = horizontal[0], vertical[0]
    save_cropped(inpath, outpath, y-moveup, y+docheight-padding, x+padding, x+docwidth-padding)

    if retried:
        # Log the pages that needed the gamma fallback.
        print(outpath)

7.final/john0004.jpg
7.final/john0005.jpg
7.final/john0009.jpg
7.final/john0016.jpg
7.final/john0021.jpg
7.final/john0022.jpg
7.final/john0023.jpg
7.final/john0024.jpg
7.final/john0025.jpg
7.final/john0026.jpg
7.final/john0028.jpg
7.final/john0030.jpg
7.final/john0031.jpg
7.final/john0032.jpg
7.final/john0033.jpg
7.final/john0034.jpg
7.final/john0035.jpg
7.final/john0037.jpg
7.final/john0038.jpg
7.final/john0039.jpg
7.final/john0040.jpg
7.final/john0041.jpg
7.final/john0047.jpg
7.final/john0048.jpg
7.final/john0049.jpg
7.final/john0050.jpg
7.final/john0051.jpg
7.final/john0052.jpg
7.final/john0054.jpg
7.final/john0055.jpg
7.final/john0066.jpg
7.final/john0067.jpg
