In [106]:
import cv2
import time
import numpy as np


def read_img(path):
    """Load the image stored at ``path`` with ``cv2.imread``.

    The value produced by OpenCV is handed back untouched; no check is made
    here on whether the file could actually be decoded.
    """
    return cv2.imread(path)


def _annotate_corner(img, headline, detail=None):
    """Return a copy of ``img`` with a half-transparent caption in its top-left corner.

    ``headline`` is drawn in white. When ``detail`` is given it is drawn in
    green on a second line and the caption box is sized from ``detail``;
    otherwise the box is sized from ``headline``. ``img`` itself is not modified.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 2
    thickness = 3
    line_type = cv2.LINE_AA
    white = (255, 255, 255)
    green = (0, 255, 0)

    sizing_text = headline if detail is None else detail
    (text_width, text_height), _ = cv2.getTextSize(sizing_text, font, font_scale, thickness)
    box_w = text_width + 3
    # Two text lines need a taller box than a single one.
    box_h = 3 * text_height // 2 if detail is None else 3 * text_height

    annotated = img.copy()
    # Slicing clips automatically when the caption box is larger than the image.
    region = annotated[0:box_h, 0:box_w]
    layer = np.zeros(region.shape, dtype=np.uint8)
    layer = cv2.putText(layer, headline, (3, text_height + 5), font, font_scale,
                        white, thickness, line_type, False)
    if detail is not None:
        layer = cv2.putText(layer, detail, (3, 5 * text_height // 2), font, font_scale,
                            green, thickness, line_type, False)

    annotated[0:box_h, 0:box_w] = cv2.addWeighted(region, 0.5, layer, 0.5, 1.0)
    return annotated


def _combine_pair(in_img, out_img):
    """Place two 3-channel images on one black canvas.

    Portrait or square inputs (height >= width of ``in_img``) are placed side
    by side, landscape inputs are stacked vertically. Along the shared axis
    the smaller image is centred.
    """
    in_h, in_w = in_img.shape[:2]
    out_h, out_w = out_img.shape[:2]

    if in_h >= in_w:
        canvas_h, canvas_w = max(in_h, out_h), in_w + out_w
        canvas = np.zeros([canvas_h, canvas_w, 3], np.uint8)
        # The offset is 0 for whichever image already spans the full height.
        in_top = (canvas_h - in_h) // 2
        out_top = (canvas_h - out_h) // 2
        canvas[in_top:in_top + in_h, 0:in_w] = in_img
        canvas[out_top:out_top + out_h, in_w:canvas_w] = out_img
    else:
        canvas_h, canvas_w = in_h + out_h, max(in_w, out_w)
        canvas = np.zeros([canvas_h, canvas_w, 3], np.uint8)
        # The offset is 0 for whichever image already spans the full width.
        in_left = (canvas_w - in_w) // 2
        out_left = (canvas_w - out_w) // 2
        canvas[0:in_h, in_left:in_left + in_w] = in_img
        canvas[in_h:canvas_h, out_left:out_left + out_w] = out_img

    return canvas


def save_img(desc, in_img, out_img, out_path):
    """Write a before/after comparison image to ``out_path``.

    The input image is captioned 'before' plus ``desc``, the output image is
    captioned 'after', and both are combined on a single canvas. The captions
    are drawn on copies, so the arrays passed in are left untouched.

    Args:
        desc: text shown under the 'before' caption (e.g. method and timing).
        in_img: 3-channel uint8 image before deskewing.
        out_img: 3-channel uint8 image after deskewing.
        out_path: destination file path handed to ``cv2.imwrite``.

    Raises:
        OSError: if ``cv2.imwrite`` reports that the file was not written.
    """
    before = _annotate_corner(in_img, 'before', desc)
    after = _annotate_corner(out_img, 'after')
    combine_img = _combine_pair(before, after)

    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(out_path, combine_img):
        raise OSError(f'cv2.imwrite could not write {out_path!r}')

In [107]:
'''
http://aishelf.org/deskew/
'''

# !pip install -q deskew

from skimage.transform import rotate
from skimage.color import rgb2gray

from deskew import determine_skew


def deskew_1(in_img):
    '''deskew'''
    # NOTE: the docstring above is used verbatim as the overlay label by
    # `process` (via func.__doc__), so it is kept short and unchanged.
    #
    # Estimates the skew angle with the `deskew` package and rotates the image
    # by it using skimage. Returns (in_img, out_img); out_img is uint8.

    grayscale = rgb2gray(in_img)
    angle = determine_skew(grayscale)

    # determine_skew yields None when it cannot find an angle; in that case
    # return an unrotated copy instead of failing inside rotate().
    if angle is None:
        return in_img, in_img.copy()

    # skimage's rotate returns floats, rescaled here to the 0..255 uint8 range.
    rotated = rotate(in_img, angle, resize=True, cval=1) * 255
    out_img = rotated.astype(np.uint8)

    return in_img, out_img

In [109]:
'''
https://pyimagesearch.com/2017/02/20/text-skew-correction-opencv-python/
'''

def deskew_2(in_img):
    '''threshold'''
    # NOTE: the docstring above doubles as the overlay label in `process`.
    #
    # Thresholds the inverted grayscale image (Otsu), fits a minimum-area
    # rectangle around all non-zero pixels and rotates the image by the angle
    # derived from that rectangle. Returns (in_img, out_img).

    inverted = cv2.bitwise_not(cv2.cvtColor(in_img, cv2.COLOR_BGR2GRAY))
    _, binary = cv2.threshold(inverted, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # (row, col) coordinates of every foreground pixel.
    foreground = np.column_stack(np.where(binary > 0))
    rect_angle = cv2.minAreaRect(foreground)[-1]

    # NOTE(review): the -45 cut-off presumes minAreaRect reports angles in
    # [-90, 0); confirm against the installed OpenCV version.
    angle = -(90 + rect_angle) if rect_angle < -45 else -rect_angle

    height, width = in_img.shape[:2]
    rotation = cv2.getRotationMatrix2D((width // 2, height // 2), angle, 1.0)
    out_img = cv2.warpAffine(
        in_img,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

    return in_img, out_img

In [111]:
'''
https://becominghuman-ai.translate.goog/how-to-automatically-deskew-straighten-a-text-image-using-opencv-a0c30aed83df?gi=5548544c69b5&_x_tr_sl=auto&_x_tr_tl=vi&_x_tr_hl=vi&_x_tr_pto=wapp
'''

def getSkewAngle(cvImage) -> float:
    """Estimate the skew of ``cvImage`` from its largest dilated text block.

    The image is blurred, Otsu-thresholded (inverted) and dilated with a wide
    kernel so that text merges into blobs. The minimum-area rectangle of the
    largest blob provides the angle.

    Returns:
        The negated rectangle angle (after folding angles below -45 by +90),
        or 0.0 when no contour is found at all.
    """
    gray = cv2.cvtColor(cvImage, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # A wide, flat kernel merges the characters of a line into a single blob.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=5)

    # findContours returns 2 or 3 values depending on the OpenCV major
    # version; the contour list is always the second-to-last item.
    found = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = found[-2]
    if len(contours) == 0:
        # Nothing to measure (e.g. a blank page): report "no skew".
        return 0.0

    # max() picks the first contour with the greatest area, matching what a
    # stable descending sort followed by [0] would select.
    largestContour = max(contours, key=cv2.contourArea)
    angle = cv2.minAreaRect(largestContour)[-1]
    if angle < -45:
        angle = 90 + angle
    return -1.0 * angle

def rotateImage(cvImage, angle: float):
    """Return ``cvImage`` rotated by ``angle`` about its centre.

    The output keeps the input's width and height; pixels exposed by the
    rotation are filled by replicating the border. The input is not modified.
    """
    height, width = cvImage.shape[:2]
    pivot = (width // 2, height // 2)
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        cvImage,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

def deskew_3(in_img):
    '''contours'''
    # NOTE: the docstring above doubles as the overlay label in `process`.
    #
    # Rotates the image by the negated value reported by getSkewAngle and
    # returns (in_img, out_img).
    return in_img, rotateImage(in_img, -1.0 * getSkewAngle(in_img))

In [112]:
'''
Projection Profile
https://medium-com.translate.goog/@9sphere/machine-vision-recipes-deskewing-document-images-e17827894c34?_x_tr_sl=auto&_x_tr_tl=vi&_x_tr_hl=vi&_x_tr_pto=wapp
'''

from skimage.filters import sobel
from skimage.util import invert

def horizontal_projections(sobel_image):
    """Sum ``sobel_image`` along axis 1, giving one total per row."""
    row_totals = np.sum(sobel_image, axis=1)
    return row_totals

def get_skew_angle(img):
    """Search for the rotation angle that maximises the median row projection.

    The Sobel edge map of ``img`` is inverted, then rotated by each candidate
    angle; a candidate wins when the median of its horizontal projection is
    strictly greater than the best seen so far (starting from 0). A coarse
    pass over the integers -10..9 is followed by a fine pass in 0.01 steps
    over +/-1 around the coarse winner.

    Returns:
        The winning angle; 0 if no candidate ever beats the initial score.
    """
    edge_map = invert(sobel(img))

    def _search(candidates, best_angle, best_score):
        # Keep the first candidate whose median projection strictly improves.
        for candidate in candidates:
            profile = horizontal_projections(rotate(edge_map, candidate, cval=1))
            score = np.median(profile)
            if score > best_score:
                best_angle, best_score = candidate, score
        return best_angle, best_score

    coarse_angle, coarse_score = _search(range(-10, 10), 0, 0)

    fine_grid = np.arange(coarse_angle - 1.0, coarse_angle + 1.0, 0.01)
    fine_angle, _ = _search(fine_grid, coarse_angle, coarse_score)

    return fine_angle

def deskew_4(in_img):
    '''Projection Profile'''
    # NOTE: the docstring above doubles as the overlay label in `process`.
    #
    # Finds the skew with get_skew_angle on the grayscale image, rotates the
    # original by it and rescales skimage's float output to uint8.
    skew = get_skew_angle(rgb2gray(in_img))
    out_img = (rotate(in_img, skew, resize=True, cval=1) * 255).astype(np.uint8)

    return in_img, out_img

In [113]:
'''
Hough Transform
https://medium-com.translate.goog/@9sphere/machine-vision-recipes-deskewing-document-images-e17827894c34?_x_tr_sl=auto&_x_tr_tl=vi&_x_tr_hl=vi&_x_tr_pto=wapp
'''

from skimage.transform import hough_line, hough_line_peaks
from skimage.feature import canny
from scipy.stats import mode

def skew_angle_hough_transform(image):
    """Estimate the skew angle (degrees) from the dominant Hough line direction.

    Edges are detected with Canny, a Hough line transform is evaluated at
    0.1, 1.1, ..., 179.1 degrees, and the most frequent peak angle (rounded to
    two decimals, in radians) is converted to an offset from 90 degrees.

    Returns:
        A 1-element NumPy array holding the skew angle, so that ``result[0]``
        is valid regardless of the installed SciPy/NumPy versions. The array
        holds 0.0 when no Hough peak is found.
    """
    edges = canny(image)
    tested_angles = np.deg2rad(np.arange(0.1, 180.0))
    accumulator, theta, distances = hough_line(edges, theta=tested_angles)

    _, peak_angles, _ = hough_line_peaks(accumulator, theta, distances)
    peak_angles = np.asarray(peak_angles, dtype=float)
    if peak_angles.size == 0:
        # No line detected at all: report "no skew" instead of failing.
        return np.array([0.0])

    # Most frequent rounded angle; np.unique sorts ascending and argmax takes
    # the first maximum, so ties resolve to the smallest angle.
    values, counts = np.unique(np.around(peak_angles, decimals=2), return_counts=True)
    most_common_angle = values[np.argmax(counts)]

    skew_angle = np.rad2deg(most_common_angle - np.pi / 2)
    return np.atleast_1d(skew_angle)

def deskew_5(in_img):
    '''Hough Transform'''
    # NOTE: the docstring above doubles as the overlay label in `process`.
    #
    # Estimates the skew with the Hough-transform helper, rotates the image by
    # it and rescales skimage's float output to uint8.

    image = rgb2gray(in_img)

    # The helper's result may be a 1-element array or a bare NumPy scalar
    # depending on library versions; ravel() makes [0] valid for both.
    estimate = skew_angle_hough_transform(image)
    angle = float(np.ravel(estimate)[0])

    rotated = rotate(in_img, angle, resize=True, cval=1) * 255
    out_img = rotated.astype(np.uint8)

    return in_img, out_img

In [114]:
'''
Fourier Transform
https://medium-com.translate.goog/@9sphere/machine-vision-recipes-deskewing-document-images-e17827894c34?_x_tr_sl=auto&_x_tr_tl=vi&_x_tr_hl=vi&_x_tr_pto=wapp
'''
from scipy import stats

def detect_skew_angle_fourier_tranform(image):
    """Estimate the skew angle (degrees) from the 2-D Fourier spectrum of the edges.

    The 30 strongest entries of the centred log-magnitude spectrum (with the
    centre entry overwritten by 0) are fitted with a straight line, row index
    against column index; the line's slope is converted to an angle offset by
    90 degrees and rounded to two decimals.

    Returns:
        The rotation angle in degrees, or 0.0 when the selected points have
        no horizontal spread (a perfectly vertical line in the spectrum), for
        which a regression slope cannot be computed.
    """
    edges = canny(image)
    spectrum = np.fft.fftshift(np.fft.fft2(edges))

    # Zero-magnitude bins legitimately map to -inf; silence the warning only.
    with np.errstate(divide='ignore'):
        magnitude_spectrum = np.log(np.abs(spectrum))

    n_rows, n_cols = magnitude_spectrum.shape
    magnitude_spectrum[n_rows // 2, n_cols // 2] = 0

    # Flat row-major indices of the 30 largest magnitudes, strongest first.
    strongest = magnitude_spectrum.ravel().argsort()[::-1][:30]
    row_idx, col_idx = np.unravel_index(strongest, magnitude_spectrum.shape)
    xs = col_idx.astype(float)
    ys = row_idx.astype(float)

    # linregress cannot fit a line when every x is identical.
    if xs.size < 2 or xs.max() == xs.min():
        return np.float64(0.0)

    slope = stats.linregress(xs, ys)[0]
    rotation_angle = np.round(np.rad2deg(np.arctan(slope) - np.pi / 2), decimals=2)

    return rotation_angle

def deskew_6(in_img):
    '''Fourier Transform'''
    # NOTE: the docstring above doubles as the overlay label in `process`.
    #
    # Finds the skew with the Fourier-transform helper on the grayscale image,
    # rotates the original by it and rescales skimage's float output to uint8.
    skew = detect_skew_angle_fourier_tranform(rgb2gray(in_img))
    out_img = (rotate(in_img, skew, resize=True, cval=1) * 255).astype(np.uint8)

    return in_img, out_img

In [121]:
'''
ARS
https://own.arsvietnam.com/gitlab/hoabd/ocr-bcqt/-/blob/master/modules/correct_align/align_correcter.py
'''

from functools import partial
from scipy.ndimage import interpolation as inter

# NOTE(review): not referenced by any code visible in this notebook —
# presumably a leftover from the original module; confirm before removing.
num_thread = 4
# Half-width of the candidate-angle sweep in deskew_7: angles run from
# -limit to +limit (the values are later passed to cv2.getRotationMatrix2D).
limit = 5
# Step between consecutive candidate angles in that sweep.
delta = 0.5

def determine_score(angle, arr):
    """Score how well ``arr`` rotated by ``angle`` separates into rows.

    The array is rotated (nearest-neighbour, same shape), summed along axis 1
    and the squared differences between consecutive row sums are added up.

    Args:
        angle: rotation angle passed to ``scipy.ndimage.rotate``.
        arr: 2-D array to evaluate (a thresholded image in this notebook).

    Returns:
        The sum of squared differences between neighbouring row sums.
    """
    # Public location of rotate(); the scipy.ndimage.interpolation namespace
    # is deprecated.
    from scipy.ndimage import rotate as nd_rotate

    data = nd_rotate(arr, angle, reshape=False, order=0)

    # Sum in a signed 64-bit type: with uint8 input the row sums would be
    # unsigned, and subtracting them would wrap around on negative differences.
    histogram = np.sum(data, axis=1, dtype=np.int64)
    score = np.sum(np.diff(histogram) ** 2)
    return score

def deskew_7(in_img):
    '''[ARS'''
    # NOTE: the docstring above is used verbatim as the overlay label by
    # `process` (via func.__doc__), so it is left exactly as it was.
    # NOTE(review): the leading '[' looks like a typo — confirm before fixing.
    #
    # Sweeps candidate angles from -limit to +limit in steps of delta, scores
    # each with determine_score on a half-size thresholded copy, and rotates
    # the image by the best-scoring angle. Returns (in_img, out_img).

    # The angle search runs on a half-size copy to keep the sweep cheap.
    small = cv2.resize(in_img, None, fx=0.5, fy=0.5)
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 25, 30)

    angles = np.arange(-limit, limit + delta, delta)
    scores = [determine_score(angle, thresh) for angle in angles]
    # argmax returns the first maximum, like list.index(max(...)).
    best_angle = float(angles[int(np.argmax(scores))])

    # A rotation angle does not depend on scale, so apply it to the original
    # full-resolution image instead of rotating the half-size copy and
    # upscaling it afterwards, which would blur the result.
    (h, w) = in_img.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, best_angle, 1.0)
    out_img = cv2.warpAffine(in_img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    return in_img, out_img

In [123]:
# debug: run a single deskew method on one image and save the comparison
import os

# Derive the label from the function itself so the two cannot drift apart.
deskew_fn = deskew_7
method = deskew_fn.__name__

in_path = '/hdd/nguyenlc/dms/deskew/images/skew_input/image_6_0.png'
# in_path = '/hdd/nguyenlc/dms/deskew/images/skew_input/image_19_2.png'

in_img = read_img(in_path)

out_folder = '/hdd/nguyenlc/dms/deskew/'
# splitext copes with extensions of any length (e.g. '.jpeg').
stem, ext = os.path.splitext(os.path.basename(in_path))
out_filename = f'{stem}_{method}{ext}'
out_path = os.path.join(out_folder, out_filename)

# perf_counter is the monotonic clock intended for measuring intervals.
start = time.perf_counter()

in_img, out_img = deskew_fn(in_img)

end = time.perf_counter()
process_time = str(round((end - start) * 1000, 3)) + ' ms'

desc = f'{method}: ' + process_time
save_img(desc, in_img, out_img, out_path)

In [93]:
import os

# os.listdir returns entries in arbitrary order; sorting keeps runs reproducible.
skew_image_path = '/hdd/nguyenlc/dms/deskew/images/skew_input/'
skew_image = sorted(
    os.path.join(skew_image_path, image) for image in os.listdir(skew_image_path)
)

not_skew_image_path = '/hdd/nguyenlc/dms/deskew/images/not_skew_input/'
not_skew_image = sorted(
    os.path.join(not_skew_image_path, image) for image in os.listdir(not_skew_image_path)
)

len(skew_image), len(not_skew_image)

(23, 15)

In [118]:
from tqdm import tqdm
def process(in_img_list, out_folder, func):
    """Run one deskew function over a list of images and save the comparisons.

    For every path the image is read, ``func`` is timed on it, and a
    before/after image is written to ``out_folder`` under the name
    ``<stem>_<func.__name__><ext>``.

    Args:
        in_img_list: iterable of input image paths.
        out_folder: directory that receives the comparison images.
        func: callable taking an image and returning ``(in_img, out_img)``.
            Its docstring is used as the label on the saved image, falling
            back to its name when it has none.
    """
    label = func.__doc__ or func.__name__

    for in_path in tqdm(in_img_list):
        in_img = read_img(in_path)
        if in_img is None:
            # Not a decodable image (e.g. a stray file in the input folder).
            print(f'Skipping unreadable image: {in_path}')
            continue

        # splitext copes with extensions of any length (e.g. '.jpeg').
        stem, ext = os.path.splitext(os.path.basename(in_path))
        out_path = os.path.join(out_folder, f'{stem}_{func.__name__}{ext}')

        # perf_counter is the monotonic clock intended for measuring intervals.
        start = time.perf_counter()
        in_img, out_img = func(in_img)
        elapsed_ms = round((time.perf_counter() - start) * 1000, 3)

        desc = f'{label}: {elapsed_ms} ms'
        save_img(desc, in_img, out_img, out_path)

In [124]:
# Deskew methods to run; swap in the commented list to compare the others.
# funcs = [deskew_1, deskew_2, deskew_3, deskew_4, deskew_5, deskew_6]
funcs = [deskew_7]

# Apply every selected method to the skewed input images.
out_folder = '/hdd/nguyenlc/dms/deskew/images/deskew_output/'
for deskew_fn in funcs:
    print(deskew_fn.__name__)
    process(skew_image, out_folder, deskew_fn)

deskew_7


  0%|          | 0/23 [00:00<?, ?it/s]

100%|██████████| 23/23 [00:12<00:00,  1.84it/s]


In [125]:
# Apply every selected method to the images that are not skewed.
out_folder = '/hdd/nguyenlc/dms/deskew/images/de_notskew_ouput/'
for deskew_fn in funcs:
    print(deskew_fn.__name__)
    process(not_skew_image, out_folder, deskew_fn)

deskew_7


100%|██████████| 15/15 [00:08<00:00,  1.85it/s]
