In [1]:
import cv2
import pytesseract
import numpy as np

# Test character boxing: draw a green rectangle around every character
# that Tesseract reports for the sample image.
img = cv2.imread('../cropped text/text1.jpg')
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; fail here with a clear message instead of at img.shape.
if img is None:
    raise FileNotFoundError("Could not read image: '../cropped text/text1.jpg'")
h, w, c = img.shape
boxes = pytesseract.image_to_boxes(img)
for b in boxes.splitlines():
    # Each row is "<char> <left> <bottom> <right> <top> <page>"
    b = b.split(' ')
    left, bottom, right, top = (int(value) for value in b[1:5])
    # The y values are subtracted from the image height because the box
    # coordinates are measured from the bottom edge, while OpenCV draws
    # from the top edge.
    img = cv2.rectangle(img, (left, h - bottom), (right, h - top), (0, 255, 0), 2)

# Opens a native window and blocks until a key is pressed.
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [2]:
from pytesseract import Output

# Test word boxing: draw a green rectangle around every entry that
# Tesseract reports with a confidence above 60.
img = cv2.imread('../cropped text/text1.jpg')
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; fail early with a clear message.
if img is None:
    raise FileNotFoundError("Could not read image: '../cropped text/text1.jpg'")
d = pytesseract.image_to_data(img, output_type=Output.DICT)
print(d.keys())
n_boxes = len(d['text'])
for i in range(n_boxes):
    # Depending on the pytesseract/Tesseract version, 'conf' entries may be
    # ints, floats, or numeric strings such as '96.58'. int('96.58') raises
    # ValueError, whereas float() accepts every one of those forms.
    if float(d['conf'][i]) > 60:
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
        img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Opens a native window and blocks until a key is pressed.
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

dict_keys(['level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num', 'left', 'top', 'width', 'height', 'conf', 'text'])


In [3]:
# get grayscale image
def get_grayscale(image):
    """Return ``image`` converted with OpenCV's BGR-to-gray colour conversion."""
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray_image

# noise removal
def remove_noise(image,amount=5):
    """Median-blur ``image``; ``amount`` is passed to ``cv2.medianBlur`` as the aperture size."""
    blurred = cv2.medianBlur(image, amount)
    return blurred
 
#thresholding
def thresholding(image):
    """Binarise ``image`` with the THRESH_BINARY + THRESH_OTSU flags and return the result.

    Only the thresholded image (second element of ``cv2.threshold``'s return
    value) is returned; the threshold value itself is discarded.
    """
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binary = cv2.threshold(image, 0, 255, otsu_flags)
    return binary

#dilation
def dilate(image):
    """Dilate ``image`` three times with an all-ones kernel of 2 rows by 8 columns."""
    wide_kernel = np.ones((2, 8), dtype=np.uint8)
    dilated = cv2.dilate(image, wide_kernel, iterations=3)
    return dilated
    
#erosion
def erode(image,mode='V'):
    """Erode ``image`` once with a kernel chosen by ``mode``.

    Args:
        image: image passed straight to ``cv2.erode``.
        mode: ``'V'`` selects an all-ones kernel of 2 rows by 1 column;
            ``'S'`` selects a 3x3 kernel whose four corners are zero
            (a plus shape).

    Returns:
        The result of ``cv2.erode`` with ``iterations=1``.

    Raises:
        ValueError: if ``mode`` is neither ``'V'`` nor ``'S'``. Previously an
            unknown mode left the kernel undefined and surfaced as an
            UnboundLocalError.
    """
    if mode == 'V':
        kernel = np.ones((2, 1), np.uint8)
    elif mode == 'S':
        # 3x3 ones with the corners knocked out
        kernel = np.array(
            [[0, 1, 0],
             [1, 1, 1],
             [0, 1, 0]],
            dtype=np.uint8,
        )
    else:
        raise ValueError("erode: mode must be 'V' or 'S', got {!r}".format(mode))
    return cv2.erode(image, kernel, iterations=1)

#opening - erosion followed by dilation
def opening(image):
    """Apply ``cv2.MORPH_OPEN`` to ``image`` with a 5x5 all-ones kernel."""
    square_kernel = np.ones((5, 5), dtype=np.uint8)
    opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, square_kernel)
    return opened

#canny edge detection
def canny(image):
    """Run ``cv2.Canny`` on ``image`` with thresholds 100 and 200."""
    low_threshold, high_threshold = 100, 200
    edge_map = cv2.Canny(image, low_threshold, high_threshold)
    return edge_map

In [4]:
def show_img(image, name='img'):
    """Show ``image`` in a window titled ``name`` and wait for a key press.

    All OpenCV windows are destroyed afterwards, including when the wait is
    interrupted (for example by interrupting the kernel), so an aborted run
    does not leave windows behind.
    """
    cv2.imshow(name, image)
    try:
        # 0 = wait for a key press with no timeout
        cv2.waitKey(0)
    finally:
        cv2.destroyAllWindows()

In [5]:
def morph(image, mode='close'):
    """Apply a morphological filter to ``image``.

    Args:
        image: image handed to ``cv2.threshold`` / ``cv2.morphologyEx``.
        mode:
            ``'close'`` - threshold the image with THRESH_BINARY + THRESH_OTSU,
            then apply MORPH_CLOSE with a rectangular structuring element of
            size (4, 5).
            ``'open'`` - apply MORPH_OPEN with a rectangular structuring element
            of size (20, 1); no thresholding is done in this mode.

    Returns:
        The filtered image.

    Raises:
        ValueError: if ``mode`` is not ``'close'`` or ``'open'``. Previously an
            unknown mode silently returned ``None``.
    """
    if mode == 'close':
        otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
        threshold_img = cv2.threshold(image, 0, 255, otsu_flags)[1]
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4,5))
        return cv2.morphologyEx(threshold_img, cv2.MORPH_CLOSE, kernel)
    if mode == 'open':
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20,1))
        # This branch used to pass cv2.MORPH_CLOSE, so 'open' performed a
        # closing; it now performs the opening its name promises.
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
    raise ValueError("morph: mode must be 'close' or 'open', got {!r}".format(mode))

In [6]:
# Test preprocessing techniques: build several preprocessed variants of one
# image, display them, and compare what Tesseract reads from each.

image_base = cv2.imread('../cropped text/text3 pre-edit.jpg')
# cv2.imread returns None (it does not raise) when the file is missing or
# unreadable; fail early with a clear message.
if image_base is None:
    raise FileNotFoundError("Could not read image: '../cropped text/text3 pre-edit.jpg'")

# Every variant after the blur is derived from the blurred grayscale image.
image = remove_noise(image_base)
gray = get_grayscale(image)
thresh = thresholding(gray)
opening_img = opening(gray)
canny_img = canny(gray)
morph_img = morph(gray)

# Display each stage in turn; every window waits for a key press.
# Note: the window named 'base' shows the blurred image, not image_base.
for window_name, stage_img in [
    ('base', image),
    ('gray', gray),
    ('thresh', thresh),
    ('opening', opening_img),
    ('canny', canny_img),
    ('morph', morph_img),
]:
    show_img(stage_img, window_name)

# OCR every variant, including the untouched original, for comparison.
for label, stage_img in [
    ("Base:", image_base),
    ("Remove noise:", image),
    ("Greyscale:", gray),
    ("Threshold:", thresh),
    ("Opening:", opening_img),
    ("Canny:", canny_img),
    ("Morph:", morph_img),
]:
    print(label, pytesseract.image_to_string(stage_img))


Base: 
Remove noise: 
Greyscale: 
Threshold: 
Opening: 
Canny: er

 

QUO.0

Peele are

Morph: Rattling of doors & windows.



In [7]:
def find_and_replace_lines(img,show=False):
    """Detect long line segments in ``img`` and paint them white, sparing text.

    Steps:
      1. Edge-detect a dilated grayscale copy and run ``cv2.HoughLinesP``.
      2. Draw every detected segment (slightly extended, 10 px thick) onto a
         black canvas (``line_base``).
      3. Build ``img_mask`` from the inverted image with a vertical erosion
         followed by a vertical dilation.
      4. Remove from ``line_base`` the pixels that overlap ``img_mask``, then
         OR the remaining line pixels into the image.
      5. Convert to grayscale and apply a binary threshold at 20.

    Args:
        img: image whose ``shape`` has three entries (height, width, channels);
            it is also passed to ``get_grayscale``.
        show: when True, intermediate images are displayed with ``show_img``.

    Returns:
        When segments are found: the single-channel thresholded image.
        When none are found: ``img`` itself, unchanged. Callers therefore have
        to cope with both a 3-dimensional and a 2-dimensional result.
    """
    # Find edges with hough transform
    gs_img = get_grayscale(img)
    morph_img = dilate(gs_img)
    edges = cv2.Canny(morph_img,255/3,255,apertureSize = 3)
    if show:
        show_img(edges,'edges')
    lines = cv2.HoughLinesP(edges,0.1,np.pi/180,70,minLineLength=80,maxLineGap=3)

    # Nothing detected: hand the input back untouched
    if lines is None:
        return img

    # Canvas for the detected lines plus working copies of the input
    height, width, channels = img.shape
    line_base = np.zeros((height,width,channels),np.uint8)
    img_cpy = img.copy()
    img_mask = img.copy()

    # Draw each (slightly lengthened) segment on the canvas and on a preview copy
    scale = 2
    for line in lines:
        x1new, y1new, x2new, y2new = _extend_segment(*line[0], scale)
        cv2.line(line_base,(x1new,y1new),(x2new,y2new),(255,255,255),10)
        cv2.line(img_cpy,(x1new,y1new),(x2new,y2new),(0,225,0),10)
    if show:
        show_img(img_cpy,'lines')

    # Create mask for the line image based on location of text in the original:
    # invert, then erode and dilate vertically to mostly isolate text
    img_mask = cv2.bitwise_not(img_mask)
    kernel_len = 15
    erode_kernel = np.ones((kernel_len,1),np.uint8)
    img_mask = cv2.erode(img_mask, erode_kernel, iterations = 1)

    # The dilation kernel is the same column with rows 1..kernel_len//2 zeroed,
    # i.e. only the first row and the rows below the middle stay set.
    dilate_kernel = np.ones((kernel_len,1),np.uint8)
    dilate_kernel[1:kernel_len // 2 + 1] = 0
    img_mask = cv2.dilate(img_mask, dilate_kernel, iterations = 1)
    if show:
        show_img(img_mask,'img_mask')
        show_img(line_base,'line_base')

    # Line pixels that overlap the image mask are dropped from the line canvas
    overlap = cv2.bitwise_and(img_mask, line_base)
    line_mask = cv2.bitwise_not(overlap)
    masked_lines = cv2.bitwise_and(line_mask, line_base)

    # OR-ing the white line pixels into the image erases the lines, then binarise
    img = cv2.bitwise_or(img, masked_lines)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret,img = cv2.threshold(img,20,255,cv2.THRESH_BINARY)
    if show:
        show_img(masked_lines,'masked_lines')
        show_img(img,'final')
    return img


def _extend_segment(x1, y1, x2, y2, scale):
    """Return integer endpoints of the segment moved ``scale`` px outward in x.

    For non-vertical segments the first endpoint moves by ``-scale`` in x and
    the second by ``+scale``, with y following the segment's slope. A vertical
    segment (``x1 == x2``) has no finite slope - computing one used to end in
    ``int(inf)`` and an OverflowError - so it is lengthened by ``scale`` px
    along y instead.
    """
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    if x1 == x2:
        step = scale if y2 >= y1 else -scale
        return x1, y1 - step, x2, y2 + step
    slope = (y2 - y1) / (x2 - x1)
    return (
        int(x1 - scale),
        int(y1 - scale*slope),
        int(x2 + scale),
        int(y2 + scale*slope),
    )

In [10]:
import os
        
directory = '../cropped text/'
show = False
cv2.destroyAllWindows()
# List the directory once so the printed listing and the loop use the same entries
filenames = os.listdir(directory)
print(filenames)

# Iterate over all image files in the specified directory
for filename in filenames:
    # Only .jpg / .JPG files are processed
    if not filename.endswith((".jpg", ".JPG")):
        continue
    filepath = os.path.join(directory, filename)
    print(filepath)

    # Read image
    img = cv2.imread(filepath)
    # cv2.imread returns None (it does not raise) for unreadable files; skip
    # the file instead of letting one bad image abort the whole batch.
    if img is None:
        print("Could not read image, skipping:", filepath)
        continue
    # Find lines and replace with white (i.e. remove lines)
    img = find_and_replace_lines(img, show)
    # Blur image
    rm_noise_img = remove_noise(img)
    # Convert image to grayscale; find_and_replace_lines already returns a
    # single-channel image when it detected lines
    if rm_noise_img.ndim == 3:
        gs_img = get_grayscale(rm_noise_img)
    else:
        gs_img = rm_noise_img
    # Apply morph filter to image
    m_img = morph(gs_img)
    #m_img = cv2.bitwise_not(m_img) # Invert image colors
    #m_img = erode(m_img,mode='S')
    #m_img = cv2.bitwise_not(m_img) # Invert image colors

    # Extract text
    # NOTE(review): OCR runs on img (the line-removed image); m_img is only
    # displayed when show is True. Confirm whether m_img was meant to be
    # passed to Tesseract instead.
    print("Image Text:\n_______________")
    custom_config = r'-c tessedit_char_blacklist=éÓóÔôÒòÑñÖö¡ÇçŒœßØøÅåÆæÐð --psm 6'
    #custom_config = r'-c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ,.;:/1235467890!@#$%^&*" --psm 6'
    print(pytesseract.image_to_string(img,config=custom_config))
    print("_______________")
    if show:
        show_img(m_img,'morph')
    print("\n\n")


['sudoku.png', 'text1.jpg', 'text10.JPG', 'text11.JPG', 'text12.JPG', 'text13.JPG', 'text14.JPG', 'text15.JPG', 'text16.JPG', 'text17.jpg', 'text18.JPG', 'text19.JPG', 'text2.jpg', 'text20.JPG', 'text21.JPG', 'text22.JPG', 'text23.JPG', 'text24.JPG', 'text25.JPG', 'text26.JPG', 'text27.JPG', 'text28.JPG', 'text29.JPG', 'text3 pre-edit.jpg', 'text3.jpg', 'text30.JPG', 'text4.jpg', 'text5.1.jpg', 'text5.2.jpg', 'text5.3.jpg', 'text5.JPG', 'text6.JPG', 'text7.JPG', 'text8.JPG', 'text9.JPG']
../cropped text/text1.jpg
Image Text:
_______________
Almost all in
Community

_______________



../cropped text/text10.JPG
Image Text:
_______________
Lyeryone

_______________



../cropped text/text11.JPG
Image Text:
_______________
Not in our area to my_knowledgee _
Cracked plaster, foundations, dishes, ctce

_______________



../cropped text/text12.JPG
Image Text:
_______________
c. Frightened: . ,
No one, few, many, all (in your home) (in_commynity)s ‘general panic Several
frightened-No pani

Line removal algorithm:  
start with the original image  
detect lines with HoughLinesP (probabilistic Hough transform)  
use erosion followed by dilation with a vertical kernel on a copy of the original image  
bitwise AND the result with the lines detected by HoughLinesP  
subtract the bitwise AND mask from the lines detected by HoughLinesP  
subtract the masked Hough lines from the original image - lines are now removed  