# CVIT Orientation : Problem Statement

Line- and word-level segmentation of documents is an important step in text extraction (OCR).

In the following notebook, we take a look at different printed documents and try to extract character-line segments and word-level segments.

References : 
1. https://pyimagesearch.com/2021/04/28/opencv-morphological-operations/


In [1]:
# Mount Google Drive at /content/drive; the image paths used by the cells
# in this notebook all live under that mount point (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
import os 
import cv2 
import numpy as np
from matplotlib import pyplot as plt
from google.colab.patches import cv2_imshow

In [3]:
# Sample documents on the mounted Drive. All three live in the same folder,
# so the directory is spelled out once and the file names are appended to it.
_IMAGE_DIR = '/content/drive/MyDrive/cvimages/day2/images 2'

PATH_MEDIUM = _IMAGE_DIR + '/test_tamil.jpg'           # Tamil sample
PATH_LINE_EXAMPLE = _IMAGE_DIR + '/test_sanskrit.png'  # Sanskrit sample
PATH_HARDEST = _IMAGE_DIR + '/test.jpeg'

In [None]:
# Preview one of the sample documents before running any segmentation.
test_tamil = cv2.imread(PATH_MEDIUM)
# cv2.imread signals failure by returning None rather than raising, so fail
# here with a readable message instead of inside the display call.
if test_tamil is None:
    raise FileNotFoundError('Could not read image: ' + str(PATH_MEDIUM))
cv2_imshow(test_tamil)

In [4]:
# Idea is to leverage different morphological filters
def textdilationElementWise(binary_image,iterations=3):
  '''
  Thicken text strokes by dilating with a small 3x3 square kernel.

  binary image : text is the non-zero foreground, background is 0
  (dilation grows the non-zero regions).

  Args:
    binary_image: image to dilate.
    iterations: number of times the dilation is applied.

  Returns:
    The dilated image.
  '''
  # Small local kernel that thickens the existing text strokes
  text_kernel = np.ones((3,3), dtype=np.uint8)
  # The count has to go in by keyword: the third positional parameter of
  # cv2.dilate is `dst`, so a positional value never reaches `iterations`
  # and the call would silently run a single pass.
  dilated_image = cv2.dilate(binary_image,text_kernel,iterations=iterations)
  return dilated_image


def rectangledilation(binary_image,H,W,iterations=3):
  '''
  Dilate an image with a rectangular structuring element.

  binary image : text is the non-zero foreground, background is 0.

  Args:
    binary_image: image to dilate.
    H: first component of the kernel size handed to OpenCV.
    W: second component of the kernel size handed to OpenCV.
    iterations: number of times the dilation is applied.

  NOTE: cv2.getStructuringElement takes ksize as (width, height), and this
  function passes (H, W) in that order, so H acts as the kernel width
  (horizontal reach) and W as the kernel height (vertical reach).

  Returns:
    The dilated image.
  '''
  # Rectangular kernel: a wide one merges neighbours along a row, a tall one
  # merges neighbours along a column.
  rectange_kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(H,W))
  # Keyword is required: the third positional parameter of cv2.dilate is
  # `dst`, not `iterations`.
  dilated_image = cv2.dilate(binary_image,rectange_kernel,iterations=iterations)
  return dilated_image

def erosionOperation(binary_image,H,W,iterations=3):
  '''
  Erode an image with a rectangular structuring element.

  Args:
    binary_image: image to erode (foreground is the non-zero pixels).
    H: first component of the kernel size handed to OpenCV.
    W: second component of the kernel size handed to OpenCV.
    iterations: number of times the erosion is applied.

  NOTE: cv2.getStructuringElement takes ksize as (width, height), and this
  function passes (H, W) in that order, so H acts as the kernel width and
  W as the kernel height. The erosion direction therefore depends on the
  H / W values chosen by the caller.

  Returns:
    The eroded image.
  '''
  rectange_kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(H,W))
  # Keyword is required: the third positional parameter of cv2.erode is
  # `dst`, not `iterations`.
  eroded_image = cv2.erode(binary_image,rectange_kernel,iterations=iterations)
  return eroded_image

def crossErosion(binary_image,iterations=2):
  '''
  Erode an image with a 5x5 cross-shaped structuring element.

  Args:
    binary_image: image to erode (foreground is the non-zero pixels).
    iterations: number of times the erosion is applied.

  Returns:
    The eroded image.
  '''
  cross_kernel= cv2.getStructuringElement(cv2.MORPH_CROSS,(5,5))
  # Keyword is required: the third positional parameter of cv2.erode is
  # `dst`, not `iterations`.
  cross_eroded_image = cv2.erode(binary_image,cross_kernel,iterations=iterations)
  return cross_eroded_image


In [5]:

def binarize_and_word_detect_contours(image_path):
    '''
    Detect word-level regions in a document image and visualise them.

    Pipeline: grayscale -> Gaussian blur -> non-local-means denoising ->
    fixed threshold -> invert -> dilate strokes -> rectangular dilation ->
    external contours -> area filter. Intermediate and final images are
    shown with cv2_imshow, and the two annotated results are written to the
    current working directory as 'wordlevel_contours_<name>' and
    'wordlevel_boxes_<name>'.

    Args:
        image_path: path of the image file to process.

    Returns:
        None. Results are displayed and saved as a side effect.

    Raises:
        FileNotFoundError: if the image cannot be read from image_path.
    '''
    # Load the image
    image = cv2.imread(image_path)
    # cv2.imread returns None on failure instead of raising; stop here with
    # a clear message rather than failing later inside cvtColor.
    if image is None:
        raise FileNotFoundError('Could not read image: ' + str(image_path))

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Smooth, then denoise, before thresholding
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    denoised = cv2.fastNlMeansDenoising(blurred, None, h=10, templateWindowSize=7, searchWindowSize=21)

    # Apply binary thresholding with a fixed cut-off of 127
    _, binary = cv2.threshold(denoised, 127, 255, cv2.THRESH_BINARY)

    cv2_imshow(binary)

    # Text dilation. The image is inverted first (255-binary) so that text
    # becomes the non-zero foreground the morphology helpers operate on
    # (assumes dark text on a light page - TODO confirm for new inputs).
    print('Thicker text')
    thickTextDocument = textdilationElementWise(255-binary,iterations=5)
    cv2_imshow(thickTextDocument)

    # Merge the thickened strokes into larger blobs with a rectangular kernel
    print('Word level dilation')
    wordTextDocument = rectangledilation(thickTextDocument,H=7,W=3,iterations=3)
    cv2_imshow(wordTextDocument)

    # Disabled experiments, kept for reference:
    # print('Dilation Operation to join cuts within word blobs')
    # closeDocument = textdilationElementWise(wordTextDocument,iterations=3)
    # cv2_imshow(closeDocument)

    # print('Cross Kernel Erosion : To further split interline blobs')
    # crossErodedDocument = crossErosion(closeDocument,iterations=3)
    # cv2_imshow(crossErodedDocument)

    # Find the outer contours of the dilated blobs
    contours, _ = cv2.findContours(wordTextDocument,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Filter out the extremely small boxes based on area threshold
    areaThreshold = 5
    finalContours = [c for c in contours if cv2.contourArea(c) > areaThreshold]

    # Draw contours on a copy of the original image
    image_with_contours = cv2.drawContours(image.copy(),finalContours, -1, (0, 255, 0), 2)

    # Draw one axis-aligned bounding box per contour on a second copy
    image_with_boxes = image.copy()
    for contour in finalContours:
        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(image_with_boxes, (x, y), (x + w, y + h), (0,0,255),3)

    # Display the image with contours
    print('Image With Contours')
    cv2_imshow(image_with_contours)

    # Display the image with bounding boxes
    print('Image With Boxes')
    cv2_imshow(image_with_boxes)

    # Save both annotated images in the current working directory
    _,imgName = os.path.split(image_path)
    cv2.imwrite('wordlevel_contours_'+imgName,image_with_contours)
    cv2.imwrite('wordlevel_boxes_'+imgName,image_with_boxes)



In [None]:
# Usage example: run the word-level pipeline on the image at PATH_LINE_EXAMPLE
binarize_and_word_detect_contours(PATH_LINE_EXAMPLE)

In [7]:
def binarize_and_line_detect_contours(image_path):
    '''
    Detect text-line regions in a document image and visualise them.

    Pipeline: grayscale -> Gaussian blur -> non-local-means denoising ->
    fixed threshold -> invert -> dilate strokes -> horizontal dilation ->
    fill contours -> wide rectangular dilation -> cross erosion ->
    external contours -> area filter. Intermediate and final images are
    shown with cv2_imshow; nothing is written to disk (saving is disabled).

    Args:
        image_path: path of the image file to process.

    Returns:
        None. Results are displayed as a side effect.

    Raises:
        FileNotFoundError: if the image cannot be read from image_path.
    '''
    # Load the image
    image = cv2.imread(image_path)
    # cv2.imread returns None on failure instead of raising; stop here with
    # a clear message rather than failing later inside cvtColor.
    if image is None:
        raise FileNotFoundError('Could not read image: ' + str(image_path))

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Smooth, then denoise, before thresholding
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    denoised = cv2.fastNlMeansDenoising(blurred, None, h=10, templateWindowSize=7, searchWindowSize=21)

    # Apply binary thresholding with a fixed cut-off of 127
    _, binary = cv2.threshold(denoised, 127, 255, cv2.THRESH_BINARY)

    cv2_imshow(binary)

    # Text dilation. The image is inverted first (255-binary) so that text
    # becomes the non-zero foreground the morphology helpers operate on
    # (assumes dark text on a light page - TODO confirm for new inputs).
    print('Thicker text')
    thickTextDocument = textdilationElementWise(255-binary,iterations=5)
    cv2_imshow(thickTextDocument)

    print('Line Level Dilation')

    # Horizontal-only dilation: the kernel is (2*5+1) wide and (2*0+1) tall,
    # anchored at its centre, so blobs grow sideways but not up or down.
    dilatation_type = cv2.MORPH_RECT
    horizontal_dilatation = 5
    vertical_dilatation = 0
    kernel_size = (2*horizontal_dilatation + 1, 2*vertical_dilatation + 1)
    kernel_anchor = (horizontal_dilatation, vertical_dilatation)
    element = cv2.getStructuringElement(dilatation_type, kernel_size, kernel_anchor)
    dilatation_thresh = cv2.dilate(thickTextDocument,element,iterations=1)

    # Fill: paint every external contour solid so holes inside blobs vanish
    filled_tresh = dilatation_thresh.copy()
    contours, _ = cv2.findContours(dilatation_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    for cnt in contours:
        cv2.drawContours(filled_tresh, [cnt], -1, 255, cv2.FILLED)

    # Connect these disconnected blobs further along the row
    # (H=31, W=1 reaches getStructuringElement as width 31, height 1)
    print('Line level dilation')
    lineTextDocument = rectangledilation(filled_tresh,H=31,W=1,iterations=1)
    cv2_imshow(lineTextDocument)

    # Cross Filter : Erosion
    lineTextDocument = crossErosion(lineTextDocument,iterations=10)
    print('Line erosion')
    cv2_imshow(lineTextDocument)

    # Find the outer contours of the remaining blobs
    contours, _ = cv2.findContours(lineTextDocument,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Filter out small blobs based on area threshold
    areaThreshold = 1000
    finalContours = [c for c in contours if cv2.contourArea(c) > areaThreshold]

    # Draw contours on a copy of the original image
    image_with_contours = cv2.drawContours(image.copy(),finalContours, -1, (255,0,0),3)

    # Draw one axis-aligned bounding box per contour on a second copy
    image_with_boxes = image.copy()
    for contour in finalContours:
        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(image_with_boxes, (x, y), (x + w, y + h), (0,0,255),3)

    # Display the image with contours
    print('Image With Contours')
    cv2_imshow(image_with_contours)

    # Display the image with bounding boxes
    print('Image With Boxes')
    cv2_imshow(image_with_boxes)

    # Saving is currently disabled:
    # _,imgName = os.path.split(image_path)
    # cv2.imwrite('linelevel_contours_'+imgName,image_with_contours)
    # cv2.imwrite('linelevel_boxes_'+imgName,image_with_boxes)



In [None]:
# Usage example ( easy - less dense example ): run the line-level pipeline,
# reusing the PATH_MEDIUM constant rather than repeating the same literal.
binarize_and_line_detect_contours(PATH_MEDIUM)