# OCR using pytesseract and OpenCV by Hew Kok Sing

## Import the cv2 and pytesseract libraries (install OpenCV and Tesseract before importing)

In [None]:
import cv2 
import pytesseract

## Set the pytesseract.pytesseract.tesseract_cmd attribute to the location of the Tesseract executable (needed when it is not on the PATH)

In [None]:
# Tell pytesseract which executable to run by setting its tesseract_cmd
# attribute to a Windows install path.
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

## Read the image into the img variable

In [None]:
# Path of the test image. Every backslash is escaped: the previous literal
# contained a bare "\k", which is an invalid escape sequence (it only worked
# because Python currently keeps unknown escapes verbatim, with a warning).
image_path = 'C:\\Users\\koksi\\Tesseract OCR\\test_v2\\test\\TEST_0030.jpg'
img = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read, so
# fail here with a clear message rather than later inside a preprocessing call.
if img is None:
    raise FileNotFoundError('Could not read image: ' + image_path)

# Pre-process the Image

In [None]:
import numpy as np #numerical python

In [None]:
#all the functions for image preprocessing

# get grayscale image
def get_grayscale(image):
    """Return ``image`` converted with OpenCV's ``COLOR_BGR2GRAY`` code.

    Used to simplify the image before OCR.
    """
    conversion_code = cv2.COLOR_BGR2GRAY
    return cv2.cvtColor(image, conversion_code)

# noise removal
def remove_noise(image):
    """Return ``image`` smoothed by ``cv2.medianBlur`` with an aperture of 5."""
    aperture = 5
    denoised = cv2.medianBlur(image, aperture)
    return denoised

#dilation
def dilate(image):
    """Return ``image`` dilated once with a 5x5 all-ones ``uint8`` kernel."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    return cv2.dilate(image, structuring_element, iterations=1)
    
#erosion
def erode(image):
    """Return ``image`` eroded once with a 5x5 all-ones ``uint8`` kernel."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    return cv2.erode(image, structuring_element, iterations=1)

#opening - erosion followed by dilation
def opening(image):
    """Apply ``cv2.MORPH_OPEN`` (erosion followed by dilation) to ``image``.

    The structuring element is a 5x5 all-ones ``uint8`` kernel.
    """
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    operation = cv2.MORPH_OPEN
    return cv2.morphologyEx(image, operation, structuring_element)

def closing(image):
    """Apply ``cv2.MORPH_CLOSE`` to ``image`` with a 5x5 all-ones ``uint8`` kernel."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    operation = cv2.MORPH_CLOSE
    return cv2.morphologyEx(image, operation, structuring_element)

#skew correction
def deskew(image):
    """Rotate ``image`` about its centre to undo the skew of its non-zero pixels.

    The skew is taken from the angle of the minimum-area rectangle that
    encloses every pixel with a value greater than zero. The output keeps the
    input's width and height; it is resampled with cubic interpolation and its
    borders are filled by replicating edge pixels.

    NOTE(review): the angle normalisation below assumes ``cv2.minAreaRect``
    reports angles in [-90, 0); the reported range is believed to differ
    between OpenCV releases -- confirm against the installed version.
    """
    nonzero_positions = np.where(image > 0)
    coords = np.column_stack(nonzero_positions)
    rect_angle = cv2.minAreaRect(coords)[-1]
    # Map the rectangle's angle to the correction angle passed to the rotation.
    angle = -(90 + rect_angle) if rect_angle < -45 else -rect_angle

    h, w = image.shape[:2]
    rotation = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    return cv2.warpAffine(
        image,
        rotation,
        (w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

#resize image
def resize_image(image, scale_percent=220):
    """Return ``image`` scaled to ``scale_percent`` percent of its size.

    Args:
        image: Image array; ``shape[0]`` is read as the height and ``shape[1]``
            as the width.
        scale_percent: Target size as a percentage of the original size.
            Defaults to 220, the value that was previously hard-coded.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    # Use the ``image`` argument: the previous version read the module-level
    # ``img`` instead, so it always resized that global and ignored its input.
    width = int(image.shape[1] * scale_percent / 100)
    height = int(image.shape[0] * scale_percent / 100)
    # NOTE(review): INTER_AREA is kept from the original; it is usually chosen
    # for shrinking, so confirm it is the wanted filter for a 220% enlargement.
    return cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)

#add padding to image
def add_padding(image):
    """Return ``image`` surrounded by a constant white border.

    The border is 50 pixels at the top and bottom and 100 pixels at the left
    and right.
    """
    top = bottom = 50
    left = right = 100
    white = (255, 255, 255)
    return cv2.copyMakeBorder(
        image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=white
    )

In [None]:
# Preprocessing used for OCR: pad the loaded image, then convert the padded
# image to grayscale.
addPadding = add_padding(img)
gray = get_grayscale(addPadding)
# Optional extra steps, currently disabled.
# NOTE(review): as written, enabling e.g. `dilate = dilate(gray)` would rebind
# the helper's name to its result, so the helper could not be called again;
# use a different variable name if any of these are switched on.
#dilate = dilate(gray)
#noise = remove_noise(gray)
#opening = opening(img)
#closing = closing(gray)
#resized = resize_image(gray)

## Display the preprocessed image in a window

In [None]:
# OpenCV stores colour images in BGR order while pytesseract expects RGB, so
# build an RGB image for the OCR/annotation cells that follow.
# `gray` has a single channel, so the conversion code must be COLOR_GRAY2RGB:
# COLOR_BGR2RGB needs a 3- or 4-channel input and raises on a grayscale image.
# The result has three channels, which the later `img.shape` unpacking needs.
img = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
cv2.imshow('Result', gray)  # display the preprocessed (grayscale) image
cv2.waitKey(0)  # wait indefinitely for a key press


## Print the text recognised in the image to the output

In [None]:
# Tesseract command-line options.
# --oem 3: default OCR engine mode (use whichever engine is available).
# --psm 6: page segmentation mode 6, i.e. treat the image as a single uniform
#          block of text (Tesseract's own default mode is 3, not 6).
# NOTE(review): custom_config is defined here but the calls in this file pass
# only tessdata_dir_config -- confirm whether it was meant to be used.
custom_config = r'--oem 3 --psm 6'
# Location of the tessdata (language data) directory handed to Tesseract.
tessdata_dir_config = '--tessdata-dir "C:\\Program Files\\Tesseract-OCR\\tessdata"'
# OCR the grayscale image and print the recognised text.
print(pytesseract.image_to_string(gray, config=tessdata_dir_config))
#print(pytesseract.image_to_string(img))

## Display the character boxes - image_to_boxes

In [None]:
# Print one line per recognised character with its bounding box.
print(pytesseract.image_to_boxes(gray, config=tessdata_dir_config))

# Each line reads: <char> <left> <bottom> <right> <top> <page>.
# The values are corner coordinates (not width/height) measured from the
# bottom-left of the image, which is why the drawing code below flips y.

## Display bounding box around characters - detecting characters 

In [None]:
# The image height is used to flip the y values returned by image_to_boxes
# before they are handed to OpenCV's drawing functions.
hImg, wImg, _ = img.shape

# One text line per recognised character: the character followed by four
# integer box values.
boxes = pytesseract.image_to_boxes(img, config=tessdata_dir_config)

for line in boxes.splitlines():
    fields = line.split(' ')
    char = fields[0]
    left, bottom, right, top = map(
        int, (fields[1], fields[2], fields[3], fields[4])
    )
    # Box around the character (colour (0, 0, 255), thickness 2).
    cv2.rectangle(img, (left, hImg - bottom), (right, hImg - top), (0, 0, 255), 2)
    # Recognised character, drawn 250 px to the right of the box's left edge.
    cv2.putText(
        img,
        char,
        (left + 250, hImg - bottom),
        cv2.FONT_HERSHEY_COMPLEX,
        1,
        (255, 0, 0),
        2,
    )

cv2.imshow('Result', img)  # show the annotated image
cv2.waitKey(0)  # wait indefinitely for a key press

## Display bounding box around words - detecting words

In [None]:
hImg, wImg, _ = img.shape  # size of the image being annotated

# image_to_data returns tab-separated text: a header row, then one row per
# detected element. Columns 6-9 hold left, top, width and height, and the
# recognised text is the last column.
boxes = pytesseract.image_to_data(img, config=tessdata_dir_config)

# Skip the header by slicing rather than testing an enumerate index: the
# previous loop named that index `x` and then overwrote it with the box's x
# coordinate inside the body.
for row in boxes.splitlines()[1:]:
    fields = row.split()
    # Only rows that split into all 12 columns are drawn; rows whose text
    # column is empty split into fewer fields and are skipped.
    if len(fields) != 12:
        continue
    x, y, w, h = int(fields[6]), int(fields[7]), int(fields[8]), int(fields[9])
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
    # To label each box with its text, uncomment:
    # cv2.putText(img, fields[11], (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 0, 0), 2)

cv2.imshow('Result', img)  # show the annotated image
cv2.waitKey(0)  # wait indefinitely for a key press

# EasyOCR Text Recognition by Christopher Ewe Kah Thong

In [None]:
import easyocr
import cv2
from matplotlib import pyplot as plt
import numpy as np

In [None]:
# Placeholder: replace 'file_path' with the path of the image to recognise.
img_path = 'file_path'

# English-only EasyOCR reader.
reader = easyocr.Reader(['en'])

img = cv2.imread(img_path)
# paragraph=True asks EasyOCR to combine its results into paragraphs.
result = reader.readtext(img, paragraph=True)
print(result)

In [None]:
# Re-read the image so the annotations are drawn on a clean copy.
img = cv2.imread(img_path)
# cv2.imread returns BGR data but matplotlib's imshow interprets arrays as RGB,
# so convert once up front; otherwise the photo is shown with red and blue
# swapped. Colours passed to the drawing calls below are therefore RGB.
imgView = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

org = (225, 20)  # where to show output text (x, y)
font = cv2.FONT_HERSHEY_SIMPLEX

# NOTE(review): every detection's text is drawn at the same `org`, so several
# detections will overlap -- confirm whether per-box placement is wanted.
for detection in result:
    # detection[0] holds the box corners; index 0 and index 2 are used as the
    # opposite corners of the rectangle. detection[1] is the recognised text.
    top_left = tuple(int(val) for val in detection[0][0])
    bottom_right = tuple(int(val) for val in detection[0][2])
    text = detection[1]
    cv2.rectangle(imgView, top_left, bottom_right, (255, 0, 0), 1)  # red box
    # Black text, font scale 0.5, thickness 1.
    cv2.putText(imgView, text, org, font, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

# Show the fully annotated image once. The previous version opened a new
# 20x20 figure for every detection and showed nothing when `result` was empty.
plt.figure(figsize=(20, 20))
plt.imshow(imgView)
plt.show()

# Keras OCR Text Recognition by Kam Wei Ming

## Before we start, we need to install the package that the rest of the program depends on

### Run `pip install keras-ocr` in the Anaconda Navigator terminal to install it

## Coding part

### First, import the two packages this project needs; matplotlib.pyplot is used to visualize the output

In [None]:
import matplotlib.pyplot as plt
import keras_ocr as ocr

### With the code below, keras-ocr will automatically download pretrained weights for the detector and recognizer.
### In short, the pipeline is initialized here for text detection and recognition

In [None]:
# Create the keras-ocr pipeline with its default constructor arguments.
pipeline = ocr.pipeline.Pipeline()

### The next cell is self-explanatory: it reads each image from its path on our computer

In [None]:
# Paths of the test images to run through keras-ocr.
image_paths = [
    "C:\\Users\\Aaron\\Untitled Folder\\test\\TEST_0001.jpg",
    "C:\\Users\\Aaron\\Untitled Folder\\test\\TEST_0002.jpg",
]
# Load every path with keras-ocr's reader, keeping the order of the list above.
images = [ocr.tools.read(path) for path in image_paths]

### The next cell displays the loaded images

In [None]:
# Show the first two loaded images, each in its own 10x20 figure.
for index in (0, 1):
    plt.figure(figsize=(10, 20))
    plt.imshow(images[index])

### Here we take the pipeline object created above and call its recognize method on the images loaded earlier

In [None]:
# Run the pipeline over all loaded images; the plotting code further down pairs
# the entries of the result with the images one-to-one.
prediction_groups = pipeline.recognize(images)

### Here is to plot the predictions

In [None]:
# One subplot row per image. squeeze=False makes plt.subplots always return a
# 2-D array of Axes; without it a single image yields one bare Axes object,
# which cannot be zipped with the images.
fig, axes_grid = plt.subplots(nrows=len(images), figsize=(10, 20), squeeze=False)
axs = axes_grid[:, 0]  # the single column, as a 1-D array of Axes
for ax, image, predictions in zip(axs, images, prediction_groups):
    # Draw this image's predictions onto its own axes.
    ocr.tools.drawAnnotations(image=image, predictions=predictions, ax=ax)

### Lastly, display the predictions for each image

In [None]:
# Bare expression: as the last line of a notebook cell it displays the value
# of prediction_groups.
prediction_groups