In [None]:
!apt install tesseract-ocr
!pip install pytesseract
!pip install python-doctr
!pip install tf2onnx
!pip install tensorflow-addons==0.16.1
!pip install rapidfuzz==2.15.1

In [None]:
import numpy as np
import cv2
import argparse
import math
from scipy import ndimage
from PIL import Image
from PIL import ImageEnhance
import pytesseract
from pytesseract import Output
import tensorflow_addons as tfa
from google.colab.patches import cv2_imshow
import os
import re

 The versions of TensorFlow you are currently using is 2.12.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [None]:
from skimage.transform import hough_line, hough_line_peaks
from skimage.transform import rotate, resize
from skimage.feature import canny
from skimage.io import imread, imsave
from skimage.color import rgb2gray
from scipy.stats import mode

In [None]:
def enhance(img):
  '''
  Apply a fixed chain of PIL enhancements to an image and return the result.

  The steps run in this order, each on the output of the previous one:
  brightness x1.2, colour x1.2, contrast x1.2, sharpness x2.
  '''
  # (enhancer class, factor) pairs, applied sequentially.
  enhancement_steps = (
    (ImageEnhance.Brightness, 1.2),
    (ImageEnhance.Color, 1.2),
    (ImageEnhance.Contrast, 1.2),
    (ImageEnhance.Sharpness, 2),
  )
  for enhancer_cls, factor in enhancement_steps:
    img = enhancer_cls(img).enhance(factor)
  return img

In [None]:
def skew_angle_hough_transform(image):
  '''
  Method to calculate and fetch skew angle of the ePOD image.

  The image is converted to grayscale, Canny edges are extracted and a
  straight-line Hough transform is evaluated at 0.1, 1.1, ..., 179.1 degrees.
  The most frequent peak angle (rounded to 2 decimals, in radians) is shifted
  by -pi/2 and converted to degrees.

  Args:
    image: object exposing ``convert('L')`` (the caller passes a PIL image).

  Returns:
    float: skew angle in degrees, or 0.0 when no Hough line peak is found.
  '''
  grayscale_image = image.convert('L')
  # Convert the grayscale image to a NumPy array
  pixels = np.array(grayscale_image)
  # convert to edges
  edges = canny(pixels)
  # Classic straight-line Hough transform between 0.1 - 180 degrees (1 degree step).
  tested_angles = np.deg2rad(np.arange(0.1, 180.0))
  h, theta, d = hough_line(edges, theta=tested_angles)

  # find line peaks and angles
  _, angles, _ = hough_line_peaks(h, theta, d)

  # round the angles to 2 decimal places
  rounded_angles = np.around(np.asarray(angles, dtype=float), decimals=2)
  if rounded_angles.size == 0:
    # No line detected: report "no skew" instead of propagating NaN / an empty array.
    return 0.0

  # Most common angle. np.unique returns sorted values, so ties resolve to the
  # smallest angle; computing the mode here avoids the scalar-vs-array return
  # difference of scipy.stats.mode across SciPy versions.
  unique_angles, counts = np.unique(rounded_angles, return_counts=True)
  most_common_angle = unique_angles[np.argmax(counts)]

  # convert the angle to degree for rotation.
  return float(np.rad2deg(most_common_angle - np.pi/2))

In [None]:
def rotate_img_manual(image):
  '''
  Rotate an image by the median angle of its detected line segments.

  Edges are extracted with Canny, line segments are found with the
  probabilistic Hough transform, and the median segment angle (degrees) is
  computed. The image is then rotated by the absolute value of that median.

  Args:
    image: 3-channel image array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.

  Returns:
    (img_rotated, median_angle): the rotated image and the signed median angle
    in degrees. When no line segment is detected the input image is returned
    unchanged together with an angle of 0.0.
  '''
  img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  img_edges = cv2.Canny(img_gray, 100, 100, apertureSize=3)
  lines = cv2.HoughLinesP(img_edges, 1, math.pi / 180.0, 100, minLineLength=100, maxLineGap=5)

  # HoughLinesP returns None (not an empty array) when nothing is found;
  # iterating over it would raise TypeError.
  if lines is None:
    return image, 0.0

  # Each detected segment is (x1, y1, x2, y2); flatten the (N, 1, 4) result.
  angles = [
    math.degrees(math.atan2(y2 - y1, x2 - x1))
    for x1, y1, x2, y2 in lines.reshape(-1, 4)
  ]

  median_angle = np.median(angles)
  # NOTE(review): the rotation uses the magnitude of the median angle only, so
  # the sign (skew direction) is ignored - confirm this is intended.
  img_rotated = ndimage.rotate(image, abs(median_angle))
  return img_rotated, median_angle

## Using pytesseract

In [None]:
# Run Tesseract on a single image and print the word-level data table.
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
image_path = 'path/to/image.jpg'  # TODO: give image path
img = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file, it returns None.
if img is None:
  raise FileNotFoundError(f"Could not read image: {image_path}")
# Grayscale copy; note that the OCR call below is run on the colour image `img`.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# English language, OEM 1, page segmentation mode 3.
config = ('-l eng --oem 1 --psm 3')
text = pytesseract.image_to_data(img, config=config)
#text = text.split('\n')
print(text)

## Using doctr

In [None]:
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from doctr.models.predictor.pytorch import rotate_image

In [None]:
# docTR OCR predictor: db_resnet50 detection + crnn_vgg16_bn recognition,
# loaded with pretrained weights and exporting straight boxes.
model = ocr_predictor(
  det_arch='db_resnet50',
  reco_arch='crnn_vgg16_bn',
  pretrained=True,
  export_as_straight_boxes=True,
)

Downloading https://doctr-static.mindee.com/models?id=v0.2.0/db_resnet50-adcafc63.zip&src=0 to /root/.cache/doctr/models/db_resnet50-adcafc63.zip


  0%|          | 0/94178964 [00:00<?, ?it/s]

Downloading https://doctr-static.mindee.com/models?id=v0.3.0/crnn_vgg16_bn-76b7f2c6.zip&src=0 to /root/.cache/doctr/models/crnn_vgg16_bn-76b7f2c6.zip


  0%|          | 0/58758994 [00:00<?, ?it/s]

In [None]:
# binary_pil_image = Image.fromarray(img[0])
# # binary_pil_image = binary_pil_image.resize((900, 900))
# img_l = np.array(enhance(binary_pil_image))
# angle = skew_angle_hough_transform(binary_pil_image)
# img_l = rotate_image(img_l,angle=float(angle),expand = True) ## for deskewing

# rotated_img = rotate_img_manual(img_l) # for rotating
# img = [rotated_img]
# result = model(img)
# result.show(img)
# json_output = result.export()

In [None]:
def _first_capture(patterns, text):
  '''Return group 1 of the first pattern (tried in order) that matches `text`, else None.'''
  for pattern in patterns:
    match = re.search(pattern, text, flags=re.IGNORECASE)
    if match:
      return match.group(1)
  return None


# Weight patterns, in priority order. Every pattern has exactly ONE capturing
# group: the number itself.
WEIGHT_PATTERNS = (
  # "weight (kg): 12.5" / "wt kgs 12". The unit token is letters only so it
  # cannot swallow the leading digits of the value (with \w+, "weight 125.5"
  # was read as "5.5").
  r"\b(?:weight|wt)\s?\(?[a-z]+\)?\s?:?\s?(\d+(?:\.\d+)?)\b",
  # "weight: 12.5" / "wt 12"
  r"\b(?:weight|wt)\s?:?\s?(\d+(?:\.\d+)?)\s?\b",
  # "12.5 kg" / "12kgs" / "12 kilograms" (kg5 covers the OCR misread of "kgs").
  # The number is now captured; previously only the decimal part was.
  r"(\d+(?:\.\d+)?)\s?(?:kg|k\s?g\s?s|kilograms|kg5|kgs)\b",
)

# Box-count patterns, in priority order: label before the number, then number before the label.
BOXES_PATTERNS = (
  r"\b(?:box|boxes|quantity|packages|No\.?\s?of\s?pkgs)[:#]?[:#]?\s?(\d+)\b",
  r"\b(\d+)\s?(?:box|boxes|quantity|packages|No\.?\s?of\s?pkgs)\b",
)

image_dir = 'dir name'  # TODO: set to the directory that contains the images

# For every image: deskew, rotate, enhance, OCR with docTR, then pull the
# shipment weight and number of boxes out of the recognised text.
for img_path in sorted(os.listdir(image_dir)):
  img_path_full = os.path.join(image_dir, img_path)
  if not os.path.isfile(img_path_full):
    # Skip sub-directories; only files can be loaded as images.
    continue
  print(img_path_full)
  img = DocumentFile.from_images(img_path_full)
  binary_pil_image = Image.fromarray(img[0])
  # NOTE: fixed 1000x1000 resize, the aspect ratio is not preserved.
  binary_pil_image = binary_pil_image.resize((1000, 1000))

  angle = skew_angle_hough_transform(binary_pil_image)
  angle = -angle
  img_l = rotate_image(np.array(binary_pil_image), angle=float(angle), expand=True) # for deskewing

  rotated_img, angle = rotate_img_manual(img_l) # for rotating

  img_l = np.array(enhance(Image.fromarray(rotated_img)))

  img = [img_l]

  result = model(img)

  # One text line per docTR line: its words joined by single spaces.
  lines = [
    " ".join(word.value for word in line.words)
    for page in result.pages
    for block in page.blocks
    for line in block.lines
  ]

  text = "\n".join(lines)
  print(text)

  #Extract weight
  shipment_weight = _first_capture(WEIGHT_PATTERNS, text)
  if shipment_weight is not None:
    print("Shipment Weight:",shipment_weight,"kg")

  # Extract number of boxes
  num_boxes = _first_capture(BOXES_PATTERNS, text)
  if num_boxes is not None:
    print("Number of boxes:",num_boxes)

  print('-------------------------------------------')