In [None]:
import cv2
from PIL import Image
import numpy as np
import math

import matplotlib.pyplot as plt
import matplotlib as mpl

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

%matplotlib inline
# Notebook-wide plotting defaults: gray colormap for single-channel images and
# a wide figure so two images fit side by side.
mpl.rcParams.update({
    'image.cmap': 'gray',
    'figure.figsize': (20, 10),
})

In [None]:
# Input image to process; swap in the commented-out path to try another one.
file = "images/bub_gb_gBAAnTXm89EC_images/gb_gBAAnTXm89EC_000108.png"
# file = "images/bub_gb_gBAAnTXm89EC_images/gb_gBAAnTXm89EC_000364.png"

# Load as a single-channel grayscale image (IMREAD_GRAYSCALE is the named
# form of the flag value 0).
img = cv2.imread(file, cv2.IMREAD_GRAYSCALE)

# cv2.imread does not raise when the file is missing or unreadable; it returns
# None. Fail here with a clear message instead of an obscure error later on.
if img is None:
    raise FileNotFoundError("Could not read image file: {}".format(file))

In [None]:
# Binarize using Otsu's method; `thresh` receives the threshold value that
# cv2.threshold reports back.
thresh, img_bin = cv2.threshold(
    img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
)

# Swap black and white.
img_bin = cv2.bitwise_not(img_bin)

# Smooth the inverted image with a 31x41 Gaussian kernel.
img_bin = cv2.GaussianBlur(img_bin, (31, 41), sigmaX=20)

# Preview: source image on the left, processed image on the right.
plt.axis("off")
plt.imshow(np.hstack((img, img_bin)))

In [None]:
def f(x):
    """Extract long vertical and horizontal strokes from ``img_bin`` and plot them.

    Used as the callback of the ``interact`` slider. The results are published
    through the module-level globals ``verticle_lines_img``,
    ``horizontal_lines_img`` and ``kernel`` (spelling kept as-is because other
    cells read these names).

    Parameters
    ----------
    x : int
        Positive divisor applied to the image width to obtain the length of the
        line-shaped kernels; a larger ``x`` gives a shorter kernel.

    Side effects
    ------------
    Writes ``verticle_lines.jpg`` to the working directory and draws both line
    images into a 1x2 subplot grid of the current figure.
    """
    global horizontal_lines_img, verticle_lines_img, kernel

    # Kernel length is a fraction of the image width. Clamp to at least 1 so a
    # divisor larger than the width cannot produce a zero-length kernel.
    kernel_length = max(1, img.shape[1] // x)

    # 1 x kernel_length column: only vertical runs at least that long survive.
    verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))

    # kernel_length x 1 row: only horizontal runs at least that long survive.
    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))

    # 3 x 3 rectangular kernel, exported as a global for later cells.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    # Erode then dilate with the same kernel to keep the vertical lines.
    img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
    verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)
    cv2.imwrite("verticle_lines.jpg", verticle_lines_img)

    # Same operation with the horizontal kernel to keep the horizontal lines.
    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)

    plt.subplot(1, 2, 1)  # 1 row, 2 columns, left panel
    plt.imshow(verticle_lines_img)
    plt.axis('off')

    plt.subplot(1, 2, 2)  # 1 row, 2 columns, right panel
    plt.imshow(horizontal_lines_img)
    plt.axis('off')
    
# Slider for the kernel-length divisor handed to f: integers 1..255, starting at 4.
divisor_slider = widgets.IntSlider(value=4, min=1, max=255, step=1)
interact(f, x=divisor_slider);

In [None]:
# Blend weights for the two line images; they sum to 1, so 0.5 means an equal mix.
alpha = 0.5
beta = 1.0 - alpha

# Weighted sum of the vertical and horizontal line images (no extra offset).
img_final_bin = cv2.addWeighted(
    verticle_lines_img, alpha, horizontal_lines_img, beta, 0.0
)

# Invert the blend, then erode it twice with the 3x3 kernel created in f.
img_final_bin = cv2.bitwise_not(img_final_bin)
img_final_bin = cv2.erode(img_final_bin, kernel, iterations=2)

# Binarize the result again with Otsu's method.
thresh, img_final_bin = cv2.threshold(
    img_final_bin, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
)

plt.axis('off')
plt.imshow(img_final_bin)

In [None]:
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Parameters
    ----------
    cnts : iterable
        Contours accepted by ``cv2.boundingRect``.
    method : str
        One of ``"left-to-right"``, ``"right-to-left"``, ``"top-to-bottom"``
        or ``"bottom-to-top"``. Any other value behaves like
        ``"left-to-right"``.

    Returns
    -------
    tuple
        ``(cnts, boundingBoxes)``: two tuples of equal length, the contours in
        sorted order and their bounding boxes in the same order. Both are
        empty tuples when ``cnts`` is empty.
    """
    # The two "backwards" directions sort in descending order.
    reverse = method in ("right-to-left", "bottom-to-top")

    # Sort on element 1 of the bounding box (y) for the vertical methods,
    # otherwise on element 0 (x).
    i = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    # Pair every contour with its bounding box.
    pairs = [(c, cv2.boundingRect(c)) for c in cnts]

    # zip(*...) on an empty sequence yields nothing to unpack, so handle the
    # "no contours" case explicitly instead of raising ValueError.
    if not pairs:
        return ((), ())

    pairs.sort(key=lambda pair: pair[1][i], reverse=reverse)
    (cnts, boundingBoxes) = zip(*pairs)

    # return the sorted contours and bounding boxes
    return (cnts, boundingBoxes)

# Find all contours (with their nesting hierarchy) in the binarized line image.
# cv2.findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x; taking the last two items works on all.
contours, hierarchy = cv2.findContours(
    img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)[-2:]

# Sort all the contours from top to bottom.
(contours, boundingBoxes) = sort_contours(contours, method="top-to-bottom")

In [None]:
# Area window (exclusive on both ends) a contour must fall in to be kept.
MIN_AREA, MAX_AREA = 2000, 100000

# White canvas with the same size as the source image.
height, width = img.shape
blank_image = np.full((height, width), 255, np.uint8)

# Keep the contour/bounding-box pairs whose contour area is inside the window.
selected = [
    (contour, boundingBox)
    for contour, boundingBox in zip(contours, boundingBoxes)
    if MIN_AREA < cv2.contourArea(contour) < MAX_AREA
]
contour_list = [contour for contour, _ in selected]
box_list = [boundingBox for _, boundingBox in selected]
hull_list = [cv2.convexHull(contour) for contour in contour_list]

# Paint each convex hull as a filled black shape on the canvas.
for hull in hull_list:
    cv2.drawContours(blank_image, [hull], -1, 0, -1)

print(len(box_list))

In [None]:
# Source image in the left panel, filled-hull mask in the right panel.
for panel, picture in enumerate((img, blank_image), start=1):
    plt.subplot(1, 2, panel)
    plt.imshow(picture)

In [None]:
# Crop the bounding rectangle of every hull out of the source image, keep the
# crops in ROI_list and also save each one as ROI_<index>.png.
ROI_list = []
for roi_index, hull in enumerate(hull_list):
    left, top, box_w, box_h = cv2.boundingRect(hull)
    crop = img[top:top + box_h, left:left + box_w]
    ROI_list.append(crop)
    cv2.imwrite('ROI_{}.png'.format(roi_index), crop)

# Total number of crops written.
ROI_number = len(ROI_list)

In [None]:
# Spot-check a single crop; the index is an arbitrary sample and must be
# smaller than len(ROI_list).
SAMPLE_ROI_INDEX = 22
plt.imshow(ROI_list[SAMPLE_ROI_INDEX])

In [None]:
import pytesseract

In [None]:
import pandas as pd

In [None]:
# Use a smaller default figure size from here on.
mpl.rc('figure', figsize=(10, 5))

In [None]:
# OCR every crop; keys are the crop's position in ROI_list, values the
# recognized text.
data = {
    index: pytesseract.image_to_string(crop)
    for index, crop in enumerate(ROI_list)
}

In [None]:
# How many groups of 7 the OCR results form. True division is used, so a
# non-integer result means the count is not a multiple of 7.
# NOTE(review): presumably 7 is the number of cells per table line; confirm.
len(data)/7

In [None]:
# Arrange the OCR results as a grid: output row m collects every 7th entry of
# `data`, starting at offset m.
#
# The previous index `m + n` made each row a one-step shift of the row above,
# so cells were duplicated across rows and most of `data` never appeared.
# NOTE(review): assumes `data` is ordered in consecutive groups of 7, matching
# the stride-7 lookups used elsewhere in this notebook; confirm the intended
# orientation against the source table.
GROUP_SIZE = 7

# Number of complete groups. Derived from the data instead of hard-coding 13,
# so a different number of detected cells cannot cause a KeyError.
group_count = len(data) // GROUP_SIZE

matrix = [
    [data[m + GROUP_SIZE * n] for n in range(group_count)]
    for m in range(GROUP_SIZE)
]

In [None]:
# Wrap the grid in a DataFrame; as the last expression of the cell it is
# rendered as a table.
pd.DataFrame(matrix)

In [None]:
# Display the raw OCR results (crop index -> recognized text) for inspection.
data

In [None]:
# Every 7th OCR result starting at index 2 (indices 2, 9, 16, ...).
# Starting the range at 2, instead of adding 2 to each multiple of 7, selects
# the same keys but never steps past the last entry of `data`, so a trailing
# partial group cannot raise KeyError.
[data[index] for index in range(2, len(data), 7)]