In [None]:
import os
import re

import cv2
import fitz
import numpy as np

In [None]:
# Use this cell when the input is a PDF that first has to be converted to an image.

# Empty placeholders: path of the source PDF and path of the image file to write.
# Both are passed to pdf_to_image_file below, so fill them in before running.
file_path = ""
image_file_name = ""
def pdf_to_image_file(file_path, output_path, page_number=0, zoom=1.5):
    """
        Render one page of a PDF and save it as an image file.

        Args:
            file_path: Path of the PDF to open.
            output_path: Path of the image to write (e.g. a ``.png`` file).
            page_number: Zero-based index of the page to render; defaults to the first page.
            zoom: Scale factor applied to both axes when rasterising; defaults to 1.5.
    """
    # The context manager closes the document even if rendering or saving fails.
    with fitz.open(file_path) as doc:
        page = doc.load_page(page_number)
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
        pix.save(output_path)

# Render the PDF at `file_path` into the image file `image_file_name`.
pdf_to_image_file(file_path, image_file_name)

In [None]:
from typing import List
from PIL import ImageFile, Image

# Use this cell when you want to break a bigger image into chunks.
# Tile edge length in pixels; also read by get_absolute_bboxes further down.
chunk_size = 3000
def create_image_segments(image: "Image.Image", chunk_size: int = 3000):
    """
        Split an image into a 2-D grid of tiles of at most ``chunk_size`` pixels per side.

        Args:
            image: Object exposing ``size`` as ``(width, height)`` and ``crop(box)``,
                e.g. a PIL image.
            chunk_size: Maximum tile edge length in pixels. Tiles on the right and
                bottom edges are smaller when the image size is not a multiple of it.

        Returns:
            A list indexed as ``tiles[i][j]``, where ``i`` counts tiles along the
            x axis and ``j`` counts tiles along the y axis.

        Raises:
            ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    width, height = image.size
    # Ceiling division so a partial tile at the edge still gets its own slot.
    x_chunks = (width + chunk_size - 1) // chunk_size
    y_chunks = (height + chunk_size - 1) // chunk_size

    return [
        [
            image.crop((
                i * chunk_size,
                j * chunk_size,
                min((i + 1) * chunk_size, width),   # clamp to the right edge
                min((j + 1) * chunk_size, height),  # clamp to the bottom edge
            ))
            for j in range(y_chunks)
        ]
        for i in range(x_chunks)
    ]

def save_image_chunks(images, output_dir="./public/segments"):
    """
        Save a 2-D list of image chunks as ``image_{i}_{j}.png`` files.

        Args:
            images: Nested list as returned by create_image_segments; every item
                must provide a ``save(path)`` method.
            output_dir: Folder the files are written to. It is created when it
                does not exist yet.
    """
    # Make sure the target folder exists before the first save call.
    os.makedirs(output_dir, exist_ok=True)

    for i, row in enumerate(images):
        for j, col_image in enumerate(row):
            col_image.save(os.path.join(output_dir, f"image_{i}_{j}.png"))

# Load the full image and keep its dimensions for the cells further down.
image = Image.open(image_file_name)
full_image_width, full_image_height = image.size

# Tile the image and write every tile to disk.
image_segments = create_image_segments(image)
save_image_chunks(image_segments)

In [None]:
# DON'T USE THIS CODE; IT IS DEPRECATED.

# DEPRECATED IMPLEMENTATION
def pre_process_image(image):
    """
        DEPRECATED FUNCTION

        Converts a BGR image to grayscale, binarises it with an inverted Otsu
        threshold and returns the thinned result.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # threshold() returns (threshold_used, image); only the image is needed.
    _, binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    return cv2.ximgproc.thinning(binary)

def detect_line_segments(image):
    """
        DEPRECATED FUNCTION

        Runs the probabilistic Hough transform on the pre-processed image and
        returns the detected segments as a list of ``[x1, y1, x2, y2]`` rows.
        Returns an empty list when no segment is detected.
    """
    preprocessed_image = pre_process_image(image)
    line_segments = cv2.HoughLinesP(
        preprocessed_image,
        rho=0.1,
        theta=np.pi / 1080,
        threshold=5,
        minLineLength=10,
        # Kept from the original; presumably OpenCV falls back to its default
        # when given None -- TODO confirm.
        maxLineGap=None
    )

    # HoughLinesP yields None (not an empty array) when nothing is found.
    if line_segments is None:
        return []

    return [ls[0] for ls in line_segments]

Use the following command to save an OpenCV image:
**cv2.imwrite(save_path, opencv_image)**

In [None]:
# The deprecated detector is disabled here: start with no segments instead of calling it.
line_segments = []  #detect_line_segments(cv2.imread('./bentler_bom_image.png'))

In [None]:
def write_line_segment_image():
    """
        Draw every entry of the module-level ``line_segments`` on a white canvas of
        5000 rows by 13000 columns and write the result to ``line_segments.png``.
    """
    canvas = np.ones((5000, 13000, 3), dtype=np.uint8) * 255
    stroke_colour = (0, 0, 255)
    for x_start, y_start, x_end, y_end in line_segments:
        cv2.line(canvas, (x_start, y_start), (x_end, y_end), stroke_colour, 2)
    cv2.imwrite("line_segments.png", canvas)

# write_line_segment_image()

### Table detection with the imported image
- Load the image, or load the PDF and convert it into an image.
- Apply processing to enhance the table structures.
- Use contour detection and line detection (via the Hough transform) to identify table edges.
- Crop the detected table areas based on bounding boxes from detection models.

In [None]:
# Detect the lines in an image using Canny edge detection followed by the Hough transform.
# Use this cell for line detection; the previous implementation is deprecated.

# Image tile the detector is tried out on below.
test_image_path = "./public/segments/image_3_0.png"

def get_line_segments(image_path):
    """
        Detect straight line segments in the image stored at ``image_path``.

        The image is converted to grayscale, passed through Canny edge detection
        and then through the probabilistic Hough transform.

        Returns:
            The raw ``cv2.HoughLinesP`` result: entries that each wrap one
            ``[x1, y1, x2, y2]`` row, or ``None`` when no line is found.

        Raises:
            FileNotFoundError: If the image cannot be read.
    """
    image_bgr = cv2.imread(image_path)
    # cv2.imread reports a missing or unreadable file by returning None, not by raising.
    if image_bgr is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, rho=1, theta=np.pi/180, threshold=100, minLineLength=50, maxLineGap=10)
    return lines

def write_cv_image(line_segments, output_path, width, height):
    """
        Draw line segments in black on a white ``height`` x ``width`` canvas and
        write it to ``output_path``.

        ``line_segments`` may be ``None`` (nothing is drawn); otherwise every entry
        wraps one ``[x1, y1, x2, y2]`` row at index 0.
    """
    canvas = np.ones((height, width, 3), dtype=np.uint8) * 255
    black = (0, 0, 0)

    segments = () if line_segments is None else line_segments
    for segment in segments:
        x1, y1, x2, y2 = segment[0]
        cv2.line(canvas, (x1, y1), (x2, y2), black, 2)

    cv2.imwrite(output_path, canvas)

# Detect the lines of the test tile and render them on a 3000 x 3000 canvas.
line_segments = get_line_segments(test_image_path)
write_cv_image(line_segments, 'test_image_segments_line_segments.jpg', 3000, 3000)

In [None]:
import os
import json
from math import floor

# Folder that is scanned for the *.json result files.
folder_path = "./public/segments"

def get_bounding_box(polygon_data):
    """
        Get the axis-aligned bounding box of a polygon given as a flat coordinate
        sequence ``[x0, y0, x1, y1, ...]`` (the structure provided by the json).

        Returns:
            ``[min_x, min_y, max_x, max_y]``.

        Raises:
            ValueError: If the sequence is empty or holds an odd number of values.
    """
    count = len(polygon_data)
    if count == 0 or count % 2 != 0:
        raise ValueError(
            f"polygon_data must hold a non-empty, even number of values, got {count}"
        )

    # Even positions are x coordinates, odd positions are y coordinates.
    all_x = polygon_data[0::2]
    all_y = polygon_data[1::2]

    return [min(all_x), min(all_y), max(all_x), max(all_y)]

def convert_to_2dim(index, m):
    """
        Map a flat index onto a pair of 2-D indices for rows of length ``m``:
        the quotient ``index // m`` and the remainder ``index % m``.
    """
    quotient, remainder = divmod(index, m)
    return (quotient, remainder)


def get_bounding_boxes_from_json_files(folder_path):
    """
        Load every ``*.json`` file in ``folder_path`` and collect its word bounding boxes.

        Files are visited in natural filename order (``image_2_0`` before
        ``image_10_0``), because ``os.listdir`` returns names in arbitrary order and
        the result is indexed by position later in the notebook.
        NOTE(review): this assumes the JSON files are named after the image tiles
        (``image_{i}_{j}...``) -- confirm against the files on disk.

        Returns:
            One list per JSON file, each holding a ``[min_x, min_y, max_x, max_y]``
            box for every entry of ``analyzeResult.pages[*].words``.
    """
    def natural_key(name):
        # Split into digit / non-digit runs so numeric parts compare as numbers.
        return [int(part) if part.isdigit() else part for part in re.split(r"(\d+)", name)]

    json_names = sorted(
        (name for name in os.listdir(folder_path) if name.endswith(".json")),
        key=natural_key,
    )

    json_bboxes = []
    for name in json_names:
        with open(os.path.join(folder_path, name), "r", encoding="utf-8") as f:
            json_data = json.load(f)

        # Flatten the words of all pages into one list of boxes per file.
        json_bboxes.append([
            get_bounding_box(word['polygon'])
            for page in json_data["analyzeResult"]['pages']
            for word in page['words']
        ])

    return json_bboxes

# Collect the boxes of every JSON file; preview only the first file's boxes as an example.
json_bboxes = get_bounding_boxes_from_json_files(folder_path)
json_bboxes[0:1]

In [None]:
# DISPLAY THE RESULT AS AN EXAMPLE FOR A IMAGE SEGMENT

import matplotlib.pyplot as plt

# Flat index of the segment to preview; json_bboxes uses the same flat layout
# (i * y_chunks + j, see get_absolute_bboxes).
indx_value = 6
# Take the number of tiles along y from the tiling itself instead of a hard-coded 2.
(x, y) = convert_to_2dim(indx_value, len(image_segments[0]))

print(x, y)

fig, ax = plt.subplots(figsize=(10, 8), dpi=300)
ax.imshow(image_segments[x][y])

# Outline every word box that belongs to the previewed segment.
for x1, y1, x2, y2 in json_bboxes[indx_value]:
    ax.add_patch(
        plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, color="red", linewidth=0.2)
    )

ax.axis('off')
plt.show()

In [None]:
def get_absolute_bboxes(bboxes_per_chunk=None, image_width=None, image_height=None, tile_size=None):
    """
        Shift the per-tile bounding boxes into full-image coordinates.

        Args:
            bboxes_per_chunk: One list of ``[x1, y1, x2, y2]`` boxes per tile, laid
                out flat as ``i * y_chunks + j``. Defaults to ``json_bboxes``.
            image_width: Width of the full image. Defaults to ``full_image_width``.
            image_height: Height of the full image. Defaults to ``full_image_height``.
            tile_size: Tile edge length. Defaults to the module-level ``chunk_size``.

        Returns:
            A flat list of ``[x1, y1, x2, y2]`` boxes in full-image coordinates.

        Raises:
            ValueError: If there are fewer box lists than tiles.
    """
    # Fall back to the notebook-level variables so zero-argument calls keep working.
    if bboxes_per_chunk is None:
        bboxes_per_chunk = json_bboxes
    if image_width is None:
        image_width = full_image_width
    if image_height is None:
        image_height = full_image_height
    if tile_size is None:
        tile_size = chunk_size

    x_chunks = (image_width + tile_size - 1) // tile_size
    y_chunks = (image_height + tile_size - 1) // tile_size

    expected = x_chunks * y_chunks
    if len(bboxes_per_chunk) < expected:
        raise ValueError(
            f"expected bounding boxes for {expected} tiles, got {len(bboxes_per_chunk)}"
        )

    absolute_bboxes = []
    for i in range(x_chunks):
        for j in range(y_chunks):
            offsetX = i * tile_size
            offsetY = j * tile_size

            # convert 2d to one dimension.
            for x, y, _x, _y in bboxes_per_chunk[i * y_chunks + j]:
                absolute_bboxes.append([offsetX + x, offsetY + y, offsetX + _x, offsetY + _y])

    return absolute_bboxes

absolute_bboxes = get_absolute_bboxes()

# Preview a handful of boxes instead of dumping the complete list into the cell output.
absolute_bboxes[:5]

In [None]:
# DISPLAY THE RESULT FOR A FULL SEGMENT

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 8), dpi=500)
ax.imshow(image)

# Outline every box from absolute_bboxes on top of the full image.
for x1, y1, x2, y2 in absolute_bboxes:
    width, height = x2 - x1, y2 - y1
    ax.add_patch(
        plt.Rectangle((x1, y1), width, height, fill=False, color="red", linewidth=0.2)
    )

ax.axis('off')
plt.show()

In [None]:
# perform the same action that you perform in PID 

def clear_bboxes(image_file_path, output_path, bboxes=None):
    """
        Whiten the areas that are detected by OCR.

        Every box is filled with the most frequent value of the image's first
        channel (used as the background estimate for all three channels) and the
        result is written to ``output_path``.

        Args:
            image_file_path: Path of the image to read.
            output_path: Path of the image to write.
            bboxes: Iterable of ``[x1, y1, x2, y2]`` boxes. Defaults to the
                module-level ``absolute_bboxes``.

        Raises:
            FileNotFoundError: If the image cannot be read.
    """
    if bboxes is None:
        bboxes = absolute_bboxes

    image_cv2 = cv2.imread(image_file_path)
    # cv2.imread reports a missing or unreadable file by returning None, not by raising.
    if image_cv2 is None:
        raise FileNotFoundError(f"Could not read image: {image_file_path}")

    hist = cv2.calcHist([image_cv2], [0], None, [256], [0, 256])
    background_value = int(np.argmax(hist))
    fill_colour = (background_value, background_value, background_value)

    for topX, topY, bottomX, bottomY in bboxes:
        points = np.array([[bottomX, topY],
                            [bottomX, bottomY],
                            [topX, bottomY],
                            [topX, topY]],
                            np.int32)
        cv2.fillPoly(image_cv2, [points], fill_colour)

    # Always write the output, even without boxes, so that code reading
    # output_path afterwards never finds a missing or stale file.
    cv2.imwrite(output_path, image_cv2)

# Paint over the word boxes on the full image; the result feeds the line detection below.
clear_bboxes(image_file_name, "removed_bbox_areas.png")

In [None]:
# Detect lines on the image with the word boxes removed and render them at full-image size.
lines = get_line_segments("./removed_bbox_areas.png")
write_cv_image(lines, 'line_segments_after_removing_ocr_bboxes.png', full_image_width, full_image_height)

In [None]:
from itertools import combinations
from collections import defaultdict
from shapely.geometry import LineString
import math

def normalize_line(line_coords):
    """
        Build a LineString from ``(x1, y1, x2, y2)`` whose endpoints are stored in
        ascending tuple order, whichever way round they were given.
    """
    x1, y1, x2, y2 = line_coords
    endpoints = [(x1, y1), (x2, y2)]
    # Swap only when the first endpoint does not already sort first.
    if not endpoints[0] <= endpoints[1]:
        endpoints.reverse()
    return LineString(endpoints)

def find_lines_with_more_than_three_intersections(lines_coords):
    """
        Return the horizontal and vertical lines that intersect more than three
        other horizontal or vertical lines.

        Args:
            lines_coords: Sequence of ``(x1, y1, x2, y2)`` line coordinates.

        Returns:
            The matching entries of ``lines_coords``, in their input order. Lines
            that are neither horizontal nor vertical are ignored entirely.
    """
    def get_slope_between_points(x1, y1, x2, y2):
        '''
        Returns the slope between two points.
        '''
        x_delta = x2 - x1
        if x_delta == 0:
            return math.inf
        return (y2 - y1) / x_delta

    # filter in the horizontal (slope 0) and vertical (infinite slope) lines first
    axis_aligned = [
        lc for lc in lines_coords
        if get_slope_between_points(*lc) in (0, math.inf)
    ]

    lines = [normalize_line(lc) for lc in axis_aligned]

    # Count, per line index, how many of the other lines it intersects.
    intersection_counts = defaultdict(int)
    for (i, line1), (j, line2) in combinations(enumerate(lines), 2):
        if line1.intersects(line2):
            intersection_counts[i] += 1
            intersection_counts[j] += 1

    # Filter lines with more than 3 intersections; indices refer to the filtered list.
    return [
        axis_aligned[idx]
        for idx in sorted(intersection_counts)
        if intersection_counts[idx] > 3
    ]


# get_line_segments returns None when nothing is detected; treat that as "no lines".
detected_lines = [] if lines is None else [line[0] for line in lines]
intersecting_lines = find_lines_with_more_than_three_intersections(detected_lines)
for line in intersecting_lines:
    print(line)

In [None]:
import cv2

# Size the canvas from the source image (rows = height, columns = width) instead of
# assuming a 5000 x 13000 image, so that no line falls outside the canvas.
display_image = np.ones((full_image_height, full_image_width, 3), dtype=np.uint8) * 255
for line in intersecting_lines:
    # Plain ints keep cv2.line happy regardless of the coordinate dtype.
    x, y, _x, _y = (int(value) for value in line)
    cv2.line(display_image, (x, y), (_x, _y), (0, 0, 255), 2)
cv2.imwrite("intersecting_lines.png", display_image)

# write_cv_image(np.array([[i] for i in intersecting_lines], dtype=np.uint8), 'intersecting_lines.png', full_image_width, full_image_height)