In [None]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import cv2
import os 
import numpy as np
import matplotlib.pyplot as plt
import math
from PIL import Image
import random
import matplotlib.image as mpimg
from imutils import paths
import sys
import operator

In [None]:
!pip install rich
!pip install hydra-core
!pip install pytorch-lightning

In [None]:
sys.path.insert(0, "/content/drive/MyDrive/Github_Repos/historia_ml_handwritten_text_line_segmentation")
sys.path.insert(1, "/content/drive/MyDrive/Github_Repos/historia_ml_handwritten_text_line_segmentation/src/utils")

In [None]:
import pytorch_lightning as pl
from src.utils.display_image_from_path import img_show
from src.utils.display_image_from_array import img_show_from_arr

In [None]:
def delete_border_lines(mask, x_ratio, y_ratio):
  """Zero out a frame of pixels along all four edges of ``mask``, in place.

  Args:
    mask: 3-D array indexed as (row, column, channel). It is modified in place.
    x_ratio: fraction of the width that is cleared along the left edge and
      again along the right edge.
    y_ratio: fraction of the height that is cleared along the top edge and
      again along the bottom edge.

  Returns:
    The same ``mask`` object, with channels 0-2 of the border pixels set to 0.
  """
  height, width, depth = mask.shape
  # Clamp the band sizes: a ratio above 1 clears the whole mask instead of
  # indexing past the edge, and a negative ratio clears nothing.
  vertical_limit = min(max(int(height * y_ratio), 0), height)
  horizontal_limit = min(max(int(width * x_ratio), 0), width)
  # Slice assignment replaces the per-pixel Python loops. Only channels 0-2
  # are written, so any further channel is left untouched.
  mask[:vertical_limit, :, :3] = 0            # upper border
  mask[:, :horizontal_limit, :3] = 0          # left border
  mask[height - vertical_limit:, :, :3] = 0   # lower border
  mask[:, width - horizontal_limit:, :3] = 0  # right border
  return mask

In [None]:
def delete_center_line(mask, left_ratio, right_ratio):
  """Zero out a vertical band around the horizontal center of ``mask``, in place.

  The band starts ``int(width * left_ratio)`` columns to the left of the
  center column (``width // 2``) and ends ``int(width * right_ratio)`` columns
  to the right of it; the end column itself is not cleared.

  Args:
    mask: 3-D array indexed as (row, column, channel). It is modified in place.
    left_ratio: band extent to the left of the center, as a fraction of width.
    right_ratio: band extent to the right of the center, as a fraction of width.

  Returns:
    The same ``mask`` object, with channels 0-2 of the band set to 0.
  """
  height, width, depth = mask.shape
  center = width // 2
  # Clamp both limits to [0, width]. An unclamped negative start would wrap
  # around and erase columns at the right edge, and an end beyond the width
  # would raise IndexError.
  left_border = min(max(center - int(width * left_ratio), 0), width)
  right_border = min(max(center + int(width * right_ratio), 0), width)
  # An inverted range (left_border >= right_border) is an empty slice: no-op.
  # The debug print of the band limits that used to live here was removed.
  mask[:, left_border:right_border, :3] = 0
  return mask

In [None]:
def next_neighbor_as_zero(height, width, mask):
  """Collect the non-zero pixels of ``mask`` that touch a zero pixel.

  Only channel 0 is inspected. A pixel is reported when its own value is
  non-zero and at least one of its four direct neighbors (row above, left,
  row below, right) is zero. The outermost rows and columns of the scanned
  window are never reported themselves; they only serve as neighbors.

  Args:
    height: number of rows of the window to scan.
    width: number of columns of the window to scan.
    mask: 3-D array indexed as (row, column, channel). If it is smaller than
      ``height`` x ``width`` the scan is limited to the part that exists.

  Returns:
    List of ``(row, column)`` tuples of Python ints in row-major order.
  """
  channel = np.asarray(mask)[:height, :width, 0]
  n_rows, n_cols = channel.shape
  if n_rows < 3 or n_cols < 3:
    # No interior pixel exists, so nothing can be reported.
    return []

  interior = channel[1:-1, 1:-1]
  # Each shifted view lines up one neighbor with the interior pixel grid.
  touches_zero = (
      (channel[:-2, 1:-1] == 0)      # neighbor in the row above (i-1)
      | (channel[1:-1, :-2] == 0)    # left neighbor (j-1)
      | (channel[2:, 1:-1] == 0)     # neighbor in the row below (i+1)
      | (channel[1:-1, 2:] == 0)     # right neighbor (j+1)
  )
  # argwhere walks the array in row-major order, matching the nested-loop
  # order; +1 converts interior indices back to indices into the full mask.
  hits = np.argwhere((interior != 0) & touches_zero) + 1
  return [(row, col) for row, col in hits.tolist()]

In [None]:
def get_row_heights(bbox_y_pairs):
  """Return the absolute difference of the two y values of every pair.

  Args:
    bbox_y_pairs: iterable of indexable pairs; elements 0 and 1 are used.

  Returns:
    List with one height per pair, in input order.
  """
  return [abs(y_pair[0] - y_pair[1]) for y_pair in bbox_y_pairs]

In [None]:
def get_col_widths(bbox_x_pairs):
  """Return the absolute difference of the two x values of every pair.

  Args:
    bbox_x_pairs: iterable of indexable pairs; elements 0 and 1 are used.

  Returns:
    List with one width per pair, in input order.
  """
  return [abs(x_pair[0] - x_pair[1]) for x_pair in bbox_x_pairs]

In [None]:
def erase_lines_with_small_width(mask, bbox_x_pairs, safety_erase_ratio, col_width_limit):
  """Blank out, in place, every column run that is narrower than a limit.

  For each pair whose absolute width is below ``col_width_limit`` the columns
  from ``pair[0]`` up to (not including) ``pair[1] + margin`` are cleared over
  the full height, where ``margin = int(width * safety_erase_ratio)``. The
  margin is only added on the right-hand side.

  Args:
    mask: 3-D array indexed as (row, column, channel). It is modified in place.
    bbox_x_pairs: iterable of (start column, end column) pairs.
    safety_erase_ratio: extra columns to clear, as a fraction of the width.
    col_width_limit: runs strictly narrower than this are erased.

  Returns:
    The same ``mask`` object, with channels 0-2 of the erased runs set to 0.
  """
  height, width, depth = mask.shape
  margin = int(width * safety_erase_ratio)
  for pair in bbox_x_pairs:
    if abs(pair[0] - pair[1]) < col_width_limit:
      # Clamp to the image: a run close to the right edge plus the safety
      # margin would otherwise index past the last column.
      start = max(int(pair[0]), 0)
      stop = min(int(pair[1]) + margin, width)
      if stop > start:
        mask[:, start:stop, :3] = 0
  return mask


In [None]:
def erase_lines_with_small_height(mask, bbox_y_pairs, safety_erase_ratio, row_height_limit):
  """Blank out, in place, every row run that is shorter than a limit.

  For each pair whose absolute height is below ``row_height_limit`` the rows
  from ``pair[0]`` up to (not including) ``pair[1] + margin`` are cleared over
  the full width, where ``margin = int(height * safety_erase_ratio)``. The
  margin is only added below the run.

  Args:
    mask: 3-D array indexed as (row, column, channel). It is modified in place.
    bbox_y_pairs: iterable of (start row, end row) pairs.
    safety_erase_ratio: extra rows to clear, as a fraction of the height.
    row_height_limit: runs strictly shorter than this are erased.

  Returns:
    The same ``mask`` object, with channels 0-2 of the erased runs set to 0.
  """
  height, width, depth = mask.shape
  margin = int(height * safety_erase_ratio)
  for pair in bbox_y_pairs:
    if abs(pair[0] - pair[1]) < row_height_limit:
      # Clamp to the image: a run close to the bottom edge plus the safety
      # margin would otherwise index past the last row.
      start = max(int(pair[0]), 0)
      stop = min(int(pair[1]) + margin, height)
      if stop > start:
        mask[start:stop, :, :3] = 0
  return mask

In [None]:
def find_image_page_type(cols, line_gap_ratio):
  """Classify a scan as one or two pages from the gaps between column values.

  The threshold is ``line_gap_ratio`` times the spread between the smallest
  and the largest value in ``cols``. If any element exceeds its predecessor
  by more than that threshold the image counts as a two-page scan.

  Args:
    cols: non-empty indexable sequence of column values; consecutive
      elements are compared, so it is presumably expected in ascending order.
    line_gap_ratio: gap threshold as a fraction of the covered width.

  Returns:
    ``"two page image"`` or ``"one_page_image"``.
  """
  covered_width = abs(min(cols) - max(cols))
  gap_limit = covered_width * line_gap_ratio
  has_page_gap = any(
      (cols[k + 1] - cols[k]) > gap_limit for k in range(len(cols) - 1)
  )
  # NOTE: the two labels are spelled differently (spaces vs. underscores);
  # callers compare against them verbatim, so they are kept as they are.
  return "two page image" if has_page_gap else "one_page_image"

In [None]:
def create_row_patches(y1, y2, coords):
  """Select the coordinates whose first element lies in ``[y1, y2]``.

  Args:
    y1: lower bound (inclusive) for the first element of a coordinate.
    y2: upper bound (inclusive) for the first element of a coordinate.
    coords: iterable of indexable coordinates; element 0 is compared.

  Returns:
    List of the matching coordinates, in input order.
  """
  return [point for point in coords if y1 <= point[0] <= y2]

In [None]:
def create_unique_patch_cols(patch):
  """Return the distinct second elements of the coordinates in ``patch``.

  Args:
    patch: sequence of indexable coordinates; element 1 of each is used.

  Returns:
    NumPy array of the unique values in ascending order.
  """
  column_values = sorted(point[1] for point in patch)
  return np.unique(np.array(column_values))

In [None]:
def create_bbox_list(rows, cols, line_gap_ratio, coords):
  """Build one bounding box per text line (two per line on a two-page scan).

  Boxes are ``(x1, y1, x2, y2)`` tuples. For every row band the horizontal
  extent is taken from the coordinates that fall inside the band. On a
  two-page scan a band that spans a page gap is split into a left box ending
  at the gap and a right box starting after it.

  Args:
    rows: iterable of ``(y1, y2)`` row bands.
    cols: indexable sequence of column values, passed to
      ``find_image_page_type`` and scanned for page gaps.
    line_gap_ratio: gap threshold as a fraction of the width covered by
      ``cols``.
    coords: iterable of ``(row, column)`` coordinates.

  Returns:
    List of ``(x1, y1, x2, y2)`` tuples, in row order.
  """
  page_type = find_image_page_type(cols, line_gap_ratio)

  # The page gaps depend only on ``cols``, so they are located once here
  # instead of being searched again for every row band.
  page_gaps = []
  if page_type == "two page image":
    line_gap_threshold = abs(min(cols) - max(cols)) * line_gap_ratio
    page_gaps = [
        (cols[i], cols[i + 1])
        for i in range(len(cols) - 1)
        if abs(cols[i] - cols[i + 1]) > line_gap_threshold
    ]

  bbox_list = []
  for row_pair in rows:
    y1 = row_pair[0]
    y2 = row_pair[1]
    patch = create_row_patches(y1, y2, coords)
    if not patch:
      # No coordinate falls inside this band, so there is nothing to box
      # (and min()/max() below would fail on an empty array).
      continue
    patch_unique_cols = create_unique_patch_cols(patch)
    x_min = min(patch_unique_cols)
    x_max = max(patch_unique_cols)

    was_split = False
    for gap_left, gap_right in page_gaps:
      # Split only when the band really has content on both sides of the gap;
      # otherwise the right-hand box would be inverted.
      if x_min <= gap_left and gap_right <= x_max:
        bbox_list.append((x_min, y1, gap_left, y2))
        # Right-hand page: starts after the gap and keeps the band's y1.
        # (Previously the gap column was written into the y1 slot.)
        bbox_list.append((gap_right, y1, x_max, y2))
        was_split = True
    if not was_split:
      # One-page scan, or a line that sits on a single page of a two-page
      # scan: keep it as one box instead of dropping it.
      bbox_list.append((x_min, y1, x_max, y2))

  return bbox_list

In [None]:
def create_line_image_from_bbox(bbox, img, safety_gap_x, safety_gap_y):
  """Crop ``img`` to a bounding box enlarged by a safety margin.

  Args:
    bbox: ``(x1, y1, x2, y2)`` box.
    img: 3-D array indexed as (row, column, channel).
    safety_gap_x: margin added to the left and right of the box.
    safety_gap_y: margin added above and below the box.

  Returns:
    The slice ``img[y1-gap_y : y2+gap_y, x1-gap_x : x2+gap_x, :]`` with the
    start indices clamped to 0. The end indices are exclusive.
  """
  x1, y1, x2, y2 = bbox
  # A negative start index would be interpreted as "count from the end" and
  # give a wrong (usually empty) crop for boxes close to the top/left edge.
  # End indices beyond the image are already truncated by slicing.
  top = max(y1 - safety_gap_y, 0)
  left = max(x1 - safety_gap_x, 0)
  row = img[top:y2 + safety_gap_y, left:x2 + safety_gap_x, :]
  return row

def create_row_images(bbox_list, img, safety_gap_x, safety_gap_y):
  """Crop one line image out of ``img`` for every box in ``bbox_list``.

  Args:
    bbox_list: list of ``(x1, y1, x2, y2)`` boxes.
    img: image array handed to ``create_line_image_from_bbox``.
    safety_gap_x: horizontal margin forwarded to the crop helper.
    safety_gap_y: vertical margin forwarded to the crop helper.

  Returns:
    List of crops, in the order of ``bbox_list``.
  """
  return [
      create_line_image_from_bbox(box, img, safety_gap_x, safety_gap_y)
      for box in bbox_list
  ]

In [None]:
img = cv2.imread("/content/drive/MyDrive/Images/T 2438---0022.jpeg")
img_mask = cv2.imread("/content/drive/MyDrive/output/test_page_predicted_masks/T 2438---0022_mask_320x320.png")

In [None]:
img_show_from_arr(img)
img_show_from_arr(img_mask)

In [None]:
mask = img_mask.copy()
mask = delete_border_lines(mask, 0.05, 0.1)

img_show_from_arr(img_mask)
img_show_from_arr(mask)

In [None]:
# NOTE(review): the dimensions come from img but are used to scan mask;
# confirm that both always share the same shape.
height, width, depth = img.shape
next_neighbor0_coords = next_neighbor_as_zero(height, width, mask)

# Paint every detected edge pixel white (channels 0-2) on a black canvas.
next_neighbor0_mask = np.zeros(img.shape)
for edge_row, edge_col in next_neighbor0_coords:
  next_neighbor0_mask[edge_row, edge_col, 0:3] = 255

img_show_from_arr(next_neighbor0_mask)

In [None]:
len(next_neighbor0_coords)

71670

In [None]:
# Column index of every edge pixel (one entry per pixel, duplicates kept).
next_neighbor0_cols = [edge_pixel[1] for edge_pixel in next_neighbor0_coords]

len(next_neighbor0_cols)

71670

In [None]:
next_neighbor0_cols = np.array(next_neighbor0_cols)
next_neighbor0_unique_cols = np.unique(next_neighbor0_cols)
next_neighbor0_unique_cols[-100:]

array([2242, 2243, 2244, 2245, 2246, 2247, 2248, 2249, 2250, 2251, 2252,
       2253, 2254, 2255, 2256, 2257, 2258, 2259, 2260, 2261, 2262, 2263,
       2264, 2265, 2266, 2267, 2268, 2269, 2270, 2271, 2272, 2273, 2274,
       2275, 2276, 2277, 2278, 2279, 2280, 2281, 2282, 2283, 2284, 2285,
       2286, 2287, 2288, 2289, 2290, 2291, 2292, 2293, 2294, 2295, 2296,
       2297, 2298, 2299, 2300, 2301, 2302, 2303, 2304, 2305, 2306, 2307,
       2308, 2309, 2310, 2311, 2312, 2313, 2314, 2315, 2316, 2317, 2318,
       2319, 2320, 2321, 2322, 2323, 2324, 2325, 2326, 2327, 2328, 2329,
       2330, 2331, 2332, 2333, 2334, 2335, 2336, 2337, 2338, 2339, 2340,
       2341])

In [None]:
len(next_neighbor0_unique_cols)

1819

In [None]:
# Walk consecutive unique columns: a jump of more than 3 closes the current
# run at the left value and opens a new run at the right value. The first
# and last column bound the outermost runs.
bbox_xcoords = [next_neighbor0_unique_cols[0]]
for prev_col, next_col in zip(next_neighbor0_unique_cols[:-1], next_neighbor0_unique_cols[1:]):
  if abs(prev_col - next_col) > 3:
    bbox_xcoords.extend((prev_col, next_col))
bbox_xcoords.append(next_neighbor0_unique_cols[-1])

In [None]:
len(bbox_xcoords)

4

In [None]:
bbox_xcoords

[319, 1238, 1443, 2341]

In [None]:
bbox_xcoords_pair = [(bbox_xcoords[i], bbox_xcoords[i+1]) for i in range(0, len(bbox_xcoords), 2)]
bbox_xcoords_pair

[(319, 1238), (1443, 2341)]

In [None]:
col_widths = get_col_widths(bbox_xcoords_pair)
col_widths

[919, 898]

In [None]:
mean_col_width = np.mean(col_widths)
mean_col_width

908.5

In [None]:
col_width_limit = mean_col_width * 0.9
col_width_limit

817.65

In [None]:
next_neighbor0_mask = erase_lines_with_small_width(next_neighbor0_mask, bbox_xcoords_pair, 0.01, col_width_limit)
img_show_from_arr(next_neighbor0_mask)

In [None]:
next_neighbor0_coords = next_neighbor_as_zero(height, width, next_neighbor0_mask)

In [None]:
# Row index of every edge pixel (one entry per pixel, duplicates kept).
next_neighbor0_rows = [edge_pixel[0] for edge_pixel in next_neighbor0_coords]

len(next_neighbor0_rows)

71670

In [None]:
next_neighbor0_rows = np.array(next_neighbor0_rows)
next_neighbor0_unique_rows = np.unique(next_neighbor0_rows)
next_neighbor0_unique_rows[200:300]

array([561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573,
       574, 575, 576, 577, 612, 613, 614, 615, 616, 617, 618, 619, 620,
       621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633,
       634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646,
       647, 648, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696,
       697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709,
       710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 755,
       756, 757, 758, 759, 760, 761, 762, 763, 764])

In [None]:
# Walk consecutive unique rows: a jump of more than 3 closes the current
# run at the upper value and opens a new run at the lower value. The first
# and last row bound the outermost runs.
bbox_ycoords = [next_neighbor0_unique_rows[0]]
for prev_row, next_row in zip(next_neighbor0_unique_rows[:-1], next_neighbor0_unique_rows[1:]):
  if abs(prev_row - next_row) > 3:
    bbox_ycoords.extend((prev_row, next_row))
bbox_ycoords.append(next_neighbor0_unique_rows[-1])

In [None]:
len(bbox_ycoords)

40

In [None]:
bbox_ycoords

[198,
 233,
 265,
 300,
 335,
 374,
 406,
 443,
 475,
 508,
 545,
 577,
 612,
 648,
 686,
 721,
 755,
 794,
 828,
 863,
 892,
 934,
 966,
 1004,
 1039,
 1078,
 1106,
 1144,
 1184,
 1213,
 1252,
 1283,
 1316,
 1354,
 1391,
 1424,
 1464,
 1494,
 1532,
 1568]

In [None]:
bbox_ycoords_pair = [(bbox_ycoords[i], bbox_ycoords[i+1]) for i in range(0, len(bbox_ycoords), 2)]
bbox_ycoords_pair

[(198, 233),
 (265, 300),
 (335, 374),
 (406, 443),
 (475, 508),
 (545, 577),
 (612, 648),
 (686, 721),
 (755, 794),
 (828, 863),
 (892, 934),
 (966, 1004),
 (1039, 1078),
 (1106, 1144),
 (1184, 1213),
 (1252, 1283),
 (1316, 1354),
 (1391, 1424),
 (1464, 1494),
 (1532, 1568)]

In [None]:
len(bbox_ycoords_pair)

20

In [None]:
row_heights = get_row_heights(bbox_ycoords_pair)
row_heights

[35,
 35,
 39,
 37,
 33,
 32,
 36,
 35,
 39,
 35,
 42,
 38,
 39,
 38,
 29,
 31,
 38,
 33,
 30,
 36]

In [None]:
mean_row_height = np.mean(row_heights)
mean_row_height

35.5

In [None]:
row_height_limit = mean_row_height * 0.7
row_height_limit

24.849999999999998

In [None]:
next_neighbor0_mask = erase_lines_with_small_height(next_neighbor0_mask, bbox_ycoords_pair, 0.01, row_height_limit)
img_show_from_arr(next_neighbor0_mask)

In [None]:
next_neighbor0_coords = next_neighbor_as_zero(height, width, next_neighbor0_mask)

In [None]:
# Column index of every edge pixel found in the re-scanned mask.
next_neighbor0_cols = [edge_pixel[1] for edge_pixel in next_neighbor0_coords]

len(next_neighbor0_cols)

71670

In [None]:
next_neighbor0_cols = np.array(next_neighbor0_cols)
next_neighbor0_unique_cols = np.unique(next_neighbor0_cols)

In [None]:
# Keep only the row bands that are strictly taller than the height limit.
row_patches = [
    y_pair for y_pair in bbox_ycoords_pair
    if abs(y_pair[0] - y_pair[1]) > row_height_limit
]

len(row_patches)

20

In [None]:
find_image_page_type(next_neighbor0_unique_cols, 0.08)

'two page image'

In [None]:
row_patches

[(198, 233),
 (265, 300),
 (335, 374),
 (406, 443),
 (475, 508),
 (545, 577),
 (612, 648),
 (686, 721),
 (755, 794),
 (828, 863),
 (892, 934),
 (966, 1004),
 (1039, 1078),
 (1106, 1144),
 (1184, 1213),
 (1252, 1283),
 (1316, 1354),
 (1391, 1424),
 (1464, 1494),
 (1532, 1568)]

In [None]:
patch = create_row_patches(row_patches[0][0], row_patches[0][1], next_neighbor0_coords)
patch

[(198, 964),
 (198, 965),
 (198, 966),
 (198, 967),
 (198, 968),
 (198, 969),
 (198, 970),
 (198, 974),
 (198, 975),
 (199, 959),
 (199, 960),
 (199, 961),
 (199, 962),
 (199, 963),
 (199, 971),
 (199, 972),
 (199, 973),
 (199, 976),
 (199, 977),
 (199, 978),
 (199, 979),
 (199, 980),
 (199, 981),
 (199, 982),
 (199, 983),
 (199, 984),
 (199, 985),
 (199, 986),
 (199, 987),
 (199, 988),
 (199, 989),
 (199, 990),
 (199, 991),
 (199, 992),
 (199, 993),
 (199, 994),
 (199, 995),
 (199, 996),
 (199, 997),
 (199, 998),
 (199, 999),
 (199, 1000),
 (199, 1001),
 (199, 1002),
 (199, 1003),
 (199, 1042),
 (199, 1043),
 (199, 1044),
 (199, 1045),
 (199, 1046),
 (199, 1047),
 (199, 1048),
 (199, 1049),
 (199, 1050),
 (199, 1051),
 (199, 1052),
 (199, 1053),
 (199, 1054),
 (199, 1055),
 (199, 1056),
 (199, 1057),
 (199, 1058),
 (199, 1059),
 (199, 1060),
 (199, 1061),
 (199, 1062),
 (199, 1063),
 (199, 1064),
 (199, 1065),
 (200, 959),
 (200, 1003),
 (200, 1042),
 (200, 1065),
 (201, 916),
 (201, 

In [None]:
bboxes = create_bbox_list(row_patches, next_neighbor0_unique_cols, 0.08, next_neighbor0_coords)
len(bboxes)

38