In [None]:
import json
import shutil
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Tuple

import cv2
import easyocr
import numpy as np
import torch
from numpy import ndarray
from paddleocr import LayoutDetection, TextDetection
from scipy.signal import find_peaks

# The example image is expected under ./images relative to the notebook's working directory.
prep_root = Path.cwd()
IMAGE_PATH = prep_root.joinpath("images", "example.jpg")

# Probe CUDA once; the torch device string follows from the same flag.
has_gpu = torch.cuda.is_available()
device = "cuda" if has_gpu else "cpu"

In [None]:
# 1: Deskew manually (optional)


def deskew(image_path: Path, angle: float) -> None:
    """
    Rotate the original image by ``angle`` degrees and save it as the working image.

    The result is written to ``<image dir>/<image stem>/<image name>``; the
    original file is never modified. The rotation keeps the original canvas
    size, pivots around the image midpoint and fills uncovered borders by
    replicating edge pixels.

    Args:
        image_path: Path to the original scan.
        angle: Rotation angle in degrees, passed to ``cv2.getRotationMatrix2D``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
        OSError: If the rotated image cannot be written.
    """
    image_path = Path(image_path)
    working_dir = image_path.parent / image_path.stem
    working_image = working_dir / image_path.name

    # cv2.imread reports failure by returning None rather than raising.
    img = cv2.imread(str(image_path), cv2.IMREAD_UNCHANGED)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    working_dir.mkdir(exist_ok=True)

    h, w = img.shape[:2]
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    deskewed = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    # cv2.imwrite returns False on failure instead of raising.
    if not cv2.imwrite(str(working_image), deskewed):
        raise OSError(f"Could not write image: {working_image}")
    print(f"Rotated by {angle}° -> {working_image}")


# Apply a small manual rotation (0.1 degrees) to the example image; the result becomes the working image.
deskew(IMAGE_PATH, 0.1)

In [None]:
# 2: Crop to table region (PaddleOCR handles dark scan artifacts better than alternatives)

# Parameters are found using 'line_detection.py'
LINE_DETECTION_CONFIG = {
    # Side length of the square Gaussian blur kernel applied before Canny.
    "blur_kernel": 5,
    # Lower / upper thresholds passed to cv2.Canny.
    "canny_low": 50,
    "canny_high": 150,
    # Side length of the square kernel used for the morphological close on the edge map.
    "morph_kernel": 3,
    # Threshold, minLineLength and maxLineGap passed to cv2.HoughLinesP.
    "hough_threshold": 100,
    "min_line_length": 2000,
    "max_line_gap": 1,
    # Height (px) of each horizontal strip used for the edge-density scan.
    "strip_height": 21,
    # A strip is flagged when its density exceeds mean + density_threshold * std.
    "density_threshold": 2.8,
}


def _find_table_top_edge(image: ndarray, vis: ndarray, config: Dict[str, Any], current_y1: int) -> int:
    """
    Refines the top edge of the table using Hough lines and edge density.

    Two detectors run on a blurred, Canny-filtered, morphologically closed
    edge map of the whole image:

    * Method A: near-horizontal ``cv2.HoughLinesP`` segments (within 15° of
      horizontal); each contributes its lower endpoint's Y.
    * Method B: horizontal strips whose edge density exceeds
      ``mean + density_threshold * std``; each contributes its centre Y.

    Every detection is drawn onto ``vis`` (mutated in place).

    Args:
        image: BGR image to analyse.
        vis: Debug canvas the detections are drawn on.
        config: Parameter dict with the keys of ``LINE_DETECTION_CONFIG``.
        current_y1: Current top edge; the result is never above it.

    Returns:
        The largest Y among ``current_y1`` and all detections, as a plain
        Python ``int`` (the Hough coordinates are numpy integers).

    NOTE(review): detections are collected over the full image height, so a
    long horizontal line below the header (e.g. the table's bottom border)
    would also win the ``max`` — confirm this is acceptable for the scans used.
    """
    h, w = image.shape[:2]

    blur_k = config["blur_kernel"]
    morph_k = config["morph_kernel"]

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (blur_k, blur_k), 0)
    edges = cv2.Canny(blurred, config["canny_low"], config["canny_high"])

    # Close small gaps so long ruling lines stay connected.
    edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, np.ones((morph_k, morph_k), np.uint8))

    candidate_ys: List[int] = []

    # Method A: Hough Lines
    lines = cv2.HoughLinesP(
        edges,
        1,
        np.pi / 180,
        config["hough_threshold"],
        minLineLength=config["min_line_length"],
        maxLineGap=config["max_line_gap"],
    )

    if lines is not None:
        for line in lines:
            # Convert numpy integers to Python ints once, for drawing and for the result.
            lx1, ly1, lx2, ly2 = (int(v) for v in line[0])
            angle = abs(np.arctan2(ly2 - ly1, lx2 - lx1) * 180 / np.pi)

            # Filter: Keep only horizontal lines (0° or 180° range)
            if angle <= 15 or angle >= 165:
                cv2.line(vis, (lx1, ly1), (lx2, ly2), (90, 255, 255), 6)
                candidate_ys.append(max(ly1, ly2))

    # Method B: Edge Density
    strip_h = config["strip_height"]
    densities = [
        (y + strip_h // 2, np.sum(edges[y : y + strip_h, :]) / (strip_h * w))
        for y in range(0, h - strip_h, 10)
    ]

    if densities:
        dens_vals = [density for _, density in densities]
        threshold = np.mean(dens_vals) + config["density_threshold"] * np.std(dens_vals)
        for y, density in densities:
            if density > threshold:
                cv2.line(vis, (0, y), (w, y), (90, 255, 255), 6)
                candidate_ys.append(y)

    if not candidate_ys:
        return current_y1

    # The true top edge is the lowest (highest Y value) line found above the data
    return int(max(current_y1, max(candidate_ys)))


def _keep_uncropped_image(source: Path, working_image: Path, stem: str, reason: str, debug: bool) -> None:
    """Ensure the working image exists when no crop is applied, copying the source if needed."""
    try:
        if source != working_image:
            shutil.copy2(source, working_image)
            if debug:
                print(f"{reason} — copied original to {working_image}")
        elif debug:
            print(f"{reason} — image already in working dir")
    except OSError as e:
        print(f"Copy failed for {stem}: {e}")


def crop(image_path: Path, debug=True):
    """
    Crops the image to the table region using a multi-stage approach:
    1. PaddleOCR Layout to find the rough table bbox.
    2. Text detection to expand the bbox if text protrudes.
    3. Heuristic edge detection to refine the top header line.

    The input is the working image (``<image dir>/<stem>/<name>``) when it
    already exists, otherwise the original. The crop overwrites the working
    image. If no table is found, or the refined crop box is empty, the
    uncropped image is kept (copied into the working dir if necessary) so
    later steps always have a working image to read.

    Args:
        image_path: Path to the original image.
        debug: When true, print progress and save ``<stem>_crop.jpg`` with
            the detections drawn on it.

    Raises:
        FileNotFoundError: If the source image cannot be read.
    """
    image_path = Path(image_path)
    stem = image_path.stem
    working_dir = image_path.parent / stem
    working_dir.mkdir(exist_ok=True)
    working_image = working_dir / image_path.name

    source = working_image if working_image.exists() else image_path

    # Read first so an unreadable file fails before the models are loaded.
    img = cv2.imread(str(source))
    if img is None:
        raise FileNotFoundError(f"Could not read image: {source}")
    vis = img.copy()
    h, w = img.shape[:2]

    # --- Step 1: Coarse Table Detection (Layout Analysis) ---
    layout_model = LayoutDetection(model_name="PP-DocLayout_plus-L")
    layout_detection_results = layout_model.predict(str(source), batch_size=1, layout_nms=True, threshold=0.2)

    table_bbox = None

    for res in layout_detection_results:
        if "boxes" not in res:
            continue
        tables = [b for b in res["boxes"] if b.get("label") == "table"]
        if tables:
            # Pick highest confidence table
            best = max(tables, key=lambda x: x.get("score", 0))
            c = best["coordinate"]
            table_bbox = [int(c[0]), int(c[1]), int(c[2]), int(c[3])]
            break

    if table_bbox is None:
        _keep_uncropped_image(source, working_image, stem, "No table detected", debug)
        return

    x1, y1, x2, y2 = table_bbox
    cv2.rectangle(vis, (x1, y1), (x2, y2), (255, 0, 0), 30)
    gx1, gy1, gx2, gy2 = table_bbox  # Save original global detection

    # --- Step 2: Expand Sides (Text Detection) ---
    # If text boxes protrude from the layout bbox, expand to include them.
    # The text model is only loaded once a table has actually been found.
    text_model = TextDetection(model_name="PP-OCRv5_server_det")
    text_detection_results = text_model.predict(str(source), batch_size=1)

    for res in text_detection_results:
        if "dt_polys" not in res:
            continue
        for poly in res["dt_polys"]:
            poly = np.asarray(poly)
            px1, py1 = int(np.min(poly[:, 0])), int(np.min(poly[:, 1]))
            px2, py2 = int(np.max(poly[:, 0])), int(np.max(poly[:, 1]))

            # Only text that lies vertically inside the table and straddles a
            # side edge of the original layout bbox may widen the crop.
            inside_vertically = py1 >= gy1 and py2 <= gy2
            if inside_vertically:
                if px1 < gx1 and px2 > gx1:
                    x1 = min(x1, px1)  # Expand Left
                if px2 > gx2 and px1 < gx2:
                    x2 = max(x2, px2)  # Expand Right

            cv2.rectangle(vis, (px1, py1), (px2, py2), (92, 183, 206), 1)

    # --- Step 3: Refine Top Edge (Line/Density Detection) ---
    # Finds the specific line separating headers from data
    new_y1 = int(_find_table_top_edge(img, vis, LINE_DETECTION_CONFIG, y1))

    if new_y1 > y1:
        # Lowest edge line
        cv2.line(vis, (0, new_y1), (w, new_y1), (0, 0, 255), 10)
        y1 = new_y1

    # --- Step 4: Final Crop & Save ---
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w, x2), min(h, y2)

    if x2 <= x1 or y2 <= y1:
        # Degenerate box (e.g. the refined top edge fell below the table):
        # keep the uncropped image instead of leaving no working image behind.
        _keep_uncropped_image(source, working_image, stem, "Crop region is empty", debug)
        if debug:
            cv2.imwrite(str(working_dir / f"{stem}_crop.jpg"), vis)
        return

    cropped = img[y1:y2, x1:x2]
    cv2.imwrite(str(working_image), cropped)
    cv2.rectangle(vis, (x1, y1), (x2, y2), (81, 255, 0), 20)  # Green

    if debug:
        cv2.imwrite(str(working_dir / f"{stem}_crop.jpg"), vis)
        print(f"Cropped: {working_image}")


# Crop the example image to its table region (debug output is on by default).
crop(IMAGE_PATH)

In [None]:
# 3: Text detection


# English-only EasyOCR reader; uses the GPU when torch reported CUDA as available.
reader = easyocr.Reader(["en"], gpu=has_gpu)

# Parameters are found using 'find_text_bbox.py'
TEXT_BBOX_CONFIG = {
    # Grayscale cut-off for the inverse binary threshold (pixels at or below it become foreground).
    "threshold": 125,
    # Dilation kernel given as (width, height); it is swapped to (rows, cols) when the numpy kernel is built.
    "kernel_size": (5, 5),
    # Number of dilation passes.
    "iterations": 2,
}


def has_overlap(ocv_bbox: list[int], easyocr_bboxes: list[list[int]]) -> bool:
    """
    Tell whether ``ocv_bbox`` intersects at least one EasyOCR box.

    Boxes are ``[x1, y1, x2, y2]``. The comparison is strict, so boxes that
    only share an edge or a corner do not count as overlapping.
    """
    return any(
        ocv_bbox[0] < other[2] and ocv_bbox[2] > other[0] and ocv_bbox[1] < other[3] and ocv_bbox[3] > other[1]
        for other in easyocr_bboxes
    )


def detect_text_bboxes_opencv(image: np.ndarray, config: dict) -> list[list[int]]:
    """
    Finds candidate text boxes using OpenCV morphology only.

    The image is converted to grayscale, inverse-thresholded at
    ``config["threshold"]``, dilated ``config["iterations"]`` times with a
    rectangular kernel, and the bounding rectangle of every external contour
    is returned as ``[x1, y1, x2, y2]``. No EasyOCR filtering happens here.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(grayscale, config["threshold"], 255, cv2.THRESH_BINARY_INV)

    # kernel_size is (width, height) while numpy shapes are (rows, cols), hence the swap.
    kernel_w, kernel_h = config["kernel_size"][0], config["kernel_size"][1]
    structuring = np.ones((kernel_h, kernel_w), np.uint8)
    merged = cv2.dilate(binary, structuring, iterations=config["iterations"])

    contours, _ = cv2.findContours(merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    return [[x, y, x + w, y + h] for x, y, w, h in map(cv2.boundingRect, contours)]


def detect_text_bboxes_easyocr(reader: easyocr.Reader, working_image: Path, height: int, width: int) -> list[list[int]]:
    """
    Runs EasyOCR's detector on the working image and returns axis-aligned boxes.

    Horizontal detections arrive as ``(x_min, x_max, y_min, y_max)`` and are
    reordered; free-form detections arrive as point lists and are reduced to
    their bounding rectangle. All boxes are returned as ``[x1, y1, x2, y2]``
    integers. The detection canvas is sized to the image's longer side.
    """
    detect_options = {
        "min_size": 1,
        "text_threshold": 0.3,
        "low_text": 0.3,
        "link_threshold": 0.05,
        "width_ths": 0.1,
        "canvas_size": max(height, width),
        "mag_ratio": 1.5,
    }
    horizontal_list, free_list = reader.detect(str(working_image), **detect_options)

    # Only the first (and only) image's results are used.
    boxes = [
        [int(x_min), int(y_min), int(x_max), int(y_max)]
        for x_min, x_max, y_min, y_max in horizontal_list[0]
    ]

    for quad in free_list[0]:
        pts = np.array(quad)
        xs, ys = pts[:, 0], pts[:, 1]
        boxes.append([int(np.min(xs)), int(np.min(ys)), int(np.max(xs)), int(np.max(ys))])

    return boxes


def detect_text_bboxes(
    image_path: Path, reader: easyocr.Reader, config: dict, debug_easyocr: bool = False
) -> list[list[int]]:
    """
    Detects precise text bounding boxes by filtering OpenCV candidates with EasyOCR.

    The logic is:
    1. Generate tight, precise bounding boxes using OpenCV morphological operations.
    2. Generate rough text regions using EasyOCR.
    3. Keep OpenCV boxes ONLY if they overlap with an EasyOCR region.

    This combines OpenCV's precision with EasyOCR's ability to ignore
    non-text artifacts (like binding holes or scan artifacts).

    Side effects (all inside ``<image dir>/<stem>/``): writes
    ``<stem>_txt_bbox.jpg`` (kept boxes drawn in red), ``<stem>_bbox.json``
    (the kept boxes) and, when ``debug_easyocr`` is set,
    ``<stem>_easyocr_bbox.jpg``.

    Args:
        image_path: Path to the original image; the working image is read.
        reader: EasyOCR reader used for the coarse detection.
        config: Parameter dict with the keys of ``TEXT_BBOX_CONFIG``.
        debug_easyocr: Also save a visualisation of the raw EasyOCR boxes.

    Returns:
        Kept boxes as ``[x1, y1, x2, y2]``, sorted by ``(y1, x1)``.

    Raises:
        FileNotFoundError: If the working image cannot be read.
    """
    image_path = Path(image_path)
    stem = image_path.stem
    working_dir = image_path.parent / stem
    working_image = working_dir / image_path.name

    # cv2.imread returns None on failure; fail with a clear message instead
    # of an AttributeError on `.shape`.
    image = cv2.imread(str(working_image))
    if image is None:
        raise FileNotFoundError(f"Could not read working image: {working_image}")
    height, width = image.shape[:2]

    easyocr_bboxes = detect_text_bboxes_easyocr(reader, working_image, height, width)
    opencv_bboxes = detect_text_bboxes_opencv(image, config)

    if debug_easyocr:
        debug_vis = image.copy()
        for x1, y1, x2, y2 in easyocr_bboxes:
            cv2.rectangle(debug_vis, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.imwrite(str(working_dir / f"{stem}_easyocr_bbox.jpg"), debug_vis)

    filtered_bboxes = [b for b in opencv_bboxes if has_overlap(b, easyocr_bboxes)]

    # Keeps numbering consistent (Top-to-Bottom, Left-to-Right)
    filtered_bboxes.sort(key=lambda b: (b[1], b[0]))

    vis = image.copy()
    for x1, y1, x2, y2 in filtered_bboxes:
        cv2.rectangle(vis, (x1, y1), (x2, y2), (0, 0, 255), 2)

    cv2.imwrite(str(working_dir / f"{stem}_txt_bbox.jpg"), vis)

    with (working_dir / f"{stem}_bbox.json").open("w") as f:
        json.dump(filtered_bboxes, f, indent=2)

    return filtered_bboxes


# Detect text boxes on the working image; also saves the raw EasyOCR boxes for inspection.
bboxes = detect_text_bboxes(IMAGE_PATH, reader, TEXT_BBOX_CONFIG, debug_easyocr=True)

In [None]:
# 4: Detect row/column lines, assign text bboxes to cells and store all data for future use

# Parameters are found using 'find_peak_intensity.py'
PEAK_CONFIG = {
    # Filter strength passed to cv2.fastNlMeansDenoising.
    "denoise_strength": 10,
    # Binary threshold applied after denoising.
    "threshold": 234,
    # find_peaks `height`, `distance` and `prominence` for the per-row darkness profile (row separators).
    "row_min_height": 103,
    "row_min_distance": 58,
    "row_prominence": 106,
    # find_peaks `height`, `distance` and `prominence` for the per-column darkness profile (column separators).
    "col_min_height": 34,
    "col_min_distance": 34,
    "col_prominence": 89,
}


def detect_separators(image_path: Path, bbox_data: list[list[int]], config: dict) -> dict[str, Any]:
    """
    Locate row and column separator lines from projection-profile peaks.

    The image is denoised and binarised; the mean darkness of every pixel row
    and pixel column is then scanned with ``scipy.signal.find_peaks``.

    Args:
        image_path: Image to analyse.
        bbox_data: Text boxes as ``[x1, y1, x2, y2]``; used only to decide
            whether the image's left/right borders must be added as column
            separators so that all text is enclosed.
        config: Parameter dict with the keys of ``PEAK_CONFIG``.

    Returns:
        Dict with ``image_dimensions`` (``width``/``height``),
        ``row_separators`` (sorted, always including ``0`` and the image
        height) and ``column_separators`` (sorted, at least two entries).

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    # cv2.imread returns None on failure rather than raising.
    image = cv2.imread(str(image_path))
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Use config for denoising strength (h) and thresholding
    gray = cv2.fastNlMeansDenoising(gray, None, config["denoise_strength"], 7, 21)
    _, binary = cv2.threshold(gray, config["threshold"], 255, cv2.THRESH_BINARY)

    height, width = binary.shape

    # Inverted so dark pixels (ruling lines, ink) produce high values.
    darkness = 255 - binary

    # Detect Row Separators
    h_peaks, _ = find_peaks(
        np.mean(darkness, axis=1),
        height=config["row_min_height"],
        distance=config["row_min_distance"],
        prominence=config["row_prominence"],
    )
    row_separators = sorted({0, *h_peaks.tolist(), height})

    # Detect Column Separators
    v_peaks, _ = find_peaks(
        np.mean(darkness, axis=0),
        height=config["col_min_height"],
        distance=config["col_min_distance"],
        prominence=config["col_prominence"],
    )
    col_separators = v_peaks.tolist()

    # Ensure column separators encompass all text
    if bbox_data:
        all_x_coords = [coord for bbox in bbox_data for coord in (bbox[0], bbox[2])]

        if not col_separators or min(all_x_coords) < col_separators[0]:
            col_separators.insert(0, 0)

        # col_separators is non-empty here: it either had peaks or just received 0.
        if max(all_x_coords) > col_separators[-1]:
            col_separators.append(width)

    col_separators = sorted(set(col_separators))

    # Fall back to a single full-width column when fewer than two separators remain.
    if len(col_separators) < 2:
        col_separators = [0, width]

    return {
        "image_dimensions": {"width": width, "height": height},
        "row_separators": row_separators,
        "column_separators": col_separators,
    }


def assign_text_to_cells(separators: dict[str, Any], bbox_data: list[list[int]]) -> dict[str, dict[str, Any]]:
    """
    Build the table grid and place every text box in the cell holding its centre.

    Rows and columns are the bands between consecutive separators. Bands are
    half-open (``low <= centre < high``) except the last one, which also
    includes its upper bound, so a box whose centre sits exactly on a
    separator lands in exactly one cell instead of being duplicated into the
    neighbouring ones.

    Rows without any text are skipped and the remaining rows are renumbered
    consecutively from 0 (``logical_row``).

    Args:
        separators: Dict with sorted ``row_separators`` and
            ``column_separators`` lists.
        bbox_data: Text boxes as ``[x1, y1, x2, y2]``.

    Returns:
        Mapping ``"<row>,<column>"`` -> cell dict with ``row``, ``column``,
        ``bounds`` (``top``/``bottom``/``left``/``right``) and
        ``text_elements`` (each ``{"bbox": {...}, "index": i}`` where ``i``
        is the box's position in ``bbox_data``).
    """
    row_seps = separators["row_separators"]
    col_seps = separators["column_separators"]

    def _band_index(seps: list, value: float):
        """Index of the band containing ``value``, or None when it lies outside all bands."""
        last = len(seps) - 2
        for idx in range(last + 1):
            low, high = seps[idx], seps[idx + 1]
            if low <= value < high or (idx == last and value == high):
                return idx
        return None

    # Bucket every box once: row band -> column band -> text elements.
    # A box outside every column still marks its row as non-empty (key None).
    boxes_by_row: dict = {}
    for i, (x1, y1, x2, y2) in enumerate(bbox_data):
        r = _band_index(row_seps, (y1 + y2) / 2)
        if r is None:
            continue
        c = _band_index(col_seps, (x1 + x2) / 2)
        boxes_by_row.setdefault(r, {}).setdefault(c, []).append(
            {"bbox": {"x1": x1, "y1": y1, "x2": x2, "y2": y2}, "index": i}
        )

    grid = {}
    logical_row = 0

    for r in range(len(row_seps) - 1):
        # Skip rows that contain no text at all.
        if r not in boxes_by_row:
            continue

        for c in range(len(col_seps) - 1):
            grid[f"{logical_row},{c}"] = {
                "row": logical_row,
                "column": c,
                "bounds": {
                    "top": row_seps[r],
                    "bottom": row_seps[r + 1],
                    "left": col_seps[c],
                    "right": col_seps[c + 1],
                },
                "text_elements": boxes_by_row[r].get(c, []),
            }

        logical_row += 1

    return grid


def create_table_visualization(image_path: Path, result: dict[str, Any]) -> np.ndarray:
    """
    Render the detected table structure on top of the image.

    Draws row separators (orange) and column separators (green), labels each
    grid cell with ``(row,col)`` near its top-left corner, and outlines every
    assigned text box together with its index.

    Args:
        image_path: Image to draw on (read fresh from disk).
        result: Dict with ``separators`` and ``table_grid`` entries.

    Returns:
        The annotated BGR image.
    """
    canvas = cv2.imread(str(image_path)).copy()

    separators = result["separators"]
    img_h, img_w = canvas.shape[:2]

    # Draw Separators
    for row_y in separators["row_separators"]:
        cv2.line(canvas, (0, row_y), (img_w, row_y), (0, 120, 255), 2)

    for col_x in separators["column_separators"]:
        cv2.line(canvas, (col_x, 0), (col_x, img_h), (0, 255, 0), 2)

    # Draw Cells and Indices
    label_font = cv2.FONT_HERSHEY_SIMPLEX
    blue = (255, 0, 0)

    for cell in result["table_grid"].values():
        bounds = cell["bounds"]

        # Cell label sits 10 px right of and 20 px below the cell's top-left corner.
        label_origin = (bounds["left"] + 10, bounds["top"] + 20)
        cv2.putText(canvas, f"({cell['row']},{cell['column']})", label_origin, label_font, 0.4, blue, 1)

        for element in cell["text_elements"]:
            box = element["bbox"]
            top_left = (box["x1"], box["y1"])
            bottom_right = (box["x2"], box["y2"])

            cv2.rectangle(canvas, top_left, bottom_right, blue, 1)
            cv2.putText(canvas, str(element["index"]), (box["x1"], box["y1"] - 5), label_font, 0.5, blue, 1)

    return canvas


def detect_table_structure(image_path: Path, config: dict) -> None:
    """
    Detect the table grid of the working image and persist it.

    Reads ``<stem>_bbox.json`` from the working directory when present (an
    empty box list is used otherwise), detects separators, assigns the text
    boxes to cells, and writes ``<stem>_table.json`` plus the annotated
    ``<stem>_table.jpg``.

    Args:
        image_path: Path to the original image; files live in ``<image dir>/<stem>/``.
        config: Peak-detection parameters forwarded to ``detect_separators``.
    """
    image_path = Path(image_path)
    stem = image_path.stem
    working_dir = image_path.parent / stem
    working_image = working_dir / image_path.name

    bbox_json = working_dir / f"{stem}_bbox.json"
    bbox_data = json.loads(bbox_json.read_text()) if bbox_json.exists() else []

    separators = detect_separators(working_image, bbox_data, config)
    result = {
        "image_name": stem,
        "separators": separators,
        "table_grid": assign_text_to_cells(separators, bbox_data),
    }

    (working_dir / f"{stem}_table.json").write_text(json.dumps(result, indent=2))

    cv2.imwrite(str(working_dir / f"{stem}_table.jpg"), create_table_visualization(working_image, result))


# Build the table grid for the example image and write <stem>_table.json / <stem>_table.jpg.
detect_table_structure(IMAGE_PATH, PEAK_CONFIG)

In [None]:
# 5: Segment text by row, reordering columns within each segment

SEGMENT_PADDING = 65  # White margin (px) added at the left and right end of every assembled segment
COLUMN_GAP = 30  # White gap (px) inserted between adjacent columns inside a segment


def crop_text_from_column(image: np.ndarray, bboxes: list[dict[str, int]]) -> dict[str, Any] | None:
    """Extract and combine all text bboxes from a single column into one image."""
    if not bboxes:
        return None

    min_x = min(bbox["x1"] for bbox in bboxes)
    max_x = max(bbox["x2"] for bbox in bboxes)
    min_y = min(bbox["y1"] for bbox in bboxes)
    max_y = max(bbox["y2"] for bbox in bboxes)

    width = max_x - min_x
    height = max_y - min_y

    result = np.full((height, width, 3), 255, dtype=np.uint8)

    for bbox in bboxes:
        x1, y1, x2, y2 = bbox["x1"], bbox["y1"], bbox["x2"], bbox["y2"]
        bbox_crop = image[y1:y2, x1:x2]

        if bbox_crop.size == 0:
            continue

        rel_x = x1 - min_x
        rel_y = y1 - min_y
        result[rel_y : rel_y + bbox_crop.shape[0], rel_x : rel_x + bbox_crop.shape[1]] = bbox_crop

    return {"image": result, "width": width, "height": height}


def assemble_segment(image: np.ndarray, bboxes: list[dict[str, Any]], column_order: list[int]) -> np.ndarray:
    """
    Lay out the text of several columns side by side in ``column_order``.

    Each column's boxes are merged into one patch; patches are placed left to
    right on a white canvas, vertically centred, separated by ``COLUMN_GAP``
    and framed by ``SEGMENT_PADDING`` on both ends. A blank 100x100 white
    image is returned when there is nothing to draw.
    """
    if not bboxes:
        return np.full((100, 100, 3), 255, dtype=np.uint8)

    # Collect one merged patch per requested column, skipping columns without text.
    columns = []
    for col_idx in column_order:
        members = [box for box in bboxes if box["column"] == col_idx]
        if not members:
            continue
        content = crop_text_from_column(image, members)
        if content:
            columns.append(content)

    if not columns:
        return np.full((100, 100, 3), 255, dtype=np.uint8)

    canvas_height = max([0] + [col["height"] for col in columns])
    canvas_width = 2 * SEGMENT_PADDING + sum(col["width"] for col in columns) + COLUMN_GAP * (len(columns) - 1)
    canvas = np.full((canvas_height, canvas_width, 3), 255, dtype=np.uint8)

    cursor = SEGMENT_PADDING
    for col in columns:
        col_w, col_h = col["width"], col["height"]
        top = (canvas_height - col_h) // 2
        canvas[top : top + col_h, cursor : cursor + col_w] = col["image"]
        # Advance past this column and the gap that precedes the next one.
        cursor += col_w + COLUMN_GAP

    return canvas


def group_cells_by_row(grid: dict[str, dict]) -> dict[str, list[dict]]:
    """
    Bucket grid cells by the row part of their ``"<row>,<column>"`` key.

    Returns a plain dict mapping the row id (as a string) to a list of
    ``{"cell_key": ..., "cell_data": ...}`` entries in grid iteration order.
    """
    grouped: dict[str, list[dict]] = {}
    for cell_key, cell_data in grid.items():
        row_id, _, _ = cell_key.partition(",")
        grouped.setdefault(row_id, []).append({"cell_key": cell_key, "cell_data": cell_data})
    return grouped


def pad_to_height(segment: np.ndarray, target_height: int) -> np.ndarray:
    """
    Force ``segment`` to ``target_height`` rows.

    Taller (or equal) segments are cut to their first ``target_height`` rows;
    shorter ones are centred vertically on a white 3-channel canvas.
    """
    missing_rows = target_height - segment.shape[0]
    if missing_rows <= 0:
        return segment[:target_height]

    top = missing_rows // 2
    canvas = np.full((target_height, segment.shape[1], 3), 255, dtype=np.uint8)
    canvas[top : top + segment.shape[0]] = segment
    return canvas


def create_row_segments(
    image: np.ndarray, row_cells: list[dict], column_groups: list[list[int]]
) -> list[np.ndarray | None]:
    """Create segment images for a single row, one per column group."""
    all_bboxes = []
    for cell_info in row_cells:
        cell_data = cell_info["cell_data"]
        for text_elem in cell_data.get("text_elements", []):
            bbox = text_elem["bbox"]
            all_bboxes.append(
                {
                    "x1": bbox["x1"],
                    "y1": bbox["y1"],
                    "x2": bbox["x2"],
                    "y2": bbox["y2"],
                    "column": cell_data["column"],
                }
            )

    if not all_bboxes:
        return [None] * len(column_groups)

    segments = []
    for column_group in column_groups:
        group_bboxes = [bbox for bbox in all_bboxes if bbox["column"] in column_group]
        if group_bboxes:
            assembled = assemble_segment(image, group_bboxes, column_group)
            # Fixed px height of segments
            final = pad_to_height(assembled, 100)
            segments.append(final)
        else:
            segments.append(None)

    return segments


def segment_text_by_row(image_path: Path, column_groups: list[list[int]]) -> list[str]:
    """
    Segments the image into row strips based on the detected table grid.

    It reorders and groups columns within each row based on the 'column_groups'
    configuration, allowing you to restructure the table layout during extraction.

    Segments are written to ``<image dir>/<stem>/rows/`` as
    ``<stem>_<row>-<group number>.jpg`` (group numbers start at 1).

    Args:
        image_path: Path to the original image.
        column_groups: A list of lists defining the output order and grouping.
        Ex: [[0, 1], [2]] will create two images per row:
        one combining cols 0 & 1, and a second image containing only col 2.

    Returns:
        List of paths to the segment images that were actually written.

    Raises:
        FileNotFoundError: If the working image cannot be read or the table
            JSON does not exist.
    """
    image_path = Path(image_path)
    stem = image_path.stem
    working_dir = image_path.parent / stem
    working_image = working_dir / image_path.name
    table_json = working_dir / f"{stem}_table.json"
    output_dir = working_dir / "rows"

    # cv2.imread returns None on failure; fail here rather than deep inside the cropping helpers.
    image = cv2.imread(str(working_image))
    if image is None:
        raise FileNotFoundError(f"Could not read working image: {working_image}")

    with table_json.open("r") as f:
        table_data = json.load(f)

    grid = table_data["table_grid"]
    output_dir.mkdir(exist_ok=True)

    rows = group_cells_by_row(grid)
    output_paths = []

    # Row ids are strings; sort them numerically so row 10 follows row 9.
    for row_id in sorted(rows, key=int):
        segments = create_row_segments(image, rows[row_id], column_groups)

        for seg_idx, segment in enumerate(segments, 1):
            if segment is None:
                continue
            output_path = output_dir / f"{stem}_{row_id}-{seg_idx}.jpg"
            # cv2.imwrite returns False on failure; only report files that exist.
            if cv2.imwrite(str(output_path), segment):
                output_paths.append(str(output_path))
            else:
                print(f"Failed to write segment: {output_path}")

    print(f"Segmented {len(output_paths)} row images")
    return output_paths


# Each inner list becomes one segment image per row; its order sets the left-to-right column order.
# Active choice: a single segment containing columns 0-13 in their original order.
column_groups = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]]
# Alternative: one segment per column, emitted from column 13 down to column 0.
# column_groups = [[13], [12], [11], [10], [9], [8], [7], [6], [5], [4], [3], [2], [1], [0]]
# Alternative: three segments with custom column orderings.
# column_groups = [[8, 3, 1, 4], [13, 10,11, 0, 9], [5, 2, 6, 7, 12]]
segment_text_by_row(IMAGE_PATH, column_groups)