In [1]:
import os
import sys 
sys.path.append("..")

import matplotlib.pyplot as plt

from run import deskew_image

from utils.image import Image

In [2]:
def find_images(directory, extension):
    """
    Returns the abs path to all files with a specified extension in a parent directory.

    Parameters
    ----------
    directory : str
        The parent directory to search for the files.
    extension : str
        The extension of the files to be found.

    Returns
    -------
    images : list
        The list of absolute paths to the found files.
    """
    images = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(extension):
                file_path = os.path.join(root, file)
                images.append(file_path)
    return images

In [3]:
images = find_images("/scratch/gpfs/RUSTOW/htr_deskewing_image_dataset/NEED_DESKEWING", ".tif")

In [29]:
import numpy as np

from deskew import determine_skew

# from jdeskew.estimator import get_angle



def calculate_skew_angle(image: np.ndarray) -> float:
    """Calculate the skew angle of an image.

    Parameters
    ----------
    image : np.ndarray
        The image data in RGB format.

    Returns
    -------
    float
        The skew angle of the image in degrees where an angle > 0 is a counter-clockwise rotation and an angle < 0 is a
        clockwise rotation.
    """
    return determine_skew(image, min_deviation=0.025, num_peaks=100)
    # return get_angle(image)


In [30]:
import argparse
import numpy as np

from bounding_box_generator import BoundingBoxGenerator
from instance_segmentation_generator import InstanceSegmentationGenerator
from utils.image import Image
# from document_skew_estimator import calculate_skew_angle
from skimage.io import imsave

def deskew_image(image_path: str, output_path: str):
    """Deskew an image.

    Parameters
    ----------
    image_path : str
        The path to the image file to be deskewed.
    output_path : str
        The path to the output image file.
    """
    image = Image(image_path)

    #### TEMPORARY CODE ####
    grounding_dino_config_path = "/scratch/gpfs/eh0560/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
    grounding_dino_weight_path = "/scratch/gpfs/eh0560/imagedeskewing/models/grounding_dino_models/groundingdino_swint_ogc.pth"

    sam_checkpoint_path = "../../models/sam_models/sam_vit_h_4b8939.pth"
    model_type = "vit_h"

    text_prompt = "old brown paper"
    box_threshold = 0.50
    text_threshold = 0.25
    ########################

    bbg = BoundingBoxGenerator(grounding_dino_config_path, grounding_dino_weight_path)
    detections = bbg.find_objects(image.as_array(), text_prompt, box_threshold, text_threshold)

    isg = InstanceSegmentationGenerator(model_type, sam_checkpoint_path)
    detections.mask = isg.segment_objects(image.as_array(), detections.xyxy)

    # Flattening all the masks to a single mask.
    mask = np.any(detections.mask, axis=0)

    # Computing the smallest bounding box that contains all the masks.
    x0 = int(detections.xyxy[:, 0].min())
    y0 = int(detections.xyxy[:, 1].min())
    x1 = int(detections.xyxy[:, 2].max())
    y1 = int(detections.xyxy[:, 3].max())

    # Adding padding so the image is not cropped too tightly.
    # Found that this improves the accuracy of the skew angle estimation.
    padding = -10
    x0 = max(0, x0 - padding)
    y0 = max(0, y0 - padding)
    x1 = min(image.get_width(), x1 + padding)
    y1 = min(image.get_height(), y1 + padding)

    cropped_image = image.as_array()[y0:y1, x0:x1]

    skew_angle = calculate_skew_angle(cropped_image)
    print(f"angle = {skew_angle}")
    imsave(output_path, image.rotated(skew_angle))

In [39]:
deskew_image(images[57], "./tmp.jpg")

final text_encoder_type: bert-base-uncased


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


ValueError: zero-size array to reduction operation minimum which has no identity

In [None]:
img = Image("./tmp.jpg")

fig = plt.figure(figsize=(10,10))  # specify the figure size in inches
plt.imshow(img.as_array())
plt.axis('off')
plt.show()