In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# input images stored on Drive can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import cv2
from pathlib import Path


In [3]:
# Folder with the source images (on the mounted Drive) and the local folder
# that will receive the resized copies.
input_dir = Path("/content/drive/MyDrive/Gadulla") / "Cleaned data"
output_dir = Path("best_resoulution_images")

# exist_ok=True makes this cell safe to re-run when the folder already exists.
output_dir.mkdir(exist_ok=True)
print("Done")

Done


In [4]:
# Size every image is resized to. It is passed to cv2.resize as `dsize`,
# which takes (width, height) in pixels.
TARGET_SIZE = (2400, 1100)

In [5]:
# Resize every readable image in input_dir to TARGET_SIZE and store it under
# the same file name in output_dir.
for img_path in input_dir.glob("*"):
    img = cv2.imread(str(img_path))
    if img is None:
        # cv2.imread returns None (instead of raising) for files it cannot decode.
        print(f"Could not read {img_path.name}")
        continue

    # Every output gets exactly TARGET_SIZE; the aspect ratio is not preserved.
    resized = cv2.resize(img, TARGET_SIZE, interpolation=cv2.INTER_AREA)
    out_path = output_dir / img_path.name
    # cv2.imwrite can fail by returning False without raising, so check it
    # explicitly; otherwise a missing output file would go unnoticed.
    if not cv2.imwrite(str(out_path), resized):
        print(f"Could not write {out_path.name}")
print("All images resized.")

All images resized.


In [6]:
# The cropping step reads from the folder that holds the resized images.
input_folder = Path("best_resoulution_images")

In [7]:
# Do not change Shivanshu
# Field name -> crop rectangle in pixels of the resized image. The cropping
# loop unpacks each tuple as (x, y, w, h) and slices img[y:y+h, x:x+w].
# NOTE(review): read as (x, y, w, h), "date" would span x = 1766..4108, which
# is wider than the 2400 px set by TARGET_SIZE; array slicing clamps it at the
# image edge. The numbers look like (x1, y1, x2, y2) corners instead — confirm
# which format was intended for this entry.
crop_regions = {
    "date": (1766, 66, 2342, 192),
    "payee": (66, 198, 2106, 120),
    "amount_words":  (68, 298, 2300, 210),
    "amount_digits": (1700, 360, 500, 160)
}

In [8]:
# Root folder for the crops, with one sub-folder per field name.
output_base = Path("cropped_fields")
for field_name in crop_regions.keys():
    field_dir = output_base / field_name
    # parents=True also creates output_base itself on the first pass.
    field_dir.mkdir(parents=True, exist_ok=True)

In [9]:
# Collect the files in input_folder whose extension (compared in lower case)
# is one of the accepted image types.
image_extensions = [".jpg", ".jpeg", ".png"]
image_paths = []
for candidate in input_folder.glob("*"):
    if candidate.suffix.lower() in image_extensions:
        image_paths.append(candidate)
print(f"Found {len(image_paths)} images to crop.")

Found 3633 images to crop.


In [10]:
# Cut every configured field out of each image and save it as
# <output_base>/<field>/<image stem>_<field>.jpg.
for img_path in image_paths:
    img = cv2.imread(str(img_path))
    if img is None:
        # cv2.imread returns None (instead of raising) for files it cannot decode.
        print(f"Could not read image: {img_path}")
        continue

    basename = img_path.stem

    for field, (x, y, w, h) in crop_regions.items():
        # Rows are indexed by y and columns by x. Slicing clamps silently at
        # the image border, so a region outside the image yields an empty array.
        crop = img[y:y+h, x:x+w]
        if crop.size == 0:
            # Writing an empty array would abort the whole batch; report and move on.
            print(f"Empty crop for field '{field}' in {img_path.name}; skipped")
            continue

        crop_path = output_base / field / f"{basename}_{field}.jpg"
        # cv2.imwrite can fail by returning False without raising
        # (e.g. when the target folder is missing), so check it explicitly.
        if not cv2.imwrite(str(crop_path), crop):
            print(f"Could not write {crop_path}")
print("All fields cropped and saved into their respective folders.")

All fields cropped and saved into their respective folders.


# White-out rectangle mask code

In [11]:
# Input: the payee crops written by the cropping step. Output: masked copies.
# The sub-folder is joined with "/" so it resolves on every OS. A backslash
# inside the string is an ordinary file-name character on Linux/Colab, so
# "cropped_fields\payee" would never match the cropped_fields/payee folder.
input_folder = Path("cropped_fields") / "payee"
output_folder = Path("whited_payee")
output_folder.mkdir(parents=True, exist_ok=True)

In [12]:
# I have updated it Amrit
# Rectangles painted white on every payee crop. The whiteout loop unpacks each
# tuple as (x1, y1, x2, y2) and passes (x1, y1) and (x2, y2) to cv2.rectangle
# as two corner points.
# NOTE(review): the third entry has x1 > x2 (2096 vs 2036), and the second
# entry's y2 = 124 is larger than the 120 px height of the "payee" crop region —
# presumably intentional, but confirm.
whiteout_regions = [
    (2, 0, 78, 118),
    (8, 104, 2096, 124),
    (2096, 0, 2036, 120),
    (48, 46, 104, 92),
    (2044, 36, 2096, 82)
]

In [13]:
# Paint every rectangle in whiteout_regions white on each image found in
# input_folder and save the result under the same name in output_folder.
image_extensions = [".jpg", ".jpeg", ".png"]
masked_count = 0  # images successfully written, used to detect an empty run
for img_path in input_folder.glob("*"):
    if img_path.suffix.lower() not in image_extensions:
        continue

    img = cv2.imread(str(img_path))
    if img is None:
        # cv2.imread returns None (instead of raising) for files it cannot decode.
        print(f"⚠️ Skipping unreadable image: {img_path.name}")
        continue
    for (x1, y1, x2, y2) in whiteout_regions:
        # A negative thickness makes cv2.rectangle draw a filled rectangle.
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 255), -1)  # white fill

    # Save the result; cv2.imwrite can fail by returning False without raising.
    output_path = output_folder / img_path.name
    if cv2.imwrite(str(output_path), img):
        masked_count += 1
    else:
        print(f"⚠️ Could not write image: {output_path.name}")

if masked_count == 0:
    # Without this, a wrong or empty input folder looks like a successful run.
    print(f"⚠️ No images were processed from {input_folder}")
print("Whiteout complete", output_folder)

Whiteout complete whited_payee
