# Draw Region of Interest (ROI) on an image in OpenCV with Python

In [53]:
from matplotlib import pyplot as plt
import cv2
import os
from pdf2image import convert_from_path
from PIL import Image
import pytesseract
import numpy as np
import re
import pandas as pd

In [54]:
# Absolute path of the current working directory, ending with a path separator.
# Joining with an empty component appends the platform's own separator, so the
# value is unchanged on Windows ('C:\\...\\') and is also valid on POSIX systems,
# unlike the previous hard-coded '\\'.
cwd_path = os.path.join(os.getcwd(), '')
cwd_path

'c:\\Users\\Jun\\Desktop\\RDS3\\FYP2\\'

In [55]:
# Ask the user which bank statement PDF should be processed.
# `bank_chosen` is the bank name used to build every file path later in the
# notebook; it stays None when the user exits or enters an invalid choice.
SUPPORTED_BANKS = {1: 'BCA', 2: 'Mandiri'}  # menu number -> bank name
EXIT_CHOICE = 3

bank_chosen = None

print('+----------------------+')
print('| Supported PDF files: |')
print('+----------------------+')
print('| 1. BCA               |')
print('| 2. Mandiri           |')
print('| 3. Exit              |')
print('+----------------------+')

raw_choice = input('Select the PDF file you want to extract now (1-3): ')

try:
    choice = int(raw_choice)
except ValueError:
    # Non-numeric input previously aborted the cell with an uncaught ValueError
    choice = None

if choice in SUPPORTED_BANKS:
    bank_chosen = SUPPORTED_BANKS[choice]
elif choice == EXIT_CHOICE:
    print('Exit!')
else:
    # Out-of-range numbers used to be reported as a deliberate exit
    print(f'Invalid choice {raw_choice!r}: expected a number from 1 to 3.')


if bank_chosen is not None:
    print(f'Bank selected: {bank_chosen}')

+----------------------+
| Supported PDF files: |
+----------------------+
| 1. BCA               |
| 2. Mandiri           |
| 3. Exit              |
+----------------------+
Bank selected: BCA


In [56]:
# Convert every page of the selected PDF into a JPEG image using
# "convert_from_path" from "pdf2image" and save it as page<N>.jpg.
if bank_chosen is None:
    # Without a selection the paths below would point at a non-existent 'None.pdf'
    raise RuntimeError('No bank selected - re-run the selection cell and choose 1 or 2.')

# Paths are built with os.path.join instead of backslashes inside f-strings:
# sequences such as '\{' and '\p' are invalid escapes (SyntaxWarning on Python 3.12+)
# and only work on Windows.
pdf_file = os.path.join('Data Source PDF', f'{bank_chosen}.pdf')
page_image_dir = os.path.join('PDF Images', bank_chosen)

# img.save() does not create missing folders, so make sure the target exists
os.makedirs(page_image_dir, exist_ok=True)

images = convert_from_path(pdf_path=pdf_file, dpi=350)
for index, img in enumerate(images):
    img.save(os.path.join(page_image_dir, f'page{index + 1}.jpg'), 'JPEG')

In [57]:
# List the page images that were generated from the PDF, in true page order.
# Only files named 'page<N>.jpg' are kept and they are sorted by N: a raw directory
# listing has no guaranteed order (or is lexicographic, putting 'page10.jpg' before
# 'page2.jpg') and may contain unrelated files.
pdf_pages_dir = os.path.join(cwd_path, 'PDF Images', bank_chosen)
all_pdf_pages_in_img = sorted(
    (name for name in os.listdir(pdf_pages_dir) if re.fullmatch(r'page\d+\.jpg', name)),
    key=lambda name: int(re.search(r'\d+', name).group()),
)
print(all_pdf_pages_in_img)

['page1.jpg', 'page2.jpg', 'page3.jpg', 'page4.jpg']


In [58]:
# Create a dictionary to store all the image paths, each image path represents a page of the PDF file
# Key: page number parsed from the file name (e.g. 1, 2, 3, etc.)
# Value: image path (e.g. .\PDF Images\BCA\page1.jpg, .\PDF Images\BCA\page2.jpg, etc.)
image_path_dict = {}

for page in all_pdf_pages_in_img:
    # Take the page number from the file name rather than from the position in the
    # listing, so the key is correct even when the listing is not in page order.
    page_match = re.fullmatch(r'page(\d+)\.jpg', page, flags=re.IGNORECASE)
    if page_match is None:
        # Not a page image (e.g. a stray file in the folder) - skip it
        continue
    image_path_dict[int(page_match.group(1))] = os.path.join('.', 'PDF Images', bank_chosen, page)

# Iterate pages in ascending page order from here on
image_path_dict = dict(sorted(image_path_dict.items()))

for number, path in image_path_dict.items():
    print(f'Image path {number}: {path}')

Image path 1: .\PDF Images\BCA\page1.jpg
Image path 2: .\PDF Images\BCA\page2.jpg
Image path 3: .\PDF Images\BCA\page3.jpg
Image path 4: .\PDF Images\BCA\page4.jpg


In [59]:
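# NOTE: Early single-image prototype, kept commented out for reference only; it is
# superseded by the "Select Multiple ROIs" section below. Its resize ratio is computed
# from the height and applied to the height again, so it would produce a 578x578 image.
# # Read image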
# img_raw = cv2.imread(image_path_dict[2])
# print(img_raw.shape)

# # We need to keep in mind aspect ratio so the image does not look skewed or distorted. 
# # Therefore, we calculate the ratio of the new image to the old image
# r = 578.0 / img_raw.shape[0]
# dim = (578, int(img_raw.shape[0] * r))

# # Perform the actual resizing of the image and show it
# resized_img = cv2.resize(img_raw, dim, interpolation = cv2.INTER_AREA)

# # Select ROIs function
# ROIs = cv2.selectROIs(windowName = "Select Rois", img = resized_img)

# # Print rectangle points of selected roi
# print(f'Type: {type(ROIs)}')
# print(f'{ROIs}')

# for x in ROIs:
#     # Print rectangle points of selected roi
#     print(f'Type: {type(x)}')
#     print(f'{x}')



# # cv2.imshow('Image', resized_img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

## Select Multiple ROIs

In [60]:
# Let the user draw the ROIs on the first page only (note the `break` at the end of
# the loop body), then crop, preview and save each ROI and record its coordinates.

# ROI boxes as plain [x, y, w, h] lists. This list was appended to without ever being
# initialised, so the cell failed with NameError on a fresh kernel; resetting it here
# also keeps the cell idempotent when it is re-run.
ROIs_lst = []

for page_num, img_path in image_path_dict.items():

    # Read image; cv2.imread returns None instead of raising when the file cannot be read
    img_raw = cv2.imread(img_path)
    if img_raw is None:
        raise FileNotFoundError(f'Could not read image: {img_path}')
    print(f'Original image shape: {img_raw.shape}')

    # Keep the aspect ratio so the image does not look skewed or distorted:
    # scale the height by the same ratio that brings the width down to 568 px
    r = 568.0 / img_raw.shape[1]
    dim = (568, int(img_raw.shape[0] * r))

    # Perform the actual resizing of the image
    resized_img = cv2.resize(img_raw, dim, interpolation = cv2.INTER_AREA)
    print(f'Resized image shape: {resized_img.shape}')

    # Opens an interactive window; the result holds one (x, y, w, h) row per selection
    ROIs = cv2.selectROIs(windowName = "Select Rois", img = resized_img)

    # Print rectangle points of selected roi
    print(f'ROIs for all cropped images:\n{ROIs}')

    # Output folder for this page. Built with os.path.join because backslashes inside
    # the previous f-strings ('\{', '\P', '\c', '\R') are invalid escape sequences, and
    # created up front because neither cv2.imwrite nor open() creates missing folders.
    crop_dir = os.path.join('Crop Images', bank_chosen, f'Page{page_num}')
    os.makedirs(crop_dir, exist_ok=True)

    # "crop_number" is the counter used to save each crop under a different name
    for crop_number, roi in enumerate(ROIs):

        # Extract the ROI coordinates and width & height
        x, y, w, h = roi

        # Crop the ROI from the resized image (ROI coordinates refer to the resized image)
        img_crop = resized_img[y:y+h, x:x+w]

        # Show cropped image
        cv2.imshow(f"crop{crop_number}", img_crop)

        # Save cropped image; cv2.imwrite reports failure through its return value
        crop_file = os.path.join(crop_dir, f'crop{crop_number + 1}.jpg')
        if not cv2.imwrite(crop_file, img_crop):
            raise IOError(f'Could not write cropped image: {crop_file}')

        ROIs_lst.append(list(roi))

    # Save the ROI coordinates in a text file, one "x, y, w, h" line per ROI.
    # Opening with 'w' replaces any previous content; as before, an existing file is
    # left untouched when no ROI was selected.
    if ROIs_lst:
        with open(os.path.join(crop_dir, 'ROIs.txt'), 'w') as f:
            for roi_values in ROIs_lst:
                f.write(', '.join(f'{value}' for value in roi_values))
                f.write('\n')

    # Destroy window
    cv2.waitKey(0) # Press any button to stop
    cv2.destroyAllWindows()
    break

Original image shape: (4094, 2893, 3)
Resized image shape: (803, 568, 3)
ROIs for all cropped images:
[[ 19  87 249  77]
 [302  87 250  76]
 [ 16 221 543 564]]


In [62]:
# Apply the ROIs chosen on page 1 to every remaining page: crop, preview and save
# each ROI and record the ROI coordinates next to the crops.

# `rois` is not defined anywhere in the visible notebook (it only existed as leftover
# kernel state), so it is reloaded from the ROI file of page 1, which stores one
# "x, y, w, h" line per selected box.
first_page_roi_file = os.path.join('Crop Images', bank_chosen, 'Page1', 'ROIs.txt')
with open(first_page_roi_file) as f:
    rois = np.array(
        [[int(value) for value in line.split(',')] for line in f if line.strip()],
        dtype=int,
    ).reshape(-1, 4)

for page_num, img_path in image_path_dict.items():

    # Page 1 is handled by the interactive ROI selection
    if page_num <= 1:
        continue

    # Read image; cv2.imread returns None instead of raising when the file cannot be read
    img_raw = cv2.imread(img_path)
    if img_raw is None:
        raise FileNotFoundError(f'Could not read image: {img_path}')
    print(f'Original image shape: {img_raw.shape}')

    # Keep the aspect ratio so the image does not look skewed or distorted:
    # scale the height by the same ratio that brings the width down to 568 px
    r = 568.0 / img_raw.shape[1]
    dim = (568, int(img_raw.shape[0] * r))

    # Perform the actual resizing of the image
    resized_img = cv2.resize(img_raw, dim, interpolation = cv2.INTER_AREA)
    print(f'Resized image shape: {resized_img.shape}')

    print(f'ROIs for all cropped images:\n{rois}\n')

    # Output folder for this page. Built with os.path.join because backslashes inside
    # the previous f-strings ('\{', '\P', '\c', '\R') are invalid escape sequences, and
    # created up front because neither cv2.imwrite nor open() creates missing folders.
    crop_dir = os.path.join('Crop Images', bank_chosen, f'Page{page_num}')
    os.makedirs(crop_dir, exist_ok=True)

    for crop_number, roi in enumerate(rois):

        # Extract the ROI coordinates and width & height
        x, y, w, h = roi

        # Crop the ROI from the resized image (ROI coordinates refer to the resized image)
        img_crop = resized_img[y:y+h, x:x+w]

        # Show cropped image
        cv2.imshow(f"crop{crop_number}", img_crop)

        # Save cropped image; cv2.imwrite reports failure through its return value
        crop_file = os.path.join(crop_dir, f'crop{crop_number + 1}.jpg')
        if not cv2.imwrite(crop_file, img_crop):
            raise IOError(f'Could not write cropped image: {crop_file}')

    # Save the ROI coordinates in a text file, one "x, y, w, h" line per ROI.
    # Opening with 'w' replaces any previous content; as before, an existing file is
    # left untouched when there is no ROI to record.
    if len(rois) > 0:
        with open(os.path.join(crop_dir, 'ROIs.txt'), 'w') as f:
            for roi in rois:
                f.write(', '.join(f'{value}' for value in roi))
                f.write('\n')

    # Destroy window
    cv2.waitKey(0) # Press any button to stop
    cv2.destroyAllWindows()

Original image shape: (4094, 2893, 3)
Resized image shape: (803, 568, 3)
ROIs for all cropped images:
[[ 19  87 249  77]
 [302  87 250  76]
 [ 16 221 543 564]]

Original image shape: (4094, 2893, 3)
Resized image shape: (803, 568, 3)
ROIs for all cropped images:
[[ 19  87 249  77]
 [302  87 250  76]
 [ 16 221 543 564]]

Original image shape: (4094, 2893, 3)
Resized image shape: (803, 568, 3)
ROIs for all cropped images:
[[ 19  87 249  77]
 [302  87 250  76]
 [ 16 221 543 564]]

