ref: https://stackoverflow.com/questions/61291795/detecting-borders-of-a-page-on-a-table-and-then-refocus

* pip install numpy
* pip install opencv-python
* pip install matplotlib

In [1]:
import os
import cv2
#import PIL
#print(PIL.PILLOW_VERSION)
import matplotlib.pyplot as plt

In [2]:
# Folder that holds the source photos; also passed to bulkCropImages below.
input_path = r'D:\Projects\repos\ai\cv-project-4-smart-cropping\data'
# Base name of the single image used in the step-by-step cells; ".jpg" is
# appended when the file is read and when the cropped copy is written.
image_filename = '20221205_083430'
# Folder that receives the cropped images.
output_path = r'D:\Projects\repos\ai\cv-project-4-smart-cropping\cropped'

# Histogram to find threshold values

In [None]:
#Read image and convert to gray.
# Build the path with os.path.join: plain concatenation dropped the separator
# between the folder and the file name.
image_path = os.path.join(input_path, image_filename + ".jpg")
image = cv2.imread(image_path)
# cv2.imread signals failure by returning None instead of raising, so fail
# here with a clear message rather than inside cvtColor.
if image is None:
    raise FileNotFoundError("Could not read image: " + image_path)
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

In [None]:
#check histogram to choose threshold values. https://web.archive.org/web/20210224013921/https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_histograms/py_histogram_begins/py_histogram_begins.html
color = ('b','g','r')
fig = plt.figure(figsize=(12,12))
ax = fig.add_subplot(1,2,1)
# OpenCV stores pixels as BGR while matplotlib expects RGB; convert for the
# preview only so red and blue are not swapped on screen.
ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
ax1 = fig.add_subplot(1,2,2)
# One 256-bin histogram per channel, drawn in the channel's own colour.
for i,col in enumerate(color):
    histogram = cv2.calcHist([image],[i],None,[256],[0,256])
    ax1.plot(histogram,color = col)
# The axis limits do not depend on the channel, so set them once.
ax1.set_xlim([0,256])

In [None]:
# Smooth the grayscale image with a large box filter so the notebook's
# details are washed out before thresholding.
blur_kernel_size = (21, 21)
blurred_gray_image = cv2.blur(gray_image, blur_kernel_size)

In [None]:
# Binarise the blurred image with the cut-off read off the histogram above.
# https://en.wikipedia.org/wiki/Thresholding_(image_processing)
threshold_value = 165
max_value = 255
_, thresholded_blurry_image = cv2.threshold(
    blurred_gray_image,
    threshold_value,
    max_value,
    cv2.THRESH_BINARY,
)

In [None]:
#Detect contours (which are undivided, closed shapes). https://docs.opencv.org/3.4/d4/d73/tutorial_py_contours_begin.html
# OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
# (contours, hierarchy); keeping the last two values works with both.
contours, hierarchy = cv2.findContours(
    thresholded_blurry_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)[-2:]

In [None]:
#Draw the biggest contour's outline on a copy of the original image, if any contour was found.
output = image.copy()
if contours:
    # The contour enclosing the largest area is taken to be the page.
    c = max(contours, key=cv2.contourArea)
    # coordinates of the contour's axis-aligned bounding box
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(output, (x, y), (x + w, y + h), (0, 0, 255), 2)
else:
    # Nothing survived thresholding: fall back to the full frame so the
    # cells that use x, y, w, h still run instead of raising NameError.
    x, y = 0, 0
    h, w = image.shape[:2]

In [None]:
# Display the annotated copy. matplotlib expects RGB, so the BGR channels are
# reordered first.
# NOTE: `output` itself is overwritten with the converted image, so running
# this cell a second time swaps the channels back.
output = cv2.cvtColor(src=output, code=cv2.COLOR_BGR2RGB)
plt.imshow(output)

In [None]:
#now crop image
# Crop from the untouched `image` rather than from `output`: `output` carries
# the red outline drawn around the page, and slicing it in place would crop
# again every time this cell is re-run.
cropped_bgr = image[y:y+h, x:x+w]
# Keep `output` in RGB order so it displays correctly in matplotlib.
output = cv2.cvtColor(cropped_bgr, cv2.COLOR_BGR2RGB)
plt.imshow(output)

In [None]:
# save cropped image
# cv2.imwrite expects BGR data, while `output` was converted to RGB for
# display, so take the crop from the original BGR `image` instead.
cropped_bgr = image[y:y+h, x:x+w]
# os.path.join replaces the manual concatenation, which contained a stray
# unary "+" in front of a string and raised TypeError.
cropped_file = os.path.join(output_path, image_filename + '_cropped' + '.jpg')
status = cv2.imwrite(cropped_file, cropped_bgr)
print("Image written to file-system : ",status)

Use the cv2.imwrite() function to save the image.

Be aware that this method won't always work, because we read the histogram ourselves and pick the threshold value by hand. For a more general approach, try adaptive thresholding, or let an algorithm choose the threshold from the histogram (for example, Otsu's method).

# Bulk Smart Page Cropping

In [23]:
def bulkCropImages(folder):
    for filename in os.listdir(folder):
        image = cv2.imread(os.path.join(folder,filename))
        if image is not None:
            #Read image and convert to gray.
            gray_image = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
            #Use blur to get rid of the notebook's details.
            blurred_gray_image = cv2.blur(gray_image,(21,21))
            #forget histogram, we assume threshold values from testing earlier
            _,thresholded_blurry_image = cv2.threshold(blurred_gray_image,150,255,cv2.THRESH_BINARY) #165, 255 #120,255
            #Detect contours (which are undivided, closed shapes).
            contours, hierarchy = cv2.findContours(thresholded_blurry_image,cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            if len(contours) != 0:
                c = max(contours, key = cv2.contourArea)
                # coordinates of the contour
                x,y,w,h = cv2.boundingRect(c)
            #now crop image
            output = image[y:y+h, x:x+w]
            # save cropped image
            status = cv2.imwrite(output_path + "\\" + filename + '_cropped' + '.jpg', output)
            print(filename + " written to " + output_path + ": " + str(status))

In [24]:
# Run the bulk crop over every file in the input folder.
bulkCropImages(input_path)

20221226_190425.jpg written to D:\Projects\repos\ai\cv-project-4-smart-cropping\cropped: True
20221226_190439.jpg written to D:\Projects\repos\ai\cv-project-4-smart-cropping\cropped: True
20221226_190445.jpg written to D:\Projects\repos\ai\cv-project-4-smart-cropping\cropped: True
20221226_190451.jpg written to D:\Projects\repos\ai\cv-project-4-smart-cropping\cropped: True
20221226_190500.jpg written to D:\Projects\repos\ai\cv-project-4-smart-cropping\cropped: True
20221226_190503.jpg written to D:\Projects\repos\ai\cv-project-4-smart-cropping\cropped: True
20221226_190508.jpg written to D:\Projects\repos\ai\cv-project-4-smart-cropping\cropped: True
20221226_190513.jpg written to D:\Projects\repos\ai\cv-project-4-smart-cropping\cropped: True
20221226_190519.jpg written to D:\Projects\repos\ai\cv-project-4-smart-cropping\cropped: True
20221226_190524.jpg written to D:\Projects\repos\ai\cv-project-4-smart-cropping\cropped: True
20221226_190533.jpg written to D:\Projects\repos\ai\cv-proje