# In [19]:
import numpy as np
from tqdm import tqdm
import cv2
import os
import imutils

def crop_img(img, add_pixels=0):
    """
    Crop an image to the bounding box of its largest bright region.

    The image is converted to grayscale, blurred, thresholded and cleaned
    with erode/dilate; the largest external contour of the resulting mask
    is located and the rectangle spanned by its extreme points (inclusive)
    is cut out of the original image.

    Args:
        img: Image array as returned by ``cv2.imread`` (BGR channel order),
            or an already single-channel 2-D array.
        add_pixels: Optional non-negative margin, in pixels, added on every
            side of the bounding box. The box is clamped to the image.

    Returns:
        A copy of the cropped region. If thresholding leaves no contour
        (e.g. an all-dark image), a copy of the whole input is returned.

    Raises:
        ValueError: If ``img`` is ``None`` or empty, which is what
            ``cv2.imread`` produces for unreadable files.
    """
    if img is None or getattr(img, "size", 0) == 0:
        raise ValueError("crop_img() expects a non-empty image array")

    # cv2.imread() delivers BGR, so BGR2GRAY gives the correct luminance
    # weights; a 2-D input is already grayscale and is used as is.
    gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)

    # Apply thresholding and remove small noise blobs
    thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.erode(thresh, None, iterations=2)
    thresh = cv2.dilate(thresh, None, iterations=2)

    # Find contours and select the largest one
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    if not cnts:
        # Nothing survived the threshold; there is no region to crop to.
        return img.copy()
    c = max(cnts, key=cv2.contourArea)

    # Extreme coordinates of the contour
    xs = c[:, :, 0]
    ys = c[:, :, 1]
    height, width = img.shape[:2]

    # Slice ends are exclusive, hence the +1 so the extreme row/column is
    # kept. Clamping prevents a margin from producing negative indices,
    # which NumPy would interpret as counting from the far edge.
    top = max(int(ys.min()) - add_pixels, 0)
    bottom = min(int(ys.max()) + add_pixels + 1, height)
    left = max(int(xs.min()) - add_pixels, 0)
    right = min(int(xs.max()) + add_pixels + 1, width)

    return img[top:bottom, left:right].copy()

def _clean_split(src_root, dst_root, img_size):
    """
    Crop and resize every image of one dataset split.

    ``src_root`` is expected to contain one sub-directory per class; each
    image found in ``src_root/<class>`` is cropped with ``crop_img``,
    resized to ``img_size`` x ``img_size`` and written under the same file
    name to ``dst_root/<class>``.

    Entries of ``src_root`` that are not directories and files that
    ``cv2.imread`` cannot decode are skipped instead of aborting the run.
    """
    for class_name in os.listdir(src_root):
        src_dir = os.path.join(src_root, class_name)
        if not os.path.isdir(src_dir):
            # Stray files in the dataset root are not class folders.
            continue

        dst_dir = os.path.join(dst_root, class_name)
        os.makedirs(dst_dir, exist_ok=True)

        for img_file in os.listdir(src_dir):
            src_file = os.path.join(src_dir, img_file)
            img = cv2.imread(src_file)
            if img is None:
                # cv2.imread signals an unreadable/non-image file with None.
                print("Skipping unreadable image: {}".format(src_file))
                continue

            resized_img = cv2.resize(crop_img(img), (img_size, img_size))
            dst_file = os.path.join(dst_dir, img_file)
            if not cv2.imwrite(dst_file, resized_img):
                print("Failed to write image: {}".format(dst_file))


if __name__ == "__main__":
    training_path = "dataset/Training"
    testing_path = "dataset/Testing"
    IMG_SIZE = 256

    # Process training images
    _clean_split(training_path, os.path.join("cleaned", "Training"), IMG_SIZE)

    # Process testing images
    _clean_split(testing_path, os.path.join("cleaned", "Testing"), IMG_SIZE)
