In [157]:
import cv2
import numpy as np
import os
from pathlib import Path
import shutil


In [158]:
def preprocess_image(image):
    """Binarise a BGR image so that dark strokes become white foreground.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian kernel
    and thresholded with Otsu's method in inverted-binary mode.

    Args:
        image: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The thresholded single-channel image returned by ``cv2.threshold``.
    """
    smoothed = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0)
    # With THRESH_OTSU the threshold is chosen automatically, so the explicit
    # 0 passed below is only a placeholder.
    otsu_inverted = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    return cv2.threshold(smoothed, 0, 255, otsu_inverted)[1]

def extract_letters(image):
    """Find the bounding box of every outer contour in a binary image.

    Args:
        image: Single-channel binary image (e.g. the output of a threshold).
            A copy is searched, so the caller's array is left untouched.

    Returns:
        list: One ``(x, y, w, h)`` tuple per external contour, in the order
        OpenCV reports the contours. That order is not guaranteed to be
        reading order.
    """
    found = cv2.findContours(image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); the contours are the second-to-last item in both.
    contours = found[-2]
    return [tuple(cv2.boundingRect(contour)) for contour in contours]

def global_thresholding(image, letter_path, i):
    """Binarise one cropped letter and save it as ``letter_{i}.jpg``.

    Pixels brighter than 127 become black and all others white, so dark ink
    on a light page is stored as white text on a black background.

    Args:
        image: Cropped letter, either BGR (3-D array) or already grayscale
            (2-D array).
        letter_path: Directory the file is written into (str or Path).
        i: Index used in the output file name.

    Returns:
        bool: The value returned by ``cv2.imwrite``; ``False`` means the file
        could not be written (for example, the directory does not exist).
    """
    # Convert to grayscale only when the crop still has a channel axis.
    if image.ndim > 2:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # THRESH_BINARY_INV thresholds and inverts in a single pass. The inversion
    # is unconditional: it is what makes the text white on black.
    _, binary_image = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)

    # imwrite reports failure through its return value, so pass it on to the
    # caller instead of discarding it.
    return cv2.imwrite(f'{letter_path}/letter_{i}.jpg', binary_image)


def processing_images(image_path, letter_path, min_width=10, min_height=15):
    """Segment an image into letters and save each one as its own file.

    The image is read, binarised with ``preprocess_image``, split into
    bounding boxes with ``extract_letters``, and every box that is not
    dot-sized is cropped from the original image and passed to
    ``global_thresholding`` with a running index that starts at 1.

    Args:
        image_path: Path of the image to read (str or Path).
        letter_path: Directory that receives the letter files.
        min_width: A region is skipped only when its width is below this
            value AND its height is below ``min_height``.
        min_height: See ``min_width``.

    Returns:
        int: Number of letter crops passed to ``global_thresholding``.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(str(image_path))
    # imread signals failure by returning None instead of raising; fail here
    # with a clear message rather than deep inside cvtColor.
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")

    binary = preprocess_image(image)
    letter_regions = extract_letters(binary)

    # Drop regions small enough to be dots or noise before numbering, so the
    # saved indices stay consecutive.
    kept_regions = [
        (x, y, w, h)
        for (x, y, w, h) in letter_regions
        if not (w < min_width and h < min_height)
    ]

    for index, (x, y, w, h) in enumerate(kept_regions, start=1):
        # Crop from the original image, not from the binarised one.
        global_thresholding(image[y:y + h, x:x + w], letter_path, index)

    return len(kept_regions)


In [159]:
# Source images and the root folder that receives one sub-folder per image
directory = './images/Females'
letters_directory = Path('./letters/females')

# Start from a clean output tree so crops from earlier runs never linger
if letters_directory.exists():
    shutil.rmtree(letters_directory)

# parents=True also creates './letters' when it does not exist yet
letters_directory.mkdir(parents=True)

# os.listdir order is arbitrary; sort for a reproducible processing order
files = sorted(os.listdir(directory))

for file in files:
    image_path = f'{directory}/{file}'

    # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints): they are not images
    if not os.path.isfile(image_path):
        continue

    # A separate name keeps `letters_directory` pointing at the output root
    # after the loop instead of at the last image's folder
    image_letters_directory = letters_directory / file.replace(".jpg", "")
    image_letters_directory.mkdir()
    processing_images(image_path, image_letters_directory)