In [52]:
import cv2
import numpy as np
import os
from pathlib import Path
import shutil
from scipy.ndimage import distance_transform_edt


In [53]:
def black_white_image(image):
    """Return an inverted Otsu-thresholded binary version of a BGR image.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian
    kernel, and thresholded with Otsu's method using the inverted binary
    mode.
    """
    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0
    )
    # With THRESH_OTSU the threshold argument (0) is ignored and the level
    # is chosen automatically; only the thresholded image is returned.
    otsu_inverted = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    return cv2.threshold(smoothed, 0, 255, otsu_inverted)[1]

def extract_letters(image):
    """Return the bounding boxes of the external contours in a binary image.

    Each box is an ``(x, y, w, h)`` tuple, listed in the order in which
    ``cv2.findContours`` reports the contours. The search runs on a copy,
    so the caller's image is not touched.
    """
    found, _hierarchy = cv2.findContours(
        image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    return [tuple(cv2.boundingRect(outline)) for outline in found]

def global_thresholding(image, letter_path, i):
    """Binarise a letter crop and save it as ``letter_<i>.jpg``.

    Args:
        image: Letter crop, either a single-channel image or a multi-channel
            image that is converted with ``cv2.COLOR_BGR2GRAY``.
        letter_path: Directory the output file is written into.
        i: Index used in the output file name.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    # Convert the image to grayscale if it is not already single-channel
    if len(image.shape) > 2:
        image_read = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        image_read = image

    # Fixed global threshold at 127, always inverted: pixels at or below the
    # threshold become 255 and brighter pixels become 0. Dark text on a light
    # background therefore ends up white on black.
    _, binary_image = cv2.threshold(image_read, 127, 255, cv2.THRESH_BINARY_INV)

    # cv2.imwrite reports failure through its return value instead of
    # raising, so check it to avoid silently losing output.
    output_path = f'{letter_path}/letter_{i}.jpg'
    if not cv2.imwrite(output_path, binary_image):
        raise OSError(f'Could not write letter image: {output_path}')


def preprocessing_images(image_path, letter_path, max_letter_width=50,
                         min_region_size=15):
    """Cut one image into letter crops and save each crop thresholded.

    The image is binarised, the bounding boxes of its external contours are
    collected, and every box is split horizontally into slices of at most
    ``max_letter_width`` pixels. Each slice is taken from the original image
    and handed to ``global_thresholding``, which writes it as
    ``letter_<n>.jpg`` with ``n`` counting up from 1.

    Args:
        image_path: Path of the image to read (passed to ``cv2.imread``).
        letter_path: Directory that receives the letter images.
        max_letter_width: Maximum width in pixels of one slice; must be
            positive.
        min_region_size: Regions whose width and height are both below this
            value are skipped as likely dots.

    Raises:
        ValueError: If the image cannot be read.
    """
    # Load the image; cv2.imread returns None instead of raising on failure
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f'Could not read image: {image_path}')

    # Turn image black and white
    black_white = black_white_image(image)

    # Extract individual letters
    letter_regions = extract_letters(black_white)

    # Iterate through each letter region
    i = 1
    for (x, y, w, h) in letter_regions:
        # Skip if the region is likely to be a dot
        if w < min_region_size and h < min_region_size:
            continue

        # Number of slices needed so none is wider than max_letter_width
        # (ceiling division)
        num_divisions = -(-w // max_letter_width)

        # Proportional boundaries make the slices cover the full width;
        # a fixed w // num_divisions step would drop the remainder columns
        # at the right edge of the region.
        for j in range(num_divisions):
            start_x = x + (w * j) // num_divisions
            end_x = x + (w * (j + 1)) // num_divisions
            letter_image = image[y:y + h, start_x:end_x]

            if letter_image.size > 0:
                global_thresholding(letter_image, letter_path, i)
                i += 1



In [54]:
def process_images(letters_directory, images_directory):
    """Extract letters from every image file in a directory.

    The output directory is recreated from scratch. Each regular file in
    ``images_directory`` gets its own sub-directory, named after the file
    without its extension, which is filled by ``preprocessing_images``.

    Args:
        letters_directory: Output directory (``Path`` or str). Any existing
            content is deleted.
        images_directory: Directory containing the input images (``Path``
            or str).

    Raises:
        FileExistsError: If two input files share the same name apart from
            their extension, since they would map to the same output
            directory.
    """
    letters_directory = Path(letters_directory)

    # Start from an empty output directory
    if letters_directory.exists():
        shutil.rmtree(letters_directory)
    letters_directory.mkdir(parents=True)

    # Process each image, in a deterministic order
    for file in sorted(os.listdir(images_directory)):
        image_path = os.path.join(images_directory, file)

        # Sub-directories cannot be read as images; skip them
        if not os.path.isfile(image_path):
            continue

        # Path.stem drops only the final extension, whatever it is, whereas
        # replace(".jpg", "") would also remove ".jpg" from inside the name.
        letter_directory = letters_directory / Path(file).stem
        letter_directory.mkdir()
        preprocessing_images(image_path, letter_directory)


def main():
    """Run letter extraction for the female and male image sets.

    Images are read from ``./images/Females`` and ``./images/Males`` and the
    results are written to ``./letters/females`` and ``./letters/males``.
    """
    # Make sure the root output directory exists
    letters_root = Path('letters')
    letters_root.mkdir(parents=True, exist_ok=True)

    # Female images first, then male images
    for group in ('Females', 'Males'):
        process_images(letters_root / group.lower(), f'./images/{group}')


# Only run the pipeline when executed directly, not when imported
if __name__ == "__main__":
    main()
    