<a href="https://colab.research.google.com/github/sandushiw98/Early-Detection-of-Dysgraphia-in-Sinhala-Speaking-Children-Using-Multi-Modal-Machine-Learning/blob/main/Handwritings_Preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime so that files stored on Drive
# become visible on the local filesystem under the mount point below.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Mounted at /content/drive


In [None]:
import cv2
import os
import numpy as np

# Preprocess the dysgraphic handwriting samples.
#
# Every JPG/JPEG/PNG file found in `input_folder` is read as grayscale,
# blurred, binarised with an adaptive threshold, cleaned with an erosion
# followed by a dilation, resized to `desired_size` and written to
# `output_folder` as "DH<index>.png".

# Define the paths
input_folder = '/content/drive/MyDrive/Research_Dysgraphia/Dysgraphia_Handwritings -Before_Preprocessing'
output_folder = '/content/drive/MyDrive/Research_Dysgraphia/Preprocessed_Dysgraphic_Words'

# Desired size for all images (passed to cv2.resize as dsize)
desired_size = (256, 256)

# Lower-case file extensions that are treated as images
image_extensions = (".jpg", ".jpeg", ".png")

# Create the output folder if it does not exist. exist_ok=True replaces the
# separate exists() check, which could race with another process creating it.
os.makedirs(output_folder, exist_ok=True)

# The structuring elements never change, so build them once outside the loop.
kernel_erode = np.ones((2, 2), np.uint8)
kernel_dilate = np.ones((3, 3), np.uint8)  # Slightly larger kernel for dilation

processed_count = 0  # Counter to track how many images are processed

# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# keep the source-file -> "DH<index>" mapping identical from run to run.
# `index` is the entry's position in that sorted listing (starting at 1); files
# that are skipped or fail therefore leave gaps in the output numbering.
for index, filename in enumerate(sorted(os.listdir(input_folder)), start=1):
    file_path = os.path.join(input_folder, filename)
    file_extension = os.path.splitext(filename)[1].lower()

    if file_extension not in image_extensions:
        continue  # Not a JPG, JPEG or PNG file

    try:
        # Read the image in grayscale; cv2.imread returns None instead of
        # raising when the file cannot be decoded.
        image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise ValueError(f"Unable to read the image {filename}")

        # Apply Gaussian blur to reduce noise
        blurred_image = cv2.GaussianBlur(image, (5, 5), 0)

        # Adaptive (Gaussian-weighted) threshold with block size 9 and
        # constant 4; THRESH_BINARY_INV produces the inverted binary image.
        thresh_image = cv2.adaptiveThreshold(blurred_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                             cv2.THRESH_BINARY_INV, 9, 4)

        # Morphological erosion to clean up the image
        morph_image = cv2.morphologyEx(thresh_image, cv2.MORPH_ERODE, kernel_erode)

        # Apply a slight dilation to enhance the characters
        morph_image = cv2.morphologyEx(morph_image, cv2.MORPH_DILATE, kernel_dilate)

        # Resize the processed image to the fixed target size
        # NOTE(review): the aspect ratio is not preserved and the default
        # interpolation is used - confirm this is what the model expects.
        resized_image = cv2.resize(morph_image, desired_size)

        # Generate a new filename with custom labeling and save as PNG
        new_filename = f"DH{index}.png"
        output_path = os.path.join(output_folder, new_filename)

        # cv2.imwrite reports failure through its return value rather than an
        # exception; without this check a failed write was counted as success.
        if not cv2.imwrite(output_path, resized_image):
            raise IOError(f"Unable to write the image {new_filename}")
        processed_count += 1

    except Exception as e:
        # Best effort: report the problem and carry on with the next file
        print(f"Error processing {filename}: {e}")

print(f"Processing complete. Processed {processed_count} images.")


Processing complete. Processed 73 images.


In [None]:
import cv2
import os
import numpy as np

# Preprocess the non-dysgraphic (normal) handwriting samples.
#
# Every JPEG file found in `input_folder` is read as grayscale, median
# filtered, binarised with an adaptive threshold, opened morphologically,
# stripped of small connected components, resized to `desired_size` and
# written to `output_folder` as "NDH<index>.png".

# Define the paths
input_folder = '/content/drive/MyDrive/Research_Dysgraphia/Normal_Handwritings-Before_Preprocessing'
output_folder = '/content/drive/MyDrive/Research_Dysgraphia/preprocessed_Non_Dysgraphic_Letters'

# Desired size for all images (passed to cv2.resize as dsize)
desired_size = (256, 256)

# Lower-case extensions treated as JPEG images. ".jpg" is included because it
# is the same format as ".jpeg"; previously such files were silently skipped.
image_extensions = (".jpg", ".jpeg")

# Connected components with fewer pixels than this are treated as noise and
# removed; adjust as needed.
min_component_area = 30

# Create the output folder if it does not exist. exist_ok=True replaces the
# separate exists() check, which could race with another process creating it.
os.makedirs(output_folder, exist_ok=True)

# The structuring element never changes, so build it once outside the loop.
kernel = np.ones((3, 3), np.uint8)

processed_count = 0  # Counter to track how many images are processed

# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# keep the source-file -> "NDH<index>" mapping identical from run to run.
# `index` is the entry's position in that sorted listing (starting at 1); files
# that are skipped or fail therefore leave gaps in the output numbering.
for index, filename in enumerate(sorted(os.listdir(input_folder)), start=1):
    if not filename.lower().endswith(image_extensions):
        continue  # Only process JPEG images

    file_path = os.path.join(input_folder, filename)

    try:
        # Read the image in grayscale; cv2.imread returns None instead of
        # raising when the file cannot be decoded.
        image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise ValueError(f"Unable to read the image {filename}")

        # Apply median blur to reduce salt-and-pepper noise
        median_filtered = cv2.medianBlur(image, 5)

        # Adaptive (Gaussian-weighted) threshold with block size 11 and
        # constant 2; THRESH_BINARY_INV produces the inverted binary image.
        thresh_image = cv2.adaptiveThreshold(median_filtered, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                             cv2.THRESH_BINARY_INV, 11, 2)

        # Morphological opening to clean up the image
        morph_image = cv2.morphologyEx(thresh_image, cv2.MORPH_OPEN, kernel)

        # Identify small objects using connected components
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(morph_image, connectivity=8, ltype=cv2.CV_32S)
        areas = stats[1:, cv2.CC_STAT_AREA]  # Area of each component, background (label 0) excluded

        # Per-label lookup table: True for components that are too small.
        # Label 0 (background) stays False. Indexing the table with `labels`
        # yields the mask of all noise pixels in a single pass instead of one
        # full-image comparison per component.
        is_noise = np.zeros(num_labels, dtype=bool)
        is_noise[1:] = areas < min_component_area

        result_image = morph_image.copy()
        result_image[is_noise[labels]] = 0

        # Resize the processed image to the fixed target size
        # NOTE(review): the aspect ratio is not preserved and the default
        # interpolation is used - confirm this is what the model expects.
        resized_image = cv2.resize(result_image, desired_size)

        # Save the processed image in PNG format with custom labeling
        output_filename = f"NDH{index}.png"
        output_path = os.path.join(output_folder, output_filename)

        # cv2.imwrite reports failure through its return value rather than an
        # exception; without this check a failed write was counted as success.
        if not cv2.imwrite(output_path, resized_image):
            raise IOError(f"Unable to write the image {output_filename}")
        processed_count += 1

    except Exception as e:
        # Best effort: report the problem and carry on with the next file
        print(f"Error processing {filename}: {e}")

print(f"Processing complete. Processed {processed_count} images.")


Processing complete. Processed 306 images.
