In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
# Dataset location and class names.
# Each entry of `categories` is joined onto `dataset_path` as a sub-folder
# name by the loading loop, and its position in the list is the integer
# class label assigned to every sample found in that folder.
categories = [
    "Low Risk for Dysgraphia",   # label 0
    "High Risk for Dysgraphia",  # label 1
]
dataset_path = "Handwriting"  # Replace with your dataset folder path

In [None]:
# Image preprocessing parameters.
# Every segmented crop is resized to (img_width, img_height) pixels.
img_width = 150
img_height = 150

# Parallel accumulators filled by the loading loop: one preprocessed crop per
# entry in `data`, with its integer class label at the same index in `labels`.
data, labels = [], []

In [None]:
# Function to segment words in an image
def segment_words(image, min_width=20, min_height=20, threshold=128):
    """Crop candidate word regions out of a BGR handwriting image.

    The image is converted to grayscale and binarised with an inverted
    threshold, so pixels at or below ``threshold`` (dark ink) become
    foreground. The bounding box of every external contour of that mask is
    then examined, and boxes larger than ``min_width`` x ``min_height`` are
    cropped from the original colour image.

    Note that a "word" here is one connected blob of ink: strokes that do not
    touch each other produce separate segments. No sorting is applied, so the
    segments come back in whatever order OpenCV reports the contours.

    Args:
        image: BGR image array as returned by ``cv2.imread``.
        min_width: Bounding boxes must be strictly wider than this (pixels).
        min_height: Bounding boxes must be strictly taller than this (pixels).
        threshold: Gray level used for the inverted binary threshold.

    Returns:
        A list of array views into ``image``, one per accepted bounding box.
        The list is empty when no contour passes the size filter.

    Raises:
        ValueError: If ``image`` is ``None`` or has no pixels, which is what
            ``cv2.imread`` yields for a missing or undecodable file.
    """
    # Fail with a readable message instead of an opaque OpenCV assertion
    # from cvtColor when the caller passes the result of a failed imread.
    if image is None or image.size == 0:
        raise ValueError(
            "segment_words received an empty image "
            "(cv2.imread returns None when a file cannot be read)"
        )

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Apply inverted binary thresholding: dark ink -> 255, light paper -> 0
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY_INV)
    # Find contours. OpenCV 3.x returns (image, contours, hierarchy) while
    # 2.x and 4.x return (contours, hierarchy); [-2] is the contour list in both.
    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    word_segments = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Filter small noise such as specks, dots and punctuation
        if w > min_width and h > min_height:
            word_segments.append(image[y:y + h, x:x + w])
    return word_segments

In [None]:
# Read and label the images.
# Start from empty lists so that re-running this cell does not append every
# sample a second time on top of the results of a previous run.
data = []
labels = []

# The position of a category in `categories` is its class label:
# 0 for "Low Risk for Dysgraphia", 1 for "High Risk for Dysgraphia".
for label, category in enumerate(categories):
    folder_path = os.path.join(dataset_path, category)

    # sorted() keeps the sample order independent of the filesystem's
    # directory listing order, which os.listdir does not guarantee.
    for img_name in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, img_name)
        try:
            # Read image. cv2.imread reports an unreadable or non-image file
            # by returning None instead of raising, so check explicitly.
            img = cv2.imread(img_path)
            if img is None:
                print(f"Error loading image {img_path}: file could not be read as an image")
                continue

            # Segment the words in the image
            word_segments = segment_words(img)

            for word in word_segments:
                # Resize each segmented word to fit model input shape
                word_resized = cv2.resize(word, (img_width, img_height))

                # Convert to grayscale
                word_gray = cv2.cvtColor(word_resized, cv2.COLOR_BGR2GRAY)

                # Automatically invert if text is white on black: a mostly
                # dark crop is assumed to be light ink on a dark background.
                mean_intensity = np.mean(word_gray)
                if mean_intensity < 127:
                    word_gray = cv2.bitwise_not(word_gray)

                # Normalize pixel values to the [0, 1] range
                word_normalized = word_gray / 255.0

                # Add a trailing channel axis for the CNN
                word_normalized = np.expand_dims(word_normalized, axis=-1)

                # Append the data and labels
                data.append(word_normalized)
                labels.append(label)
        except Exception as e:
            # Best effort: report the failing file and keep processing the rest.
            print(f"Error loading image {img_path}: {e}")