In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
# Dataset location and class folders
# Root folder of the dataset; point this at your own copy if it lives elsewhere.
dataset_path = "Handwriting"
# One sub-folder per class. The position in this list is the numeric label
# assigned when the images are loaded (index 0, then index 1).
categories = [
    "Low Risk for Dysgraphia",
    "High Risk for Dysgraphia",
]

In [None]:
# Image preprocessing parameters
# Every cropped segment is resized to this size before it is fed to the network.
img_width = 150
img_height = 150

# Accumulators filled by the loading loop: one image array and one label per segment.
data, labels = [], []

In [None]:
# Function to segment words in an image
def segment_words(image, min_width=20, min_height=20, threshold=128):
    """Crop an image into pieces around its dark connected regions.

    The image is converted to grayscale and binarised with an inverted fixed
    threshold, so pixels at or below ``threshold`` become foreground. The
    bounding box of every external contour that is large enough is then cut
    out of the original image. No dilation or merging of neighbouring
    contours is performed, so each crop corresponds to one connected region.

    Args:
        image: Image array as returned by ``cv2.imread`` (3-channel BGR), or
            an already single-channel 2-D array.
        min_width: A bounding box must be strictly wider than this (pixels)
            to be kept; smaller boxes are treated as noise.
        min_height: Same as ``min_width`` for the box height.
        threshold: Gray level used for the inverted binary threshold.

    Returns:
        A list of crops (slices of ``image``), in the order in which
        ``cv2.findContours`` reported the contours. Empty if nothing passes
        the size filter.

    Raises:
        ValueError: If ``image`` is ``None`` or has no pixels, which is what
            ``cv2.imread`` yields for an unreadable file.
    """
    if image is None or getattr(image, "size", 0) == 0:
        raise ValueError(
            "segment_words() needs a non-empty image array (got None or an empty array)"
        )

    # A 2-D array is already grayscale; BGR->GRAY conversion would reject it.
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Inverted threshold: dark ink on a light page becomes white foreground.
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY_INV)

    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); the contours are second-to-last in both layouts.
    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    word_segments = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Filter small noise
        if w > min_width and h > min_height:
            word_segments.append(image[y:y + h, x:x + w])
    return word_segments

In [None]:
# Read and label the images
# Start from fresh lists so that re-running this cell neither duplicates samples
# nor fails on `.append` after `data`/`labels` were converted to NumPy arrays.
data = []
labels = []

for label, category in enumerate(categories):
    # `label` is the position of the class in `categories`:
    # 0 for "Low Risk for Dysgraphia", 1 for "High Risk for Dysgraphia"
    folder_path = os.path.join(dataset_path, category)

    # os.listdir() returns entries in arbitrary order; sorting keeps the sample
    # order (and with it the seeded train/validation split) reproducible.
    for img_name in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, img_name)
        try:
            # Read image; cv2.imread returns None instead of raising when the
            # entry is not a readable image (wrong format, directory, ...).
            img = cv2.imread(img_path)
            if img is None:
                print(f"Error loading image {img_path}: not a readable image file")
                continue

            # Segment the words in the image
            for word in segment_words(img):
                # Resize each segmented word to fit model input shape.
                # cv2.resize takes (width, height); the result is (height, width, 3).
                word_resized = cv2.resize(word, (img_width, img_height))

                # Convert to grayscale
                word_gray = cv2.cvtColor(word_resized, cv2.COLOR_BGR2GRAY)

                # Automatically invert if text is white on black: a mostly dark
                # crop is taken to be light ink on a dark background.
                if np.mean(word_gray) < 127:
                    word_gray = cv2.bitwise_not(word_gray)

                # Normalize to [0, 1]; float32 up front halves the memory of
                # the default float64 result.
                word_normalized = word_gray.astype("float32") / 255.0

                # Add the channel axis expected by the CNN: (height, width, 1)
                word_normalized = np.expand_dims(word_normalized, axis=-1)

                # Append the data and labels
                data.append(word_normalized)
                labels.append(label)
        except Exception as e:
            # Best effort: report the failing file and carry on with the rest.
            print(f"Error loading image {img_path}: {e}")

In [None]:
# Convert to numpy arrays
# Labels become a 1-D integer array, one entry per collected segment.
labels = np.array(labels)
# Pixel values were already scaled to [0, 1] while loading, so only the dtype is set here.
data = np.array(data, dtype=np.float32)

In [None]:
# Train-test split
# 80 % training / 20 % validation with a fixed seed. `stratify=labels` keeps
# the class ratio the same in both subsets, so the validation metrics are not
# skewed when one class has far more segments than the other.
# NOTE(review): the samples are segments, and several of them come from the
# same source image; a per-segment split can place segments of one image in
# both subsets. Consider a group-wise split if that matters for the evaluation.
X_train, X_val, y_train, y_val = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

In [None]:
# Build the CNN model
# Seed TensorFlow so weight initialisation is repeatable between runs
# (GPU kernels may still introduce small run-to-run differences).
tf.random.set_seed(42)

model = models.Sequential([
    # Image arrays are indexed (rows, columns), i.e. (height, width), with the
    # single gray channel last. The explicit Input layer replaces the
    # `input_shape=` argument on the first Conv2D.
    layers.Input(shape=(img_height, img_width, 1)),
    # Three conv + max-pool stages with 32, 64 and 128 filters.
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Binary classification: one sigmoid output
])

In [None]:
# Compile the model
# Adam optimiser with binary cross-entropy, which pairs with the single
# sigmoid output unit; accuracy is the only extra metric tracked.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Train the model
# NOTE(review): `class_weight` is imported at the top of the notebook but never
# used; if the classes are imbalanced, consider passing class weights here.
epochs = 10  # full passes over the training set
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=epochs,
    validation_data=(X_val, y_val),
)

In [None]:
# Save the model
# The ".h5" suffix selects the HDF5 file format. The file name is kept as is
# so that anything loading this file keeps working.
model_output_path = "handwriting_dysgraphia_model.h5"
model.save(model_output_path)

In [None]:
# Evaluate the model
# evaluate() returns the loss followed by the compiled metrics (accuracy only).
val_metrics = model.evaluate(X_val, y_val)
loss, accuracy = val_metrics
for metric_name, metric_value in (("Loss", loss), ("Accuracy", accuracy)):
    print(f"Validation {metric_name}: {metric_value}")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import random
import os

# Data distribution visualization
# Number of directory entries in each class folder. These are the source
# images on disk, not the segments cut out of them during loading.
category_counts = [
    len(os.listdir(os.path.join(dataset_path, category)))
    for category in categories
]

fig, ax = plt.subplots(figsize=(8, 6))
# seaborn >= 0.13 deprecates `palette` without `hue`. Colouring by `hue` on the
# same variable gives the same per-bar colours; dodge=False keeps the bars
# full-width and centred on older seaborn versions.
sns.barplot(
    x=categories,
    y=category_counts,
    hue=categories,
    palette="viridis",
    dodge=False,
    ax=ax,
)
# Depending on the seaborn version, `hue` adds a legend that only repeats the
# x tick labels; drop it if present.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
ax.set_title("Distribution of Samples in Dataset")
ax.set_xlabel("Category")
ax.set_ylabel("Number of Samples")
plt.show()

In [None]:
# Display random sample images
def show_random_samples(data, labels, category_names, num_samples=5):
    """Show randomly chosen samples side by side, titled with their class name.

    Args:
        data: Indexable collection of image arrays. Single-channel images may
            be 2-D or carry a trailing channel axis of length 1.
        labels: Class index for each sample, aligned with ``data``.
        category_names: Sequence mapping a class index to its display name.
        num_samples: Maximum number of samples to draw; capped at
            ``len(data)`` so a small dataset does not raise.

    Returns:
        None. The figure is rendered with ``plt.show()``; nothing is drawn
        when there are no samples to show.
    """
    # random.sample() raises ValueError when asked for more items than exist.
    count = min(num_samples, len(data))
    if count <= 0:
        print("No samples to display.")
        return

    plt.figure(figsize=(12, 8))
    indices = random.sample(range(len(data)), count)
    for i, idx in enumerate(indices):
        image = np.asarray(data[idx])
        # Drop a trailing single channel so the image is plotted as a plain 2-D map.
        if image.ndim == 3 and image.shape[-1] == 1:
            image = image[..., 0]
        plt.subplot(1, count, i + 1)
        # Gray colormap for single-channel data instead of the default colormap.
        plt.imshow(image, cmap="gray")
        # int() lets NumPy integer (or integral float) labels index a plain list.
        plt.title(category_names[int(labels[idx])])
        plt.axis("off")
    plt.tight_layout()
    plt.show()

# Display five randomly drawn samples from the whole dataset
# (drawn across all samples, not a fixed number per class)
show_random_samples(data, labels, category_names=categories, num_samples=5)

In [None]:
# Training and validation accuracy/loss plots
def plot_training_history(history):
    """Draw accuracy and loss curves for training and validation side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the per-epoch series ``accuracy``, ``val_accuracy``, ``loss`` and
            ``val_loss`` (as returned by ``model.fit``).
    """
    # (key in history.history, human-readable name) for each panel, left to right
    panels = (
        ("accuracy", "Accuracy"),
        ("loss", "Loss"),
    )

    plt.figure(figsize=(12, 5))
    for position, (key, pretty) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[key], label=f"Training {pretty}")
        plt.plot(history.history[f"val_{key}"], label=f"Validation {pretty}")
        plt.title(f"{pretty} over Epochs")
        plt.xlabel("Epochs")
        plt.ylabel(pretty)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Plot the learning curves recorded by model.fit()
plot_training_history(history=history)