In [1]:
# 1. Import libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

In [2]:
# 2. Setup Paths
train_dir = "C:/Users/HP/Desktop/cHEAL Internship/Datasets/Dyslexia Handwriting Dataset/Train"
test_dir = "C:/Users/HP/Desktop/cHEAL Internship/Datasets/Dyslexia Handwriting Dataset/Train"

In [3]:
# 3. Image Data Generators
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

In [4]:
train_datagen = ImageDataGenerator(rescale=1./255)

In [5]:
test_datagen = ImageDataGenerator(rescale=1./255)

In [6]:
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)

Found 151649 images belonging to 3 classes.


In [7]:
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)

Found 151649 images belonging to 3 classes.


In [8]:
# 4. Label extraction
train_labels = train_generator.classes
test_labels = test_generator.classes

In [9]:
print("Class indices:", train_generator.class_indices)

Class indices: {'Corrected': 0, 'Normal': 1, 'Reversal': 2}


In [10]:
feature_extractor = tf.keras.applications.MobileNetV3Large(
    input_shape=(224,224,3),
    include_top=False,
    pooling='avg',
    weights='imagenet'
)
feature_extractor.trainable = False

In [11]:
# Extract Features
def extract_features(generator, feature_extractor):
    features = []
    total_batches = len(generator)

    for i in range(total_batches):
        batch = next(generator)
        batch_features = feature_extractor.predict(batch)
        features.append(batch_features)

    features = np.vstack(features)
    return features

In [12]:
train_features = extract_features(train_generator, feature_extractor)
test_features = extract_features(test_generator, feature_extractor)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 674ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 673ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 550ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 544ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 563ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 551ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 641ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 634ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 573ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 559ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 562ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 538ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [13]:

print("Train features shape:", train_features.shape)
print("Test features shape:", test_features.shape)

Train features shape: (151649, 960)
Test features shape: (151649, 960)


In [None]:
# 7. Train Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(train_features, train_labels)

In [None]:
# 8. Evaluate
predictions = rf.predict(test_features)
print(predictions.score())
print(classification_report(test_labels, predictions))

In [None]:
# 9. Save Models
joblib.dump(rf, "random_forest_handwriting.pkl")
feature_extractor.save("mobilenetv3_feature_extractor.keras")