In [9]:
import os
import numpy as np
import cv2
from tensorflow.keras.utils import to_categorical

def load_data(data_dir, image_size=(28, 28)):
    """Load a folder-per-class image dataset as normalized grayscale arrays.

    Every immediate sub-directory of ``data_dir`` is treated as one class, and
    class indices follow the sorted order of the sub-directory names. Each
    regular file inside a class directory is read as grayscale, resized to
    ``image_size`` and scaled from [0, 255] to [0, 1]. Files that OpenCV
    cannot decode (``cv2.imread`` returns ``None``) are skipped.

    Args:
        data_dir: Path of the dataset root directory.
        image_size: ``(width, height)`` tuple handed to ``cv2.resize``.
            Defaults to ``(28, 28)``, the size used previously.

    Returns:
        A tuple ``(images, labels, class_names)``:
            images: array of shape ``(n, height, width, 1)`` with values in [0, 1].
            labels: one-hot array of shape ``(n, num_classes)``.
            class_names: sorted list of class directory names; position in the
                list is the class index used in ``labels``.

    Raises:
        ValueError: If no readable image is found under ``data_dir``.
    """
    class_names = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    num_classes = len(class_names)

    images = []
    labels = []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(data_dir, class_name)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split derived from it) reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            file_path = os.path.join(class_dir, file_name)
            if not os.path.isfile(file_path):
                continue
            image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            if image is None:  # Not a decodable image; skip it.
                continue
            image = cv2.resize(image, image_size)  # Resize to a fixed size
            images.append(image / 255.0)  # Normalize pixel values to [0, 1]
            labels.append(label)

    if not images:
        # Fail here with a clear message instead of producing empty arrays
        # that only break later, inside the split or the training call.
        raise ValueError(f"No readable images found under {data_dir!r}")

    images = np.expand_dims(np.array(images), axis=-1)  # Add channel dimension
    labels = to_categorical(np.array(labels), num_classes)

    return images, labels, class_names

# Root of the curated dataset, given relative to the notebook's working directory.
# load_data treats each immediate sub-directory of this folder as one class.
data_dir = '../data/handwritting_characters_database/curated'
# images: normalized pixel arrays; labels: one-hot targets; class_names: sorted
# class folder names (list position == class index).
images, labels, class_names = load_data(data_dir)


In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

def build_model(input_shape, num_classes):
    """Assemble and compile the CNN classifier.

    The network is three Conv2D + 2x2 MaxPooling2D stages (32, 64 and 128
    filters with 3x3, 3x3 and 4x4 kernels), followed by Flatten, a 128-unit
    ReLU Dense layer, Dropout(0.5) and a softmax output layer.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Sequential model compiled with the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    # (filters, kernel_size) for each convolutional stage, in order.
    conv_stages = (
        (32, (3, 3)),
        (64, (3, 3)),
        (128, (4, 4)),
    )

    stack = []
    for filters, kernel in conv_stages:
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {} if stack else {'input_shape': input_shape}
        stack.append(
            Conv2D(filters, kernel_size=kernel, activation='relu', **first_layer_kwargs)
        )
        stack.append(MaxPooling2D(pool_size=(2, 2)))

    # Classification head on top of the flattened feature maps.
    stack.append(Flatten())
    stack.append(Dense(128, activation='relu'))
    stack.append(Dropout(0.5))
    stack.append(Dense(num_classes, activation='softmax'))

    network = Sequential(stack)
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# 28x28 single-channel input: matches the resize target and the trailing
# channel axis that load_data adds to every image.
input_shape = (28, 28, 1)
# One output unit per class directory discovered by load_data.
num_classes = len(class_names)
model = build_model(input_shape, num_classes)


In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation. random_state fixes the shuffle,
# so the split is repeatable for a given ordering of `images` / `labels`.
# NOTE(review): no `stratify` argument is passed, so per-class proportions may
# differ between the training and validation subsets.
X_train, X_val, y_train, y_val = train_test_split(images, labels, test_size=0.2, random_state=42)

# Train for 10 epochs with mini-batches of 32; the held-out split is scored at
# the end of every epoch (val_loss / val_accuracy in the log below).
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32)


Epoch 1/10
[1m1560/1560[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - accuracy: 0.3062 - loss: 2.8149 - val_accuracy: 0.6959 - val_loss: 0.9404
Epoch 2/10
[1m1560/1560[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.6503 - loss: 1.1209 - val_accuracy: 0.7456 - val_loss: 0.7549
Epoch 3/10
[1m1560/1560[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.7075 - loss: 0.8983 - val_accuracy: 0.7712 - val_loss: 0.6713
Epoch 4/10
[1m1560/1560[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.7369 - loss: 0.7950 - val_accuracy: 0.7798 - val_loss: 0.6348
Epoch 5/10
[1m1560/1560[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 8ms/step - accuracy: 0.7583 - loss: 0.7186 - val_accuracy: 0.7824 - val_loss: 0.6095
Epoch 6/10
[1m1560/1560[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.7727 - loss: 0.6553 - val_accuracy: 0.7967 - val_loss: 0.5744
Epoch 7/10

In [12]:
# Persist the trained model to disk; the path is relative to the working directory.
# NOTE(review): the .h5 suffix presumably selects Keras' HDF5 file format, and
# whether a missing `models/` directory is created automatically depends on the
# installed Keras version — confirm both.
model.save('models/curated_cnn_model.h5')



In [13]:
# Score the model on the held-out split and report accuracy as a percentage.
# NOTE(review): X_val / y_val are the same arrays passed as validation_data
# during fit, so this is a validation score, not an independent test score.
loss, accuracy = model.evaluate(X_val, y_val)
print(f'Validation accuracy: {accuracy * 100:.2f}%')

[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8114 - loss: 0.5388
Validation accuracy: 80.84%
