In [None]:
import os
import pandas as pd
import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [30]:
# Dataset location. Defaults to the original absolute path, but can be
# overridden with the HANDWRITTEN_DIGIT_DATA_DIR environment variable so the
# notebook also runs on machines where the data lives elsewhere.
# `or` (rather than a .get default) also falls back when the variable is set but empty.
data_path = os.environ.get("HANDWRITTEN_DIGIT_DATA_DIR") or "D:/Codes/Handwritten Digit Recognizer"
# CSV index file that sits directly inside the dataset directory.
csv_path = os.path.join(data_path, "dataset.csv")


In [31]:
# Read the CSV index and keep only rows whose 'label' is a digit in [0, 9]
# (Series.between is inclusive on both ends).
df = pd.read_csv(csv_path).loc[lambda frame: frame['label'].between(0, 9)]

# Load images and labels.
# Each row of `df` supplies an image path (joined onto data_path) and its label.
images = []
labels = []

# Iterate over just the two columns that are used instead of building a
# Series per row with iterrows().
for rel_path, label in zip(df['image'], df['label']):
    img_path = os.path.join(data_path, rel_path)
    if not os.path.exists(img_path):
        print(f"[Missing Image] {img_path}")
        # Nothing to decode for a nonexistent file, so skip the read entirely.
        continue
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # The file exists but could not be decoded; report it rather than
        # dropping the sample silently.
        print(f"[Unreadable Image] {img_path}")
        continue
    img = cv2.resize(img, (28, 28))  # match the 28x28 input the model expects
    img = img / 255.0                # scale pixels to [0, 1] (assumes 8-bit images)
    images.append(img)
    labels.append(label)



# Stack the per-image arrays into one (N, 28, 28, 1) array, adding an explicit
# single-channel axis, and turn the labels into an array as well.
images = np.array(images).reshape((-1, 28, 28, 1))
labels = np.array(labels)

# Hold out 20% of the samples; the fixed random_state keeps the split repeatable.
x_train, x_test_custom, y_train, y_test_custom = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)


In [41]:
# Fully connected classifier: a 28x28x1 image is flattened and passed through
# three ReLU layers (256 -> 128 -> 32) to a 10-way softmax output.
model = tf.keras.models.Sequential([
    # Declare the input shape with an explicit Input object; passing
    # `input_shape=` to the first layer is discouraged (Keras 3 warns about it).
    tf.keras.Input(shape=(28, 28, 1)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.2),  # drop 20% of the first hidden layer's activations during training
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')  # one probability per digit class
])

In [42]:
# Training setup: Adam optimizer, sparse categorical cross-entropy loss
# (the labels are passed as integer class ids, not one-hot vectors),
# and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [43]:
# Train for 10 epochs, scoring on the held-out split after every epoch.
# Keep the returned History object so the per-epoch loss/accuracy can be
# inspected or plotted later without retraining.
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    validation_data=(x_test_custom, y_test_custom),
)

# Save model next to the dataset in the native .keras format
model.save(os.path.join(data_path, 'handwritten_model.keras'))

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.1158 - loss: 2.3295 - val_accuracy: 0.1700 - val_loss: 2.1852
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.1673 - loss: 2.2030 - val_accuracy: 0.3200 - val_loss: 2.0046
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3317 - loss: 1.9335 - val_accuracy: 0.5300 - val_loss: 1.6157
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4724 - loss: 1.6119 - val_accuracy: 0.6650 - val_loss: 1.1370
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6178 - loss: 1.1543 - val_accuracy: 0.7350 - val_loss: 0.8584
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6944 - loss: 0.8905 - val_accuracy: 0.8500 - val_loss: 0.6180
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━

In [None]:
# Score the trained model on the held-out custom split.
# Note: this same split was passed as validation_data during training, so the
# figure is not a fully independent estimate.
custom_metrics = model.evaluate(x_test_custom, y_test_custom, verbose=0)
custom_loss, custom_acc = custom_metrics  # order follows compile(): loss first, then accuracy
print(f"[Custom Test Set Accuracy] {custom_acc:.4f}")

[Custom Test Set Accuracy] 0.9300
[MNIST Test Set Accuracy]  0.0149
