In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

# -------------------
# MNIST: load and preprocess
# -------------------
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Cast to float32, divide by 255 to normalize the pixel values, and append a
# trailing channel axis so the image arrays become (N, 28, 28, 1).
x_train = (x_train.astype("float32") / 255.0)[..., tf.newaxis]
x_test = (x_test.astype("float32") / 255.0)[..., tf.newaxis]

# Turn the integer class labels into one-hot vectors over 10 classes.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# -------------------
# Build simple CNN + RNN model
# -------------------
# Two Conv2D/MaxPooling2D stages extract a feature map, which is then read
# row by row as a short sequence by a SimpleRNN and classified by dense layers.
model = models.Sequential([
    # Explicit Input layer instead of passing `input_shape=` to the first
    # layer, which is the form Keras recommends for Sequential models.
    layers.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3, 3), activation="relu"),   # -> (26, 26, 32)
    layers.MaxPooling2D((2, 2)),                    # -> (13, 13, 32)
    layers.Conv2D(64, (3, 3), activation="relu"),   # -> (11, 11, 64)
    layers.MaxPooling2D((2, 2)),                    # -> (5, 5, 64)

    # reshape for RNN: (batch, time_steps, features)
    # 5 time steps (feature-map rows), 5 columns * 64 channels = 320 features each
    layers.Reshape((5, 5 * 64)),

    layers.SimpleRNN(64),
    layers.Dense(64, activation="relu"),
    layers.Dense(10, activation="softmax"),
])

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# -------------------
# Checkpoint callback
# -------------------
# Only overwrite the saved file when validation accuracy improves.
checkpoint_path = "pen_to_pixel.keras"
checkpoint = callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
)

# -------------------
# Train
# -------------------
# validation_split=0.1 holds out 10% of the training samples for validation.
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.1,
    callbacks=[checkpoint],
    verbose=2,
)

# -------------------
# Evaluate best model
# -------------------
# Reload the checkpointed model from disk and score it on the test set.
best_model = tf.keras.models.load_model(checkpoint_path)
loss, acc = best_model.evaluate(x_test, y_test, verbose=0)
print(f"✅ Test accuracy: {acc:.4f}")


Epoch 1/5


I0000 00:00:1756991806.531378   17895 service.cc:152] XLA service 0x71cb7c004c70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1756991806.531394   17895 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4050 Laptop GPU, Compute Capability 8.9
2025-09-04 18:46:46.582433: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1756991806.755842   17895 cuda_dnn.cc:529] Loaded cuDNN version 90300

I0000 00:00:1756991809.340881   17895 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


844/844 - 9s - 10ms/step - accuracy: 0.9371 - loss: 0.2235 - val_accuracy: 0.9807 - val_loss: 0.0645
Epoch 2/5
844/844 - 2s - 2ms/step - accuracy: 0.9827 - loss: 0.0564 - val_accuracy: 0.9865 - val_loss: 0.0461
Epoch 3/5
844/844 - 1s - 2ms/step - accuracy: 0.9871 - loss: 0.0409 - val_accuracy: 0.9852 - val_loss: 0.0440
Epoch 4/5
844/844 - 1s - 2ms/step - accuracy: 0.9909 - loss: 0.0297 - val_accuracy: 0.9873 - val_loss: 0.0466
Epoch 5/5
844/844 - 1s - 2ms/step - accuracy: 0.9920 - loss: 0.0250 - val_accuracy: 0.9852 - val_loss: 0.0536
✅ Test accuracy: 0.9874


In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

# -------------------
# MNIST: load and preprocess
# -------------------
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Cast to float32, divide by 255 to normalize the pixel values, and append a
# trailing channel axis so the image arrays become (N, 28, 28, 1).
x_train = (x_train.astype("float32") / 255.0)[..., tf.newaxis]
x_test = (x_test.astype("float32") / 255.0)[..., tf.newaxis]

# Turn the integer class labels into one-hot vectors over 10 classes.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# -------------------
# Build simple CNN + RNN model
# -------------------
# Two Conv2D/MaxPooling2D stages extract a feature map, which is then read
# row by row as a short sequence by a SimpleRNN and classified by dense layers.
model = models.Sequential([
    # Explicit Input layer instead of passing `input_shape=` to the first
    # layer, which is the form Keras recommends for Sequential models.
    layers.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3, 3), activation="relu"),   # -> (26, 26, 32)
    layers.MaxPooling2D((2, 2)),                    # -> (13, 13, 32)
    layers.Conv2D(64, (3, 3), activation="relu"),   # -> (11, 11, 64)
    layers.MaxPooling2D((2, 2)),                    # -> (5, 5, 64)

    # reshape for RNN: (batch, time_steps, features)
    # 5 time steps (feature-map rows), 5 columns * 64 channels = 320 features each
    layers.Reshape((5, 5 * 64)),

    layers.SimpleRNN(64),
    layers.Dense(64, activation="relu"),
    layers.Dense(10, activation="softmax"),
])

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# -------------------
# Checkpoint callback
# -------------------
# Only overwrite the saved file when validation accuracy improves.
checkpoint_path = "pen_to_pixel.keras"
checkpoint = callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
)

# -------------------
# Train
# -------------------
# validation_split=0.1 holds out 10% of the training samples for validation.
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=35,
    validation_split=0.1,
    callbacks=[checkpoint],
    verbose=2,
)

# -------------------
# Evaluate best model
# -------------------
# Reload the checkpointed model from disk and score it on the test set.
best_model = tf.keras.models.load_model(checkpoint_path)
loss, acc = best_model.evaluate(x_test, y_test, verbose=0)
print(f"✅ Test accuracy: {acc:.4f}")


Epoch 1/35







422/422 - 9s - 22ms/step - accuracy: 0.9137 - loss: 0.2969 - val_accuracy: 0.9812 - val_loss: 0.0637
Epoch 2/35
422/422 - 1s - 3ms/step - accuracy: 0.9799 - loss: 0.0655 - val_accuracy: 0.9823 - val_loss: 0.0591
Epoch 3/35
422/422 - 1s - 3ms/step - accuracy: 0.9855 - loss: 0.0470 - val_accuracy: 0.9855 - val_loss: 0.0484
Epoch 4/35
422/422 - 1s - 3ms/step - accuracy: 0.9886 - loss: 0.0373 - val_accuracy: 0.9828 - val_loss: 0.0568
Epoch 5/35
422/422 - 1s - 3ms/step - accuracy: 0.9911 - loss: 0.0287 - val_accuracy: 0.9857 - val_loss: 0.0501
Epoch 6/35
422/422 - 1s - 3ms/step - accuracy: 0.9928 - loss: 0.0229 - val_accuracy: 0.9862 - val_loss: 0.0473
Epoch 7/35
422/422 - 1s - 3ms/step - accuracy: 0.9937 - loss: 0.0195 - val_accuracy: 0.9895 - val_loss: 0.0393
Epoch 8/35
422/422 - 1s - 3ms/step - accuracy: 0.9942 - loss: 0.0170 - val_accuracy: 0.9885 - val_loss: 0.0467
Epoch 9/35
422/422 - 1s - 3ms/step - accuracy: 0.9954 - loss: 0.0133 - val_accuracy: 0.9915 - val_loss: 0.0366
Epoch 10/35