In [6]:
import numpy as np
import os
# Select the PyTorch backend for Keras. This is set before `import keras`
# below because Keras reads KERAS_BACKEND when it is first imported.
os.environ["KERAS_BACKEND"] = "torch"
# Opt in to PyTorch's CPU fallback for operations that are not implemented on
# the Apple MPS device (NOTE(review): only relevant when running on MPS).
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
import keras

In [7]:
# Fetch MNIST; load_data() returns a (train, test) pair of (images, labels) tuples.
train_split, test_split = keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [15]:
# Quick look at the dimensions of the training images.
np.shape(x_train)

(60000, 28, 28)

In [14]:
# Preview the shape after appending a trailing channel axis (x_train itself
# is not modified here).
x_train[..., np.newaxis].shape

(60000, 28, 28, 1)

In [16]:
def _prepare_images(images):
    """Return `images` as float32 scaled to [0, 1] with a trailing channel axis.

    The helper is idempotent: each step is applied only if it has not been
    applied yet, so re-running this cell cannot rescale the pixels a second
    time or stack an extra channel dimension.

    Args:
        images: NumPy array of images, either raw integer pixels of shape
            (N, 28, 28) or the already-prepared float array of shape
            (N, 28, 28, 1).

    Returns:
        The prepared array of shape (N, 28, 28, 1).
    """
    # Scale images to the [0, 1] range. Only integer-typed (raw) pixel data is
    # scaled; data that is already floating point was scaled by a previous run.
    if np.issubdtype(images.dtype, np.integer):
        images = images.astype("float32") / 255
    # Make sure images have shape (28, 28, 1); skip if the axis already exists.
    if images.ndim == 3:
        images = np.expand_dims(images, -1)
    return images


x_train = _prepare_images(x_train)
x_test = _prepare_images(x_test)
print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

x_train shape: (60000, 28, 28, 1)
y_train shape: (60000,)
60000 train samples
10000 test samples


In [17]:
# Model parameters shared by the cells that build the network.
input_shape = (28, 28, 1)  # (height, width, channels) of one preprocessed image
num_classes = 10  # size of the softmax output layer

In [18]:
# CNN classifier: two convolutional stages (64 then 128 filters) separated by
# 2x2 max pooling, followed by global average pooling, dropout and a softmax
# output over `num_classes`.
conv_options = {"kernel_size": (3, 3), "activation": "relu"}

model = keras.Sequential(
    [
        keras.layers.Input(shape=input_shape),
        keras.layers.Conv2D(64, **conv_options),
        keras.layers.Conv2D(64, **conv_options),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(128, **conv_options),
        keras.layers.Conv2D(128, **conv_options),
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation="softmax"),
    ]
)

model.summary()

In [19]:
# The labels are integer class ids (not one-hot vectors), hence the "sparse"
# loss and metric. The metric is named "acc", so its validation counterpart is
# reported as "val_acc".
loss_fn = keras.losses.SparseCategoricalCrossentropy()
optimizer = keras.optimizers.Adam(learning_rate=1e-3)
accuracy = keras.metrics.SparseCategoricalAccuracy(name="acc")

model.compile(loss=loss_fn, optimizer=optimizer, metrics=[accuracy])

In [20]:
# Training hyperparameters.
batch_size = 128
epochs = 20  # upper bound; early stopping may end training sooner

# Stop when validation accuracy has not improved for 2 consecutive epochs and
# restore the weights from the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_acc",
    patience=2,
    restore_best_weights=True,
)
callbacks = [early_stopping]

In [21]:
# Train the model, holding out 15% of the training data for validation so the
# early-stopping callback has a "val_acc" value to monitor.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.15,
    callbacks=callbacks,
)

Epoch 1/20
[1m  1/399[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5:03[0m 762ms/step - acc: 0.0703 - loss: 2.3037

  torch._foreach_mul_(m_list, self.beta_1)


[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 334ms/step - acc: 0.5049 - loss: 1.3920 - val_acc: 0.9599 - val_loss: 0.1565
Epoch 2/20
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 345ms/step - acc: 0.9074 - loss: 0.3128 - val_acc: 0.9624 - val_loss: 0.1242
Epoch 3/20
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 357ms/step - acc: 0.9297 - loss: 0.2435 - val_acc: 0.9707 - val_loss: 0.0985
Epoch 4/20
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 334ms/step - acc: 0.9405 - loss: 0.2078 - val_acc: 0.9596 - val_loss: 0.1524
Epoch 5/20
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 378ms/step - acc: 0.9425 - loss: 0.1996 - val_acc: 0.9790 - val_loss: 0.0737
Epoch 6/20
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 302ms/step - acc: 0.9469 - loss: 0.1822 - val_acc: 0.9780 - val_loss: 0.0769
Epoch 7/20
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s

<keras.src.callbacks.history.History at 0x1303a5a90>

In [22]:
# Evaluate on the held-out test set without a progress bar; the result holds
# the loss followed by the compiled "acc" metric.
score = model.evaluate(x=x_test, y=y_test, verbose=0)
score

[0.06680799275636673, 0.9814000129699707]

In [23]:
# Per-class softmax probabilities for every test image (one row per image).
predictions = model.predict(x=x_test)
predictions

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step


array([[1.1195441e-09, 2.0761222e-16, 1.9871419e-05, ..., 9.9997938e-01,
        8.3816218e-11, 4.4096325e-07],
       [1.7181263e-11, 7.5055162e-10, 9.9999976e-01, ..., 2.5288674e-10,
        4.1966902e-10, 5.7707200e-10],
       [1.1524761e-06, 9.9680966e-01, 1.2795942e-05, ..., 1.3432635e-03,
        4.7474805e-06, 6.9575501e-05],
       ...,
       [4.8970290e-13, 6.2968209e-25, 1.7231656e-16, ..., 3.6753377e-24,
        7.3144566e-09, 3.1446160e-10],
       [2.4006990e-09, 1.1222068e-24, 3.4466589e-13, ..., 3.2468221e-15,
        9.2827358e-06, 4.3999689e-11],
       [4.8261879e-17, 3.0403860e-25, 3.3699651e-11, ..., 3.1671969e-34,
        6.4929838e-13, 2.7476480e-21]], dtype=float32)

In [44]:
# Spot-check a single randomly chosen test example.
# np.random.randint treats its upper bound as exclusive, so the sample count
# itself is the correct bound: valid indices are 0 .. shape[0] - 1. Passing
# shape[0] + 1 could return shape[0], which is out of range and raises
# IndexError.
index = np.random.randint(0, predictions.shape[0])
print(f"Predicted: {np.argmax(predictions[index])}")
print(f"Actual: {y_test[index]}")

Predicted: 2
Actual: 2


In [46]:
# Persist the trained model to disk in the native .keras format.
model.save(filepath="model.keras")

2024-07-23 16:15:08.128007: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
