In [7]:
import tensorflow as tf
from tensorflow.keras import layers, models, datasets
from sklearn.metrics import classification_report
import numpy as np

Descrição da rede neural para o MNIST
<blockquote>We adopt a simple feed-forward architecture with three hidden layers. <b>The first convolutional layer has 32 channels with 5 × 5 kernels</b> followed by <b>3 × 3 max pooling and a stride of 2</b>. <b>The second layer is also convolutional, again with 32 channels with 3 × 3 kernels, also followed by 3 × 3 max pooling and a stride of 2</b>. This leads to a 1,568 dimensional representation <b>which feeds to a 100-dimensional tanh hidden layer with fully-connected weights, which then in a fully-connected manner feeds to the ten outputs</b>. We have softmax outputs and minimize <b>cross-entropy using Adam update rule with a learning rate 10−3</b>. The updates are performed after each <b>mini-batch of size 64</b>. The network for MNIST is trained for <b>25 epochs</b>. On MNIST, the model is able to achieve 99.25% accuracy on the test set.</blockquote>

In [3]:
# MNIST carregado
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

In [4]:
# Pré-processamento -- normalizando entre 0 e 1
x_train = x_train.reshape(-1, 28, 28, 1).astype("float32") / 255.0
x_test  = x_test.reshape(-1, 28, 28, 1).astype("float32") / 255.0
# normalizando entre -1 e 1
mean = np.mean(x_train)
std = np.std(x_train)
x_train = (x_train - mean) / std
x_test = (x_test - mean) / std

In [5]:
model = models.Sequential([
    layers.Input(shape=(28, 28, 1), name="input_layer"),
    # Camada convolucional 1
    layers.Conv2D(32, (5, 5), padding='same', activation='relu', name="conv1"),
    layers.MaxPooling2D(pool_size=(3, 3), strides=2, name="pool1"),

    # Camada convolucional 2
    layers.Conv2D(32, (3, 3), padding='same', activation='relu', name="conv2"),
    layers.MaxPooling2D(pool_size=(3, 3), strides=2, name="pool2"),

    # Flatten
    layers.Flatten(name="flatten"),

    # Camada fully connected com tanh
    layers.Dense(100, activation='tanh', name="dense_tanh"),

    # Camada de saída com softmax
    layers.Dense(10, activation='softmax', name="output_layer")
], name="mnist_model")

2025-06-08 17:18:16.969045: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [8]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True
)

model.summary()

In [10]:
history = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=25,
    batch_size=64,
    callbacks=[early_stopping]
)

Epoch 1/25
[1m 16/938[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m9s[0m 11ms/step - accuracy: 1.0000 - loss: 0.0042

2025-06-08 17:21:29.641012: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.


[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9967 - loss: 0.0105 - val_accuracy: 0.9887 - val_loss: 0.0368
Epoch 2/25
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9968 - loss: 0.0098 - val_accuracy: 0.9868 - val_loss: 0.0422
Epoch 3/25
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9969 - loss: 0.0096 - val_accuracy: 0.9914 - val_loss: 0.0265
Epoch 4/25
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9979 - loss: 0.0068 - val_accuracy: 0.9925 - val_loss: 0.0269
Epoch 5/25
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11ms/step - accuracy: 0.9975 - loss: 0.0087 - val_accuracy: 0.9924 - val_loss: 0.0295
Epoch 6/25
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9984 - loss: 0.0060 - val_accuracy: 0.9895 - val_loss: 0.0374
Epoch 7/25
[1m938/938[0m 

In [11]:
# Salvar o modelo
model.save("modelos/mnist_model.keras")

In [None]:
# relatorio de treinamento
y_pred = np.argmax(model.predict(x_test), axis=1)
print(classification_report(y_test, y_pred))

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       980
           1       0.99      1.00      1.00      1135
           2       1.00      0.99      0.99      1032
           3       0.99      1.00      1.00      1010
           4       0.99      0.99      0.99       982
           5       0.99      0.99      0.99       892
           6       0.99      0.99      0.99       958
           7       0.99      0.99      0.99      1028
           8       0.99      0.99      0.99       974
           9       0.99      0.99      0.99      1009

    accuracy                           0.99     10000
   macro avg       0.99      0.99      0.99     10000
weighted avg       0.99      0.99      0.99     10000



In [None]:
# acurácia alcançada
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Acurácia no conjunto de teste: {test_accuracy:.5f}") # mesma do artigo, 99,25%

[1m300/313[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9916 - loss: 0.0309