In [1]:
import tensorflow as tf

# Load and preprocess the MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Normalize to [0, 1]
x_train = x_train.reshape(-1, 28 * 28)  # Flatten images
x_test = x_test.reshape(-1, 28 * 28)

# Convert labels to one-hot encoding
y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes=10)


In [2]:
class SimpleNN(tf.keras.Model):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.dense1 = tf.keras.layers.Dense(128, activation='relu')
        self.dense2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, inputs):
        x = self.dense1(inputs)
        return self.dense2(x)

In [3]:
model = SimpleNN()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.CategoricalCrossentropy()

In [4]:
epochs = 3
batch_size = 32
num_batches = len(x_train) // batch_size

print("num_batches : ",num_batches)

num_batches :  1875


In [5]:
model.trainable_variables

[]

In [None]:

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")

    #new models creations
    # set weights from best model
    
    for i in range(num_batches):
        # Get a batch of data
        start = i * batch_size
        end = start + batch_size
        x_batch = x_train[start:end]
        y_batch = y_train_onehot[start:end]
        
        with tf.GradientTape() as tape:
            predictions = model(x_batch, training=True)  # Forward pass
            loss = loss_fn(y_batch, predictions)        # Compute loss
        

        gradients = tape.gradient(loss, model.trainable_variables) 
      
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))  # Update weights

        if i % 200 == 0:  # Print progress every 200 batches
            print(f"Batch {i}/{num_batches}, Loss: {loss.numpy():.4f}")

        

Epoch 1/3
Batch 0/1875, Loss: 2.3726
Batch 200/1875, Loss: 0.4098
Batch 400/1875, Loss: 0.1856
Batch 600/1875, Loss: 0.2085
Batch 800/1875, Loss: 0.1423
Batch 1000/1875, Loss: 0.4300
Batch 1200/1875, Loss: 0.1640
Batch 1400/1875, Loss: 0.2432
Batch 1600/1875, Loss: 0.2447
Batch 1800/1875, Loss: 0.1553
Epoch 2/3
Batch 0/1875, Loss: 0.0684
Batch 200/1875, Loss: 0.0947
Batch 400/1875, Loss: 0.1066
Batch 600/1875, Loss: 0.0612
Batch 800/1875, Loss: 0.0572
Batch 1000/1875, Loss: 0.2744
Batch 1200/1875, Loss: 0.0911
Batch 1400/1875, Loss: 0.1518
Batch 1600/1875, Loss: 0.1669
Batch 1800/1875, Loss: 0.1100
Epoch 3/3
Batch 0/1875, Loss: 0.0491
Batch 200/1875, Loss: 0.0724
Batch 400/1875, Loss: 0.1183
Batch 600/1875, Loss: 0.0506
Batch 800/1875, Loss: 0.0357
Batch 1000/1875, Loss: 0.1584
Batch 1200/1875, Loss: 0.0716
Batch 1400/1875, Loss: 0.0802
Batch 1600/1875, Loss: 0.1040
Batch 1800/1875, Loss: 0.0754


In [7]:
# Evaluate the model
test_loss = loss_fn(y_test_onehot, model(x_test))
test_accuracy = tf.keras.metrics.categorical_accuracy(y_test_onehot, model(x_test))
print(f"Test Loss: {test_loss.numpy():.4f}, Test Accuracy: {tf.reduce_mean(test_accuracy).numpy():.4f}")

Test Loss: 0.1047, Test Accuracy: 0.9673
