In [1]:
import tensorflow as tf

# Load and preprocess the MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0  # Normalize to [0, 1]
x_train = x_train.reshape(-1, 28 * 28)  # Flatten images
x_test = x_test.reshape(-1, 28 * 28)

# Convert labels to one-hot encoding
y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes=10)


In [4]:
index = 0 
image = x_train[index]
print(image.shape)  # Print the shape of the image
y_test_onehot[0]

(784,)


array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.])

In [None]:
class SimpleNN(tf.keras.Model):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.dense1 = tf.keras.layers.Dense(128, activation='relu')
        self.dense2 = tf.keras.layers.Dense(10, activation='softmax')


    def call(self, inputs):
        x = self.dense1(inputs)
        return self.dense2(x)

In [7]:
model = SimpleNN()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.CategoricalCrossentropy()

2025-08-10 19:48:36.329618: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-08-10 19:48:36.329984: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-08-10 19:48:36.329993: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
I0000 00:00:1754830116.330422 28945882 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1754830116.330510 28945882 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [9]:
epochs = 3
batch_size = 32
num_batches = len(x_train) // batch_size

print("num_batches : ",num_batches)

num_batches :  1875


In [11]:
1000// 32


31

In [None]:
0 to 32
32 to 64

In [10]:
model.trainable_variables

[]

In [12]:

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")

    #new models creations
    # set weights from best model
    
    for i in range(num_batches):
        # Get a batch of data
        start = i * batch_size

        
        end = start + batch_size
        x_batch = x_train[start:end]
        y_batch = y_train_onehot[start:end]
        
        with tf.GradientTape() as tape:
            predictions = model(x_batch, training=True)  # Forward pass
            loss = loss_fn(y_batch, predictions)        # Compute loss
        

        gradients = tape.gradient(loss, model.trainable_variables) 
      
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))  # Update weights

        if i % 200 == 0:  # Print progress every 200 batches
            print(f"Batch {i}/{num_batches}, Loss: {loss.numpy():.4f}")

        

Epoch 1/3
Batch 0/1875, Loss: 2.3379
Batch 200/1875, Loss: 0.4071
Batch 400/1875, Loss: 0.1741
Batch 600/1875, Loss: 0.1903
Batch 800/1875, Loss: 0.1447
Batch 1000/1875, Loss: 0.4514
Batch 1200/1875, Loss: 0.2078
Batch 1400/1875, Loss: 0.2746
Batch 1600/1875, Loss: 0.2024
Batch 1800/1875, Loss: 0.1672
Epoch 2/3
Batch 0/1875, Loss: 0.0713
Batch 200/1875, Loss: 0.1812
Batch 400/1875, Loss: 0.1053
Batch 600/1875, Loss: 0.0497
Batch 800/1875, Loss: 0.0869
Batch 1000/1875, Loss: 0.2246
Batch 1200/1875, Loss: 0.1548
Batch 1400/1875, Loss: 0.1880
Batch 1600/1875, Loss: 0.1510
Batch 1800/1875, Loss: 0.0926
Epoch 3/3
Batch 0/1875, Loss: 0.0374
Batch 200/1875, Loss: 0.1032
Batch 400/1875, Loss: 0.0914
Batch 600/1875, Loss: 0.0314
Batch 800/1875, Loss: 0.0613
Batch 1000/1875, Loss: 0.0979
Batch 1200/1875, Loss: 0.1367
Batch 1400/1875, Loss: 0.1440
Batch 1600/1875, Loss: 0.0781
Batch 1800/1875, Loss: 0.0548


In [7]:
# Evaluate the model
test_loss = loss_fn(y_test_onehot, model(x_test))
test_accuracy = tf.keras.metrics.categorical_accuracy(y_test_onehot, model(x_test))
print(f"Test Loss: {test_loss.numpy():.4f}, Test Accuracy: {tf.reduce_mean(test_accuracy).numpy():.4f}")

Test Loss: 0.1047, Test Accuracy: 0.9673
