# Our first example of TensorFlow code (`page 5 / 35 of the notes`)

In [4]:
import tensorflow as tf
from tensorflow import keras

NB_CLASSES = 10
RESHAPED = 784
model = tf.keras.models.Sequential()
model.add(keras.layers.Dense(NB_CLASSES, input_shape = (RESHAPED,), kernel_initializer='zeros', name = 'dense_layer', activation = 'softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Defining a simple neural net in TensorFlow (`page 11 / 42 of the notes`)

In this seciton we use TensorFlow to  define a network that recognizes *MNIST handwritten digits*.


In [5]:
import numpy as np

# Network and training parameters 
EPOCHS = 200
BATCH_SIZE = 128
VERBOSE = 1
NB_CLASSES = 10 # number of outputs = number of digits
N_HIDDEN = 128
VALIDATION_SPLIT = 0.2 # how much TRAIN is reserved for VALIDATION

# Loading MNIST dataset.
# verify
# You can verify that the split between train and test is 60,000 and 10,000 respectively.
# Labels have one-hot representation automatically applied
mnist = keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# X_train is 60,000 rows of 28x28 values; we --> reshape it to 60,000 x 784.
RESHAPED = 784
#
X_train =  X_train.reshape(60000, RESHAPED)
X_test = X_test.reshape(10000, RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Normalize inputs to be within in [0,1].
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# One-hot representation of the labels.
Y_train = tf.keras.utils.to_categorical(Y_train, NB_CLASSES)
Y_test = tf.keras.utils.to_categorical(Y_test, NB_CLASSES)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
60000 train samples
10000 test samples


In [6]:
# Build the model
model = tf.keras.models.Sequential()
model.add(keras.layers.Dense(NB_CLASSES, 
                             input_shape = (RESHAPED,), 
                             name = 'dense_layer', 
                             activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# Compiling the model
model.compile(optimizer='SGD',
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])

In [8]:
# Training the model
model.fit(X_train,Y_train, 
          batch_size=BATCH_SIZE, epochs = EPOCHS,
          verbose = VERBOSE, validation_split = VALIDATION_SPLIT)

Epoch 1/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6879 - loss: 1.3514 - val_accuracy: 0.8345 - val_loss: 0.8816
Epoch 2/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8344 - loss: 0.7806 - val_accuracy: 0.8656 - val_loss: 0.6502
Epoch 3/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8537 - loss: 0.6353 - val_accuracy: 0.8758 - val_loss: 0.5568
Epoch 4/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8646 - loss: 0.5649 - val_accuracy: 0.8800 - val_loss: 0.5052
Epoch 5/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8706 - loss: 0.5220 - val_accuracy: 0.8836 - val_loss: 0.4717
Epoch 6/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8762 - loss: 0.4926 - val_accuracy: 0.8877 - val_loss: 0.4482
Epoch 7/200
[1m375/37

<keras.src.callbacks.history.History at 0x19b8a052f90>

In [9]:
#  Evaluate the model
test_loss, test_acc = model.evaluate(X_test, Y_test)
print('Test accuracy:', test_acc)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9218 - loss: 0.2774  
Test accuracy: 0.9218000173568726


# Improving the simple net in TensorFlow with hidden layers (`page 16 / 47 of the notes`)

In [4]:
import tensorflow as tf
from tensorflow import keras

# Network and training
EPOCHS = 50
BATCH_SIZE = 128
VERBOSE = 1
NB_CLASSES = 10 # number of outputs = number of digits
N_HIDDEN = 128
VALIDATION_SPLIT = 0.2 

# Loading MNIST dataset.
# Labels have one-hot representation.
mnist = keras.datasets.mnist
(X_train, Y_train),(X_test, Y_test) = mnist.load_data()

# X_train is 60,000 rows of 28x28 values; we reshape it to 60,000 x 784.
RESHAPED = 784
#
X_train = X_train.reshape(60000,RESHAPED)
X_test = X_test.reshape(10000,RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Normalize inputs to be within [0,1].
X_train, X_test = X_train / 255.0, X_test / 255.0
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Labels have one-hot representation.
Y_train = tf.keras.utils.to_categorical(Y_train, NB_CLASSES)
Y_test = tf.keras.utils.to_categorical(Y_test, NB_CLASSES)

60000 train samples
10000 test samples


In [9]:
# Build the model.
model = tf.keras.models.Sequential()
model.add(keras.layers.Dense(N_HIDDEN,
                             input_shape =(RESHAPED,),
                              name = 'dense_layer_1',
                              activation = 'relu'))
model.add(keras.layers.Dense(N_HIDDEN,
                              name = 'dense_layer_2',
                              activation = 'relu'))
model.add(keras.layers.Dense(NB_CLASSES,
                              name = 'dense_layer_3',
                              activation = 'softmax'))

# Summary of the model
model.summary()

# Compiling the model.
model.compile(optimizer='SGD',
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Training the model.
model.fit(X_train, Y_train,
          batch_size = BATCH_SIZE,
          epochs = EPOCHS,
          verbose = VERBOSE,
          validation_split = VALIDATION_SPLIT)

Epoch 1/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6481 - loss: 1.4045 - val_accuracy: 0.8440 - val_loss: 0.6996
Epoch 2/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8520 - loss: 0.5774 - val_accuracy: 0.8834 - val_loss: 0.4447
Epoch 3/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8822 - loss: 0.4344 - val_accuracy: 0.8988 - val_loss: 0.3699
Epoch 4/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8953 - loss: 0.3773 - val_accuracy: 0.9079 - val_loss: 0.3325
Epoch 5/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9035 - loss: 0.3446 - val_accuracy: 0.9139 - val_loss: 0.3094
Epoch 6/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9089 - loss: 0.3217 - val_accuracy: 0.9179 - val_loss: 0.2918
Epoch 7/50
[1m375/375[0m 

<keras.src.callbacks.history.History at 0x20f3dff3620>

In [11]:
# Evaluating the model.
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
print('Test accuracy:',test_accuracy)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9649 - loss: 0.1183
Test accuracy: 0.964900016784668


# Further improving the simple net in TensorFlow with dropout (`page 19 / 50 of the notes`)

In [12]:
import tensorflow as tf
import numpy as np
from tensorflow import keras

# Network and training
EPOCHS = 200
BATCH_SIZE = 128
VERBOSE = 1
NB_CLASSES = 10 # number of outputs = number of digits
N_HIDDEN = 128
VALIDATION_SPLIT = 0.2 
DROPOUT = 0.3

# Loading MNIST dataset.
# Labels have one-hot representation.
mnist = keras.datasets.mnist
(X_train, Y_train),(X_test, Y_test) = mnist.load_data()

# X_train is 60,000 rows of 28x28 values; we reshape it to 60,000 x 784.
RESHAPED = 784
#
X_train = X_train.reshape(60000,RESHAPED)
X_test = X_test.reshape(10000,RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Normalize inputs to be within [0,1].
X_train, X_test = X_train / 255.0, X_test / 255.0
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Labels have one-hot representation.
Y_train = tf.keras.utils.to_categorical(Y_train, NB_CLASSES)
Y_test = tf.keras.utils.to_categorical(Y_test, NB_CLASSES)

60000 train samples
10000 test samples


In [13]:
# Build the model.
model = tf.keras.models.Sequential()
model.add(keras.layers.Dense(N_HIDDEN,
                             input_shape =(RESHAPED,),
                              name = 'dense_layer_1',
                              activation = 'relu'))
model.add(keras.layers.Dropout(DROPOUT))
model.add(keras.layers.Dense(N_HIDDEN,
                              name = 'dense_layer_2',
                              activation = 'relu'))
model.add(keras.layers.Dropout(DROPOUT))
model.add(keras.layers.Dense(NB_CLASSES,
                              name = 'dense_layer_3',
                              activation = 'softmax'))

# Summary of the model
model.summary()

# Compiling the model.
model.compile(optimizer='SGD',
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
# Training the model.
model.fit(X_train, Y_train,
          batch_size = BATCH_SIZE,
          epochs = EPOCHS,
          verbose = VERBOSE,
          validation_split = VALIDATION_SPLIT)

Epoch 1/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.4739 - loss: 1.6867 - val_accuracy: 0.8194 - val_loss: 0.8833
Epoch 2/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7263 - loss: 0.9029 - val_accuracy: 0.8690 - val_loss: 0.5266
Epoch 3/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7896 - loss: 0.6913 - val_accuracy: 0.8870 - val_loss: 0.4271
Epoch 4/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8207 - loss: 0.5902 - val_accuracy: 0.8993 - val_loss: 0.3739
Epoch 5/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8405 - loss: 0.5340 - val_accuracy: 0.9060 - val_loss: 0.3415
Epoch 6/200
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8531 - loss: 0.4918 - val_accuracy: 0.9109 - val_loss: 0.3174
Epoch 7/200
[1m375/37

<keras.src.callbacks.history.History at 0x20f407a6350>

In [15]:
# Evaluating the model.
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
print('Test accuracy:',test_accuracy)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9760 - loss: 0.0783
Test accuracy: 0.9760000109672546


**N.B:** Training accuracy should be > than Test accuracy.                  
Otherwise, we might be not training for long enough.

# Testing different optimizers in TensorFlow (`page 22 / 53 of the notes`)

1) RMSProp --> It is faster than SDG since we are able to achieve in only 10 epochs an accuracy of 0.9772 on test dataset.

In [17]:
# Compiling the model.
model.compile(optimizer='RMSProp',
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])

# Training the model.
model.fit(X_train, Y_train,
          batch_size = BATCH_SIZE,
          epochs = 10,
          verbose = VERBOSE,
          validation_split = VALIDATION_SPLIT)

# Evaluating the model.
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
print('Test accuracy:',test_accuracy)

Epoch 1/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9665 - loss: 0.1088 - val_accuracy: 0.9735 - val_loss: 0.0965
Epoch 2/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9682 - loss: 0.1028 - val_accuracy: 0.9750 - val_loss: 0.0960
Epoch 3/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9701 - loss: 0.0961 - val_accuracy: 0.9738 - val_loss: 0.0934
Epoch 4/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9712 - loss: 0.0917 - val_accuracy: 0.9734 - val_loss: 0.1010
Epoch 5/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9724 - loss: 0.0879 - val_accuracy: 0.9747 - val_loss: 0.0964
Epoch 6/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9738 - loss: 0.0859 - val_accuracy: 0.9758 - val_loss: 0.0881
Epoch 7/10
[1m375/375[0m 

2) Adam

In [18]:
# Compiling the model.
model.compile(optimizer='Adam',
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])

# Training the model.
model.fit(X_train, Y_train,
          batch_size = BATCH_SIZE,
          epochs = 10,
          verbose = VERBOSE,
          validation_split = VALIDATION_SPLIT)

# Evaluating the model.
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
print('Test accuracy:',test_accuracy)

Epoch 1/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9763 - loss: 0.0759 - val_accuracy: 0.9764 - val_loss: 0.0894
Epoch 2/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9786 - loss: 0.0703 - val_accuracy: 0.9767 - val_loss: 0.0912
Epoch 3/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9770 - loss: 0.0716 - val_accuracy: 0.9781 - val_loss: 0.0832
Epoch 4/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9788 - loss: 0.0654 - val_accuracy: 0.9788 - val_loss: 0.0843
Epoch 5/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9786 - loss: 0.0650 - val_accuracy: 0.9787 - val_loss: 0.0813
Epoch 6/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9804 - loss: 0.0598 - val_accuracy: 0.9784 - val_loss: 0.0910
Epoch 7/10
[1m375/375[0m 