In [0]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot as plt

In [0]:
# Load MNIST as (images, labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Rescale both image arrays by 1/255 in a single pass (assumes 8-bit pixel
# intensities, so the inputs end up in [0, 1]).
X_train, X_test = (images / 255. for images in (X_train, X_test))

In [25]:
# Baseline MLP: flatten each 28x28 input, apply one hidden ReLU layer of
# 50 units, then a 10-unit output layer with no activation (raw logits).
input_img = Input(shape=(28, 28))
x = Flatten()(input_img)
x = Dense(50, activation="relu")(x)
classification_layer = Dense(10)(x)

model = Model(input_img, classification_layer)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# rejected by newer Keras releases.
# from_logits=True because the output layer applies no softmax.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=["accuracy"])
model.summary()

# The test split is passed as validation data, so the per-epoch val_* metrics
# are computed on the test set.
model.fit(X_train, y_train,
          epochs=10,
          batch_size=128,
          shuffle=True,
          validation_data=(X_test, y_test))

Model: "model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        [(None, 28, 28)]          0         
_________________________________________________________________
flatten_8 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_14 (Dense)             (None, 50)                39250     
_________________________________________________________________
dense_15 (Dense)             (None, 10)                510       
Total params: 39,760
Trainable params: 39,760
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f001f298780>

In [26]:
# Wider MLP without regularization: flatten each 28x28 input, apply one
# hidden ReLU layer of 150 units, then a 10-unit logit output layer.
# NOTE(review): the results table at the end of the notebook labels the wide
# unregularized run as "250HLN", while this cell trains 150 hidden units --
# confirm which width the reported accuracies belong to.
input_img = Input(shape=(28, 28))
x = Flatten()(input_img)
x = Dense(150, activation="relu")(x)
classification_layer = Dense(10)(x)

model = Model(input_img, classification_layer)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# rejected by newer Keras releases.
# from_logits=True because the output layer applies no softmax.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=["accuracy"])
model.summary()

# The test split is passed as validation data, so the per-epoch val_* metrics
# are computed on the test set.
model.fit(X_train, y_train,
          epochs=10,
          batch_size=128,
          shuffle=True,
          validation_data=(X_test, y_test))

Model: "model_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_11 (InputLayer)        [(None, 28, 28)]          0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 150)               117750    
_________________________________________________________________
dense_17 (Dense)             (None, 10)                1510      
Total params: 119,260
Trainable params: 119,260
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f001f35ef60>

In [35]:
# L2-regularized MLP: flatten each 28x28 input, apply one hidden ReLU layer
# of 50 units whose kernel carries an L2 penalty (factor 0.001), then a
# 10-unit logit output layer. Only the hidden layer is regularized.
input_img = Input(shape=(28, 28))
x = Flatten()(input_img)
# The factor is passed positionally because the keyword name differs between
# TensorFlow releases (`l` in older ones, `l2` in newer ones).
x = Dense(50, activation="relu", kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
classification_layer = Dense(10)(x)

model = Model(input_img, classification_layer)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# rejected by newer Keras releases.
# from_logits=True because the output layer applies no softmax.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=["accuracy"])
model.summary()

# The test split is passed as validation data, so the per-epoch val_* metrics
# are computed on the test set.
model.fit(X_train, y_train,
          epochs=10,
          batch_size=128,
          shuffle=True,
          validation_data=(X_test, y_test))

Model: "model_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_20 (InputLayer)        [(None, 28, 28)]          0         
_________________________________________________________________
flatten_18 (Flatten)         (None, 784)               0         
_________________________________________________________________
dense_34 (Dense)             (None, 50)                39250     
_________________________________________________________________
dense_35 (Dense)             (None, 10)                510       
Total params: 39,760
Trainable params: 39,760
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f0017f90390>

In [36]:
# Wider L2-regularized MLP: flatten each 28x28 input, apply one hidden ReLU
# layer of 250 units whose kernel carries an L2 penalty (factor 0.001), then
# a 10-unit logit output layer. Only the hidden layer is regularized.
input_img = Input(shape=(28, 28))
x = Flatten()(input_img)
# The factor is passed positionally because the keyword name differs between
# TensorFlow releases (`l` in older ones, `l2` in newer ones).
x = Dense(250, activation="relu", kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
classification_layer = Dense(10)(x)

model = Model(input_img, classification_layer)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and
# rejected by newer Keras releases.
# from_logits=True because the output layer applies no softmax.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=["accuracy"])
model.summary()

# The test split is passed as validation data, so the per-epoch val_* metrics
# are computed on the test set.
model.fit(X_train, y_train,
          epochs=10,
          batch_size=128,
          shuffle=True,
          validation_data=(X_test, y_test))

Model: "model_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_21 (InputLayer)        [(None, 28, 28)]          0         
_________________________________________________________________
flatten_19 (Flatten)         (None, 784)               0         
_________________________________________________________________
dense_36 (Dense)             (None, 250)               196250    
_________________________________________________________________
dense_37 (Dense)             (None, 10)                2510      
Total params: 198,760
Trainable params: 198,760
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f00183259b0>

|                          |Classification accuracy|  |
|--------------------------|--------|--------|
|                          |Training|Testing |
| 50HLN+no regularization  | 0.9803 | 0.9674 |
| 50HLN+L2 regularization  | 0.9469 | 0.9498 |
| 250HLN+no regularization | 0.9856 | 0.9702 |
| 250HLN+L2 regularization | 0.9465 | 0.9453 |

The number of epochs is set to 10 and the learning rate to 0.01 for all configurations.

The table shows that the regularization term lowers the training accuracy compared to the MLPs with no regularization term. However, the testing accuracy is now approximately the same as the training accuracy, meaning that the regularized models generalize to unseen data without a noticeable gap. This is in contrast to the case with no regularization, where the testing accuracy is lower than the training accuracy. Note, however, that in absolute terms the regularized models reach a lower testing accuracy than the unregularized ones.