In [1]:
import tensorflow as tf

mnist = tf.keras.datasets.mnist.load_data()
(X_train, y_train), (X_test, y_test) = mnist

X_train, X_test = X_train/255. , X_test/255.

X_train[0].shape

(28, 28)

In [7]:
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(200, activation="relu"),
    tf.keras.layers.Dense(200, activation="relu"),
    tf.keras.layers.Dense(10, activation="softmax"),
])


optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
model.compile(
    loss = tf.keras.losses.sparse_categorical_crossentropy,
    optimizer = optimizer,
    metrics = ["accuracy"]
)

history = model.fit(X_train, y_train, epochs=3, validation_split=0.1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


### L1 and L2 Regularization Techniques

In [8]:
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(
        200, activation="relu", kernel_initializer="he_normal", kernel_regularizer=tf.keras.regularizers.l2(0.01)
    ),
    tf.keras.layers.Dense(
        200, activation="relu", kernel_initializer="he_normal", kernel_regularizer=tf.keras.regularizers.l2(0.01)
    ),
    tf.keras.layers.Dense(
        10, activation="softmax", kernel_initializer="he_normal", kernel_regularizer=tf.keras.regularizers.l2(0.01)
    ),
])


optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
model.compile(
    loss = tf.keras.losses.sparse_categorical_crossentropy,
    optimizer = optimizer,
    metrics = ["accuracy"]
)

history = model.fit(X_train, y_train, epochs=3, validation_split=0.1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


### Using L1 and L2 Together

In [9]:
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(
        200, activation="relu", kernel_initializer="he_normal", kernel_regularizer=tf.keras.regularizers.l1_l2(0.01)
    ),
    tf.keras.layers.Dense(
        200, activation="relu", kernel_initializer="he_normal", kernel_regularizer=tf.keras.regularizers.l1_l2(0.01)
    ),
    tf.keras.layers.Dense(
        10, activation="softmax", kernel_initializer="he_normal", kernel_regularizer=tf.keras.regularizers.l1_l2(0.01)
    ),
])


optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
model.compile(
    loss = tf.keras.losses.sparse_categorical_crossentropy,
    optimizer = optimizer,
    metrics = ["accuracy"]
)

history = model.fit(X_train, y_train, epochs=3, validation_split=0.1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


### Code Refactoring

In [11]:
from functools import partial

RegularizedDense = partial(
    tf.keras.layers.Dense,
    activation="relu",
    kernel_initializer="he_normal", kernel_regularizer=tf.keras.regularizers.l2(0.01)
)

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=[28,28]),
    RegularizedDense(200),
    RegularizedDense(200),
    RegularizedDense(10, activation="softmax")
])


optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
model.compile(
    loss = tf.keras.losses.sparse_categorical_crossentropy,
    optimizer = optimizer,
    metrics = ["accuracy"]
)

history = model.fit(X_train, y_train, epochs=3, validation_split=0.1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


### Dropout

In [12]:
RegularizedDense = partial(
    tf.keras.layers.Dense,
    activation="relu",
    kernel_initializer="he_normal", kernel_regularizer=tf.keras.regularizers.l2(0.01)
)


model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=[28,28]),
    tf.keras.layers.Dropout(rate=0.2),
    RegularizedDense(100),
    tf.keras.layers.Dropout(rate=0.2),
    RegularizedDense(100),
    tf.keras.layers.Dropout(rate=0.2),
    RegularizedDense(10, activation="softmax")
])

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
model.compile(
    loss = tf.keras.losses.sparse_categorical_crossentropy,
    optimizer = optimizer,
    metrics = ["accuracy"]
)

history = model.fit(X_train, y_train, epochs=3, validation_split=0.1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [14]:
model.evaluate(X_test, y_test)



[1.2686586380004883, 0.9125000238418579]

### MCDropout

In [24]:
class MCDropout(tf.keras.layers.Dropout):
    def call(self, inputs, training=False):
        return super().call(inputs, training=False)

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=[28,28]),
    MCDropout(rate=0.2),
    RegularizedDense(100),
    MCDropout(rate=0.2),
    RegularizedDense(100),
    MCDropout(rate=0.2),
    RegularizedDense(10, activation="softmax")
])

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
model.compile(
    loss = tf.keras.losses.sparse_categorical_crossentropy,
    optimizer = optimizer,
    metrics = ["accuracy"]
)

history = model.fit(X_train, y_train, epochs=3, validation_split=0.1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [27]:
model.evaluate(X_test, y_test)



[1.2838364839553833, 0.911899983882904]