# Exercise 12

In [1]:
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.backend as K
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class my_LayerNormalization(keras.layers.Layer):
    def build(self, batch_input_shape):
        self.alpha = self.add_weight(name="alpha",
                                     shape=batch_input_shape[-1:],
                                     dtype="float32",
                                     initializer="ones",
                                     trainable=True)
        self.beta = self.add_weight(name="beta",
                                    shape=batch_input_shape[-1:],
                                    dtype="float32",
                                    initializer="zeros",
                                    trainable=True)
        super().build(batch_input_shape)
    
    def call(self, X):
        mu, sig_sq = tf.nn.moments(X, axes=-1, keepdims=True)
        sig = tf.math.sqrt(sig_sq)
        epsilon = tf.constant(0.001, dtype="float32", name="epsilon")
        return self.alpha * (X - mu) / (sig + epsilon) + self.beta
        
    
    def compute_output_shape(self, batch_input_shape):
        return tf.TensorShape(batch_input_shape.as_list())

In [3]:
my_ln = my_LayerNormalization()
ln = keras.layers.LayerNormalization()

In [4]:
my_ln.compute_output_shape(tf.TensorShape([13, 22, 2]))

TensorShape([13, 22, 2])

In [5]:
ln.compute_output_shape(tf.TensorShape([13, 22, 2]))

TensorShape([13, 22, 2])

In [6]:
ln.build([3,3])
my_ln.build([3,3])

In [7]:
X = tf.constant([[0.2,23,13], [4,5,6], [7,8,9]], dtype="float32")
X

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[ 0.2, 23. , 13. ],
       [ 4. ,  5. ,  6. ],
       [ 7. ,  8. ,  9. ]], dtype=float32)>

In [8]:
ln.call(X)

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[-1.271681  ,  1.1716609 ,  0.10001975],
       [-1.2238274 ,  0.        ,  1.2238274 ],
       [-1.2238274 ,  0.        ,  1.2238274 ]], dtype=float32)>

In [9]:
my_ln.call(X)

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[-1.2715518 ,  1.171542  ,  0.10000969],
       [-1.2232467 ,  0.        ,  1.2232467 ],
       [-1.2232467 ,  0.        ,  1.2232467 ]], dtype=float32)>

# Exercise 13

In [69]:
fashion_mnist = keras.datasets.fashion_mnist
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train = X_train / 255.0
X_test = X_test / 255.0
y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

#X_train, y_train = X_train[:3000], y_train[:3000]

In [70]:
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
               "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

In [71]:
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.Dense(10, activation="softmax", kernel_initializer="glorot_normal"))

In [72]:
def print_status_bar(iteration, total, loss, metrics=None):
    metrics = " - ".join(["{}: {:.4f}".format(m.name, m.result())
                         for m in [loss] + (metrics or [])])
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total) + metrics,
          end=end)

In [73]:
n_epochs = 20
batch_size = 32
n_steps = len(X_train) // batch_size
optimizer = keras.optimizers.Nadam()
loss_fn = keras.losses.categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.Accuracy()]

In [74]:
def random_batch(X, y, batch_size=32):
    idx = np.random.randint(len(X), size=batch_size)
    return X[idx], y[idx]

In [75]:
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train, y_train)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch, training=True)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            loss = tf.add_n([main_loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
    val_loss = tf.reduce_mean(loss_fn(y_test, model(X_test, training=False)))
    val_acc = keras.metrics.Accuracy()(y_test, model(X_test, training=False))
    print("\nvalidation loss / accuracy: {} / {}".format(val_loss, val_acc))
    for metric in [mean_loss] + metrics:
        metric.reset_states()

Epoch 1/20


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

60000/60000 - mean: 0.4532 - accuracy: 0.0000

validation loss / accuracy: 0.47989600896835327 / 0.00013000000035390258
Epoch 2/20
60000/60000 - mean: 0.3449 - accuracy: 0.0004

validation loss / accuracy: 0.37144985795021057 / 0.0009399999980814755
Epoch 3/20
60000/60000 - mean: 0.3003 - accuracy: 0.0020

validation loss / accuracy: 0.3503665626049042 / 0.003370000049471855
Epoch 4/20
60000/60000 - mean: 0.2835 - accuracy: 0.0039

validation loss / accuracy: 0.37425991892814636 / 0.005530000198632479
Epoch 5/20
60000/60000 - mean: 0.2629 - accuracy: 0.0066

validation loss / accuracy: 0.38405683636665344 / 0.006490000057965517
Epoch 6/20
60000/60000 - mean: 0.2511 - accuracy: 0.

In [77]:
optimizer1 = keras.optimizers.Nadam()
optimizer2 = keras.optimizers.Adam()
lower = keras.models.Sequential()
upper = keras.models.Sequential()
lower.add(keras.layers.Flatten(input_shape=[28, 28]))
lower.add(keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"))
upper.add(keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"))
upper.add(keras.layers.Dense(10, activation="softmax", kernel_initializer="glorot_normal"))

In [80]:
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        X_batch, y_batch = random_batch(X_train, y_train)
        with tf.GradientTape(persistent=True) as tape:
            low = lower(X_batch, training=True)
            y_pred = upper(low, training=True)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            loss = tf.add_n([main_loss] + upper.losses + lower.losses)
        gradients_upper = tape.gradient(loss, upper.trainable_variables)
        optimizer1.apply_gradients(zip(gradients_upper, upper.trainable_variables))
        gradients_lower = tape.gradient(loss, lower.trainable_variables)
        del tape
        optimizer2.apply_gradients(zip(gradients_lower, lower.trainable_variables))
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
    val_loss = tf.reduce_mean(loss_fn(y_test, upper(lower(X_test, training=False), training=False)))
    val_acc = keras.metrics.Accuracy()(y_test, upper(lower(X_test, training=False), training=False))
    print("\nvalidation loss / accuracy: {} / {}".format(val_loss, val_acc))
    for metric in [mean_loss] + metrics:
        metric.reset_states()

Epoch 1/20
60000/60000 - mean: 0.4372 - accuracy: 0.0001

validation loss / accuracy: 0.40593862533569336 / 0.0002099999983329326
Epoch 2/20
60000/60000 - mean: 0.3447 - accuracy: 0.0004

validation loss / accuracy: 0.40674111247062683 / 0.0008399999933317304
Epoch 3/20
60000/60000 - mean: 0.3173 - accuracy: 0.0028

validation loss / accuracy: 0.3466922342777252 / 0.005900000222027302
Epoch 4/20
60000/60000 - mean: 0.2890 - accuracy: 0.0066

validation loss / accuracy: 0.35494741797447205 / 0.006680000107735395
Epoch 5/20
60000/60000 - mean: 0.2741 - accuracy: 0.0092

validation loss / accuracy: 0.3858548402786255 / 0.00937000010162592
Epoch 6/20
60000/60000 - mean: 0.2587 - accuracy: 0.0117

validation loss / accuracy: 0.36618533730506897 / 0.01119999960064888
Epoch 7/20
60000/60000 - mean: 0.2444 - accuracy: 0.0132

validation loss / accuracy: 0.35157474875450134 / 0.014000000432133675
Epoch 8/20
60000/60000 - mean: 0.2391 - accuracy: 0.0139

validation loss / accuracy: 0.38122242689