In [1]:
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use("fivethirtyeight")
%load_ext tensorboard

In [2]:
# Fetch Fashion-MNIST as (train images, train labels), (test images, test labels).
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Rescale the pixel values by 1/255 (8-bit intensities end up in [0, 1]).
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0

# The first 5000 training samples become the validation split; the rest is used for training.
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [3]:
# Seed both random number generators before any layer is created,
# so the weight initialisation is repeatable from run to run.
np.random.seed(42)
tf.random.set_seed(42)

# Baseline network without batch normalization: two hidden Dense layers with
# He-normal initialisation. The activation is added as a standalone LeakyReLU
# layer instead of being passed through the Dense `activation=` argument.
LAYERS = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(300, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(10, activation="softmax"),
]

model = tf.keras.models.Sequential(LAYERS)

In [5]:
# Configure training: plain SGD with a small learning rate, sparse categorical
# cross-entropy as the loss, and accuracy as the reported metric.
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [6]:
# Print the layer-by-layer output shapes and parameter counts of the baseline model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 300)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                1010      
                                                                 
Total params: 266,610
Trainable params: 266,610
Non-trai

In [7]:
# Train for 10 epochs and evaluate on the validation split after each epoch.
# verbose=2 prints one summary line per epoch instead of a progress bar.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=10,
    verbose=2,
)

Epoch 1/10
1719/1719 - 10s - loss: 1.2491 - accuracy: 0.6119 - val_loss: 0.8632 - val_accuracy: 0.7186 - 10s/epoch - 6ms/step
Epoch 2/10
1719/1719 - 5s - loss: 0.7835 - accuracy: 0.7421 - val_loss: 0.7015 - val_accuracy: 0.7696 - 5s/epoch - 3ms/step
Epoch 3/10
1719/1719 - 5s - loss: 0.6748 - accuracy: 0.7788 - val_loss: 0.6351 - val_accuracy: 0.7918 - 5s/epoch - 3ms/step
Epoch 4/10
1719/1719 - 4s - loss: 0.6163 - accuracy: 0.7972 - val_loss: 0.5822 - val_accuracy: 0.8078 - 4s/epoch - 3ms/step
Epoch 5/10
1719/1719 - 5s - loss: 0.5790 - accuracy: 0.8083 - val_loss: 0.5505 - val_accuracy: 0.8202 - 5s/epoch - 3ms/step
Epoch 6/10
1719/1719 - 5s - loss: 0.5524 - accuracy: 0.8149 - val_loss: 0.5280 - val_accuracy: 0.8282 - 5s/epoch - 3ms/step
Epoch 7/10
1719/1719 - 4s - loss: 0.5324 - accuracy: 0.8208 - val_loss: 0.5098 - val_accuracy: 0.8328 - 4s/epoch - 2ms/step
Epoch 8/10
1719/1719 - 5s - loss: 0.5171 - accuracy: 0.8241 - val_loss: 0.5028 - val_accuracy: 0.8330 - 5s/epoch - 3ms/step
Epoch 

# BN approach one

In [None]:
del model  # remove the name bound to the previous model before building the next one

In [8]:
# Approach one: a BatchNormalization layer right after Flatten and after each
# hidden Dense layer. The Dense layers apply ReLU themselves, so here the
# normalization runs on the already-activated outputs.
LAYERS_BN = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Dense(300, activation="relu"),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.BatchNormalization(),

    tf.keras.layers.Dense(10, activation="softmax"),
]

model = tf.keras.models.Sequential(LAYERS_BN)

In [9]:
# Print the summary of the batch-normalized model; the BatchNormalization rows now carry parameters too.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization (BatchN  (None, 784)              3136      
 ormalization)                                                   
                                                                 
 dense_3 (Dense)             (None, 300)               235500    
                                                                 
 batch_normalization_1 (Batc  (None, 300)              1200      
 hNormalization)                                                 
                                                                 
 dense_4 (Dense)             (None, 100)               30100     
                                                                 
 batch_normalization_2 (Batc  (None, 100)             

In [10]:
# Each BatchNormalization layer keeps 4 values per input feature (moving mean, moving variance,
# gamma and beta), so these products reproduce its "Param #" entries in model.summary() by hand.
784 * 4, 300 * 4, 100*4

(3136, 1200, 400)

In [12]:
sum([3136, 1200, 400])  # total number of parameters held by the three BatchNormalization layers

4736

In [13]:
# 266610 is the total parameter count from the previous model's summary (no batch normalization).
# Adding the BatchNormalization parameters gives the total parameter count of the current model.
266610 + 4736

271346

In [14]:
# Of the 4 values per feature only gamma and beta are trained; the moving mean and moving variance
# are updated internally and are non-trainable, so half of the BN parameters are non-trainable.
4736/2

2368.0

In [15]:
# Trainable parameters of the current model: the 266610 Dense parameters plus the trainable half
# (gamma and beta) of the BatchNormalization parameters. `/` makes the result a float.
266610 + 4736/2


268978.0

In [16]:
# Index 1 is the first BatchNormalization layer (directly after Flatten).
# Keep a handle on it to check which of its variables are trainable.
bn1 = model.layers[1]

In [17]:
bn1  # display the layer object's repr to confirm it is a BatchNormalization layer

<keras.layers.normalization.batch_normalization.BatchNormalization at 0x7f529149bf40>

In [19]:
# List every variable of the layer together with its trainable flag:
# True  -> gamma and beta (trainable),
# False -> moving mean and moving variance (non-trainable).
for bn_var in bn1.variables:
    print(f"{bn_var.name} {bn_var.trainable}")

batch_normalization/gamma:0 True
batch_normalization/beta:0 True
batch_normalization/moving_mean:0 False
batch_normalization/moving_variance:0 False


In [20]:
# Same loss, optimizer and metric as the first model so the two training runs are comparable.
# `learning_rate` is the supported keyword: the old `lr` alias is deprecated and is
# rejected by newer Keras optimizers.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
              metrics=["accuracy"])



In [None]:
# Train the batch-normalized model for 10 epochs, evaluating on the validation
# split after each epoch; verbose=2 prints one summary line per epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=10,
    verbose=2,
)

# BN approach 2 {here Batch Normalization is applied as a separate layer, between each hidden Dense layer and its activation.}

In [None]:
del model  # remove the name bound to the approach-one model before building the next one

In [21]:
# Approach two: each hidden block is Dense -> BatchNormalization -> ReLU, with the
# activation applied as its own layer after normalization. The hidden Dense layers
# are created with use_bias=False; the BatchNormalization layer that follows
# already has its own beta offset.
LAYERS_BN_BIAS_FALSE = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.BatchNormalization(),

    # hidden block 1
    tf.keras.layers.Dense(300, use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation("relu"),

    # hidden block 2
    tf.keras.layers.Dense(100, use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation("relu"),

    # output layer
    tf.keras.layers.Dense(10, activation="softmax"),
]

model = tf.keras.models.Sequential(LAYERS_BN_BIAS_FALSE)

In [22]:
# Print the summary of the approach-two model; the bias-free Dense layers show fewer parameters.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization_3 (Batc  (None, 784)              3136      
 hNormalization)                                                 
                                                                 
 dense_6 (Dense)             (None, 300)               235200    
                                                                 
 batch_normalization_4 (Batc  (None, 300)              1200      
 hNormalization)                                                 
                                                                 
 activation (Activation)     (None, 300)               0         
                                                                 
 dense_7 (Dense)             (None, 100)              