In [None]:
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time

# Apply matplotlib's 'fivethirtyeight' style sheet to any figures drawn later.
plt.style.use('fivethirtyeight')
# Load the TensorBoard IPython extension (it provides the %tensorboard magic).
# NOTE(review): no %tensorboard call or TensorBoard callback is visible in this
# part of the notebook — confirm the extension is still needed.
%load_ext tensorboard

In [2]:
# Fetch Fashion-MNIST through Keras; load_data() returns the
# (images, labels) pairs for the training set and the test set.
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Rescale both image arrays by 1/255 (the raw images are 8-bit grayscale
# according to the Keras dataset documentation, so values land in [0, 1]).
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0

# Hold out the first 5000 training samples as the validation set;
# everything after them is used for fitting.
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1us/step


In [10]:
from tensorflow.keras import Sequential,Input
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LeakyReLU

In [15]:
# Fix the NumPy and TensorFlow global seeds so that random operations
# (such as weight initialisation) are repeatable between runs.
# The value 42 is arbitrary; it has no effect on the train/validation split,
# which is a fixed slice of the training array.
np.random.seed(42)
tf.random.set_seed(42)

# Baseline classifier, assembled layer by layer:
# 28x28 input -> flatten -> Dense(300) -> LeakyReLU -> Dense(100) -> LeakyReLU
# -> 10-way softmax. Both hidden Dense layers use He-normal initialisation.
model = Sequential()
model.add(Input(shape=(28, 28)))
model.add(Flatten())
model.add(Dense(300, kernel_initializer='he_normal'))
model.add(LeakyReLU())
model.add(Dense(100, kernel_initializer='he_normal'))
model.add(LeakyReLU())
model.add(Dense(10, activation='softmax'))

# Training configuration. LOSS and METRIC are module-level names because the
# batch-normalised model further down is compiled with them as well.
LOSS = 'sparse_categorical_crossentropy'
METRIC = ['accuracy']
OPTIMIZER = tf.keras.optimizers.SGD(learning_rate=1e-3)
model.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=METRIC)

In [16]:
# Print the layer-by-layer overview of the baseline model.
model.summary()

In [None]:
# Time the baseline training run.
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() follows the system wall clock, which can be
# adjusted while training is running and skew the result.
# (`time` is already imported in the notebook's first cell, so it is not
# re-imported here.)
start = time.perf_counter()
# Train for 10 epochs with validation on the held-out split after each epoch;
# verbose=2 logs one summary line per epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
    verbose=2,
)
end = time.perf_counter()

# Elapsed time is reported in seconds (perf_counter returns fractional seconds).
print(f'Total Training time is {end - start}')

Epoch 1/10
1719/1719 - 4s - 3ms/step - accuracy: 0.5853 - loss: 1.3607 - val_accuracy: 0.7284 - val_loss: 0.8926
Epoch 2/10
1719/1719 - 5s - 3ms/step - accuracy: 0.7500 - loss: 0.7962 - val_accuracy: 0.7736 - val_loss: 0.7059
Epoch 3/10
1719/1719 - 5s - 3ms/step - accuracy: 0.7824 - loss: 0.6773 - val_accuracy: 0.7956 - val_loss: 0.6274
Epoch 4/10
1719/1719 - 4s - 3ms/step - accuracy: 0.7982 - loss: 0.6173 - val_accuracy: 0.8080 - val_loss: 0.5808
Epoch 5/10
1719/1719 - 4s - 3ms/step - accuracy: 0.8092 - loss: 0.5789 - val_accuracy: 0.8190 - val_loss: 0.5491
Epoch 6/10
1719/1719 - 3s - 2ms/step - accuracy: 0.8165 - loss: 0.5517 - val_accuracy: 0.8244 - val_loss: 0.5259
Epoch 7/10
1719/1719 - 3s - 2ms/step - accuracy: 0.8222 - loss: 0.5313 - val_accuracy: 0.8290 - val_loss: 0.5083
Epoch 8/10
1719/1719 - 3s - 2ms/step - accuracy: 0.8265 - loss: 0.5154 - val_accuracy: 0.8324 - val_loss: 0.4944
Epoch 9/10
1719/1719 - 4s - 2ms/step - accuracy: 0.8295 - loss: 0.5024 - val_accuracy: 0.8350 - 

#### Applying Batch Normalization

In [37]:
from tensorflow.keras.layers import BatchNormalization

# Second classifier with the same 300-100-10 dense stack as the baseline, plus
# a BatchNormalization layer after the flattened input and after each hidden
# Dense layer.
# NOTE(review): besides batch normalisation, this model also differs from the
# baseline in using ReLU instead of LeakyReLU and the default kernel
# initialiser instead of 'he_normal' — keep that in mind when comparing them.
model2 = Sequential()
model2.add(Input(shape=(28, 28)))
model2.add(Flatten())
model2.add(BatchNormalization())
model2.add(Dense(300, activation='relu'))
model2.add(BatchNormalization())
model2.add(Dense(100, activation='relu'))
model2.add(BatchNormalization())
model2.add(Dense(10, activation='softmax'))

In [38]:
# Print the layer-by-layer overview of the batch-normalised model.
model2.summary()

In [43]:
# Compile with the same loss and metric as the baseline, using a new SGD
# optimizer instance dedicated to this model.
# NOTE(review): the learning rate here is 5e-5, while the baseline model is
# compiled with 1e-3, so the two runs are not trained under the same settings —
# confirm this is intentional.
model2.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=5e-5),
    loss=LOSS,
    metrics=METRIC,
)

In [44]:
# Time the training run of the batch-normalised model.
# time.perf_counter() is used instead of time.time() because it is a monotonic,
# high-resolution clock meant for measuring elapsed intervals, whereas the
# wall clock can be adjusted while training is running.
start = time.perf_counter()
# Train for 10 epochs with validation on the held-out split after each epoch;
# verbose=2 logs one summary line per epoch.
history2 = model2.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_valid, y_valid),
    verbose=2,
)
end = time.perf_counter()

# Elapsed time is reported in seconds (perf_counter returns fractional seconds).
print(f'Total Training time is {end - start}')

Epoch 1/10
1719/1719 - 9s - 5ms/step - accuracy: 0.9477 - loss: 0.1426 - val_accuracy: 0.8912 - val_loss: 0.3845
Epoch 2/10
1719/1719 - 9s - 5ms/step - accuracy: 0.9486 - loss: 0.1401 - val_accuracy: 0.8906 - val_loss: 0.3825
Epoch 3/10
1719/1719 - 8s - 5ms/step - accuracy: 0.9495 - loss: 0.1380 - val_accuracy: 0.8902 - val_loss: 0.3808
Epoch 4/10
1719/1719 - 7s - 4ms/step - accuracy: 0.9503 - loss: 0.1361 - val_accuracy: 0.8894 - val_loss: 0.3793
Epoch 5/10
1719/1719 - 7s - 4ms/step - accuracy: 0.9509 - loss: 0.1345 - val_accuracy: 0.8900 - val_loss: 0.3780
Epoch 6/10
1719/1719 - 6s - 4ms/step - accuracy: 0.9516 - loss: 0.1330 - val_accuracy: 0.8906 - val_loss: 0.3769
Epoch 7/10
1719/1719 - 7s - 4ms/step - accuracy: 0.9520 - loss: 0.1317 - val_accuracy: 0.8910 - val_loss: 0.3760
Epoch 8/10
1719/1719 - 10s - 6ms/step - accuracy: 0.9521 - loss: 0.1306 - val_accuracy: 0.8916 - val_loss: 0.3751
Epoch 9/10
1719/1719 - 10s - 6ms/step - accuracy: 0.9525 - loss: 0.1295 - val_accuracy: 0.8918 

In [45]:
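## Hence, batch normalization has improved the accuracy of the model, as seen above, within the same time limit.
## NOTE(review): the logged run above already starts at ~0.95 training accuracy in epoch 1, which suggests model2 had been trained before this run — re-run the notebook from a fresh kernel to confirm the comparison.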