In [1]:
# Widen the notebook's main container to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import HTML, display

display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# libraries import
import tensorflow as tf
import tensorflow.keras as keras

from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
# Iterating over the device list (rather than indexing devices[0]) keeps the
# notebook runnable on CPU-only machines, where the list is empty, and applies
# the same setting to every GPU on multi-GPU hosts.
devices = tf.config.list_physical_devices("GPU")
try:
    for device in devices:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    # Memory growth has to be configured before the GPUs are initialised;
    # re-running this cell later in a session ends up here.
    print(err)

In [3]:
# TODO: add a plot of sample MNIST images for each digit category, and another plot of a small subset to show the image size

In [4]:
# Load the MNIST train/test split as (images, labels) pairs
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Show the shape of the training images and of the training labels
print(x_train.shape, y_train.shape, sep="\n")

(60000, 28, 28)
(60000,)


In [5]:
# This notebook uses a basic fully connected neural network, which takes one
# flat feature vector per sample, so every 28x28 image is flattened to 784 values.
# The -1 keeps the number of samples unchanged and only merges the last two axes.
x_train, x_test = (split.reshape(-1, 28 * 28) for split in (x_train, x_test))
print(x_train.dtype, x_train.shape, sep="\n")

uint8
(60000, 784)


In [6]:
# Normalise the training and test pixels to the range [0, 1]. Why?
# 1. Features on very different scales cause trouble in training: the network
#    leans towards the features with large values.
# 2. Large input values are computationally more expensive to work with.
# 3. They slow down convergence of the loss function, which can hurt accuracy.
#
# Cast to float32 *before* dividing: dividing the uint8 array by 255.0 directly
# would first build a float64 copy of the whole dataset and only then shrink it.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

print(x_train.dtype)
print(x_test.dtype)

print(x_train.shape)
print(x_test.shape)

float32
float32
(60000, 784)
(10000, 784)


TensorFlow internally converts NumPy arrays into tensors, so we do not have to convert them ourselves.

In [7]:
# Sequential API (suitable for simple stacks of layers, not for complex architectures)
# A Sequential model accepts exactly one input and produces exactly one output

model = keras.Sequential()
model.add(layers.Dense(512, activation="relu", input_shape=(784,)))
model.add(layers.Dense(256, activation="relu"))
# No activation on the last layer: it outputs raw scores (logits) for the 10 classes.
model.add(layers.Dense(10))
# model.add(layers.Dense(10, activation="softmax"))  # Alternate version (pair it with from_logits=False)

# The last layer emits logits, so the loss is told to apply softmax itself via from_logits=True.
# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)
# Alternate version (for the softmax output layer):
# model.compile(loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False), optimizer=keras.optimizers.Adam(learning_rate=0.001), metrics=["accuracy"])

In [8]:
# Print a per-layer table of output shapes and parameter counts for the Sequential model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               401920    
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 10)                2570      
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train for 5 epochs, holding out 15% of the training data for validation.
# The returned History object is kept so the per-epoch metrics can be inspected
# later (history.history); assigning it also avoids echoing the object's repr.
history = model.fit(x_train, y_train, batch_size=1024, epochs=5, verbose=1, validation_split=0.15)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1f6199b9760>

In [10]:
# Score the Sequential model on the test set; the cell shows [loss, accuracy]
model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=1024,
    verbose=1,
)



[0.0879177525639534, 0.9728000164031982]

In [11]:
# Debugging internal layers: reuse every layer except the last one in a new
# Sequential model, so predict() returns the output of the second-to-last layer.
debug_seq_model = keras.Sequential(
    layers=model.layers[:-1],
)
features = debug_seq_model.predict(x=x_test)
print(features.shape)

(10000, 256)


In [12]:
# Build a probe model that returns the output of every layer in a single pass
all_layers_model = keras.Model(
    inputs=model.inputs,
    outputs=[lyr.output for lyr in model.layers],
)
features = all_layers_model.predict(x_test)

# One shape per layer, in layer order
print(*(layer_out.shape for layer_out in features), sep="\n")

(10000, 512)
(10000, 256)
(10000, 10)


In [13]:
# Functional API (suitable for both simple and complex architectures)
# Unlike Sequential, it can also express models with several inputs or outputs;
# this example uses a single input and a single output.

inputs = layers.Input(shape=(784,), name="input_layer")
dense = layers.Dense(512, activation="relu", name="dense_1")(inputs)
dense = layers.Dense(256, activation="relu", name="dense_2")(dense)
# Softmax on the output layer, so the model emits class probabilities rather than logits
outputs = layers.Dense(10, activation="softmax", name="output_layer")(dense)

functional_model = keras.Model(inputs, outputs)
# The loss keeps its default from_logits=False because the model already applies softmax.
# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
functional_model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

In [14]:
# Print a per-layer table of output shapes and parameter counts for the functional model
functional_model.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     [(None, 784)]             0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_2 (Dense)              (None, 256)               131328    
_________________________________________________________________
output_layer (Dense)         (None, 10)                2570      
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Train for 5 epochs, holding out 15% of the training data for validation.
# The returned History object is kept so the per-epoch metrics can be inspected
# later (functional_history.history); assigning it also avoids echoing the object's repr.
functional_history = functional_model.fit(x_train, y_train, batch_size=1024, epochs=5, verbose=1, validation_split=0.15)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1f62a0e0700>

In [16]:
# Score the functional model on the test set; the cell shows [loss, accuracy]
functional_model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=1024,
    verbose=1,
)



[0.08651512116193771, 0.9739000201225281]

In [17]:
# Debugging internal layers: build a model that maps the original input to the
# output of the layer named "dense_1", then run the test set through it.
debug_func_model = keras.Model(
    inputs=functional_model.inputs,
    outputs=[functional_model.get_layer(name="dense_1").output],
)
features = debug_func_model.predict(x=x_test)
print(features.shape)

(10000, 512)
