In [1]:
import os
# ignore unimportant msgs from tensorflow
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

### Enable GPU

In [2]:
# True when TensorFlow can see at least one physical GPU
bool(tf.config.list_physical_devices('GPU'))

True

In [3]:
# `tf` is already imported in the first cell, so it is not re-imported here.
# Report the installed TensorFlow version.
print(tf.version.VERSION)

2.10.1


In [4]:
# List every device (CPU and GPU) that the TensorFlow runtime can see
from tensorflow.python.client import device_lib

local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 13961599702222817789
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 2909221684
locality {
  bus_id: 1
  links {
  }
}
incarnation: 16760494393356069578
physical_device_desc: "device: 0, name: NVIDIA GeForce GTX 1050 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1"
xla_global_id: 416903419
]


In [5]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        print("GPU device found")
        try:
            tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as err:
            # Memory growth must be configured before the GPUs have been initialized;
            # report the problem instead of aborting the notebook.
            print(f"Could not enable memory growth: {err}")
else:
    print("No GPU device found")

GPU device found


### Load dataset

In [6]:
# Load MNIST as (train images, train labels), (test images, test labels)
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [7]:
# Show the dimensions of the training images and of their labels
print("Shape of X_train is: {}".format(X_train.shape))
print("Shape of y_train is:{}".format(y_train.shape))

Shape of X_train is: (60000, 28, 28)
Shape of y_train is:(60000,)


- To feed the 28*28 images to a fully connected NN, we need to flatten each image into a 1D array of 28*28 = 784 values.
- This gives one row of 784 features per image, which we can pass to the NN for training.

In [8]:
def flatten_and_normalize(images):
    """Flatten (N, 28, 28) images into (N, 784) float32 rows scaled to the 0-1 range.

    Arrays that are already 2-D are returned unchanged, so re-running this cell
    does not divide the pixel values by 255 a second time.
    """
    if images.ndim == 2:
        return images
    # -1 keeps the same number of rows (one per image); each 28x28 image becomes
    # a 1D array of 784 values, normalized to 0-1 by dividing by 255
    return images.reshape(-1, 28*28).astype("float32") / 255.0


X_train = flatten_and_normalize(X_train)

# do the same for the test set
X_test = flatten_and_normalize(X_test)

- TensorFlow automatically converts NumPy arrays to tensors internally, so we don't have to worry about it.

- There is no need to write this: X_train = tf.convert_to_tensor(X_train)

### Sequential API (Very convenient, not very flexible)

- **Major Limitation:** It only allows you to have 1 input mapped to 1 output

- So, use only when you have 1 input and 1 output

In [9]:
# fully connected neural network
model = tf.keras.Sequential(
    [
        # `shape` must be a tuple: (28*28) is just the int 784, so a trailing comma is needed
        keras.Input(shape=(28*28,)), # input layer; useful for printing model summary
        layers.Dense(512, activation="relu"), # first hidden layer
        layers.Dense(256, activation="relu"), # second hidden layer
        layers.Dense(10), # output layer: one node (raw logit, no activation) for each digit
    ]
)

- model.compile: tell keras how to configure the training part of NN.
- Ex: Here we can specify the loss function

- **Note:** 
1) Both sparse categorical crossentropy and categorical crossentropy are meant for single-label, multi-class problems (each sample belongs to exactly one class); they differ only in how the labels are encoded. Categorical crossentropy additionally accepts soft probability labels (like [0.5, 0.3, 0.2]). When one sample can belong to several classes at once (multi-label), use binary crossentropy with a sigmoid output instead.

2) If your targets are one-hot encoded, use categorical_crossentropy. But if your targets are integers, use sparse_categorical_crossentropy


In [10]:
# summary() prints the table itself and returns None, so wrapping it in print()
# would only add a stray "None" line to the output
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               401920    
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 10)                2570      
                                                                 
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________
None


In [12]:
# Configure training: loss, optimizer and the metrics to report
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    # the last Dense layer has no activation, so the loss receives raw logits
    # and applies softmax itself before the sparse categorical crossentropy
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [13]:
# verbose=2 prints one line with loss and accuracy per epoch
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    verbose=2,
)

Epoch 1/5
1875/1875 - 11s - loss: 0.1850 - accuracy: 0.9446 - 11s/epoch - 6ms/step
Epoch 2/5
1875/1875 - 10s - loss: 0.0798 - accuracy: 0.9747 - 10s/epoch - 5ms/step
Epoch 3/5
1875/1875 - 11s - loss: 0.0544 - accuracy: 0.9822 - 11s/epoch - 6ms/step
Epoch 4/5
1875/1875 - 11s - loss: 0.0398 - accuracy: 0.9870 - 11s/epoch - 6ms/step
Epoch 5/5
1875/1875 - 9s - loss: 0.0331 - accuracy: 0.9896 - 9s/epoch - 5ms/step


<keras.callbacks.History at 0x2b5d1dff400>

In [14]:
# Evaluate on the held-out test set; the result is [loss, accuracy]
model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=32,
    verbose=2,
)

313/313 - 2s - loss: 0.0799 - accuracy: 0.9774 - 2s/epoch - 6ms/step


[0.0799143835902214, 0.977400004863739]

### Alternative way to add layers sequentially

- This method is useful to print model summary after adding each layer (useful for debugging layer by layer)

In [15]:
# Build the same network one layer at a time, printing the summary along the way
model = keras.Sequential()
# `shape` must be a tuple: (28*28) is just the int 784, so a trailing comma is needed
model.add(keras.Input(shape = (28*28,)))
model.add(layers.Dense(512, activation = "relu"))
model.summary() # summary after the first hidden layer
model.add(layers.Dense(256, activation = "relu", name = "my_layer"))
model.summary() # summary after the second hidden layer
model.add(layers.Dense(10, activation = "softmax")) # final layer


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 512)               401920    
                                                                 
Total params: 401,920
Trainable params: 401,920
Non-trainable params: 0
_________________________________________________________________
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 512)               401920    
                                                                 
 my_layer (Dense)            (None, 256)               131328    
                                                                 
Total params: 533,248
Trainable params: 533,248
Non-trainable params: 0
_________________________________________________________________


### Method to extract specific layer outputs, useful for debugging

In [16]:
# To get a specific layer's output, wrap it in a new Model that shares the inputs.
# By position:
# model = keras.Model(inputs = model.inputs, outputs = [model.layers[-2].output])
# or by name, as done here:
hidden_output = model.get_layer("my_layer").output
model = keras.Model(inputs=model.inputs, outputs=[hidden_output])

In [None]:
# if you want to get all the layers output then use below code
# model = keras.Model(inputs = model.inputs, outputs = [layer.output for layer in model.layers])

# # here we get all the features from the model
# features = model.predict(X_train)

# for feature in features:
#     print(feature.shape)

In [18]:
# Run the training images through the truncated model to get the "my_layer" activations
feature = model.predict(x=X_train)



In [19]:
# One row of hidden-layer activations per training image
print(f"{feature.shape}")

(60000, 256)


### Functional API (A bit more flexible)

- Can handle multiple inputs and multiple outputs

In [20]:
# Functional API: each layer is called on the tensor produced by the previous one
# `shape` must be a tuple: (28*28) is just the int 784, so a trailing comma is needed
inputs = keras.Input(shape = (28*28,))
x = layers.Dense(512, activation = "relu", name = "first_layer")(inputs) # call the layer on the input
x = layers.Dense(256, activation = "relu", name = "second_layer")(x) # call the layer on the output of the previous layer
outputs = layers.Dense(10, activation = "softmax")(x) # softmax output: one probability per digit
model = keras.Model(inputs = inputs, outputs = outputs, name = "mnist_model")


In [21]:
# summary() prints the table itself and returns None, so wrapping it in print()
# would only add a stray "None" line to the output
model.summary()

Model: "mnist_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 784)]             0         
                                                                 
 first_layer (Dense)         (None, 512)               401920    
                                                                 
 second_layer (Dense)        (None, 256)               131328    
                                                                 
 dense_5 (Dense)             (None, 10)                2570      
                                                                 
Total params: 535,818
Trainable params: 535,818
Non-trainable params: 0
_________________________________________________________________
None


In [22]:
# Configure training: loss, optimizer and the metrics to report
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    # the output layer already applies softmax, so the loss receives probabilities;
    # from_logits=False is also the default
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=["accuracy"],
)

In [23]:
# verbose=1 shows the loss and accuracy for each epoch with an animated progress bar
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2b58167e400>

In [24]:
# Evaluate on the held-out test set; the result is [loss, accuracy]
model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=32,
    verbose=1,
)



[0.07802246510982513, 0.9790999889373779]