### Refer to the original notebook for a different, interesting implementation

In [122]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [123]:
# List the physical devices visible to TensorFlow.
tf.config.list_physical_devices()
# tf.config.list_logical_devices()


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [124]:
# Load MNIST; the result is unpacked below as ((x_train, y_train), (x_test, y_test)).
data = keras.datasets.mnist.load_data()

In [125]:
# Explore the nested structure returned by load_data(); the trailing comment on
# each line records what that expression evaluates to. None of these values is
# rendered, because the cell ends with an assignment.
type(data[0])  # tuple
len(data)  # 2
len(data[0])  # 2
len(data[0][0])  # 60000
type(data[0][0])  # numpy.ndarray
data[0][0].shape  # (60000, 28, 28)

# Split the nested tuples into the train / test images and labels.
(x_train, y_train), (x_test, y_test) = data

In [126]:
# Shape checks for the four arrays. Only the value of the last expression
# (y_test.shape) is rendered as the cell output.
x_train.shape
x_test.shape
y_train.shape
y_test.shape


(10000,)

In [127]:
# Peek at the first five training labels (integer class ids).
list(y_train[:5])

[5, 0, 4, 1, 9]

In [128]:
# One-hot encode the integer digit labels: each label becomes a length-10
# float vector with a single 1 at the label's index.
#
# tf.one_hot is used instead of the experimental CategoryEncoding layer so the
# result does not depend on the experimental preprocessing namespace or on how
# a particular TF release interprets 1-D input in 'binary' mode, and so the
# label arrays are not first copied into Python lists.
NUM_CLASSES = 10
y_train_enc = tf.one_hot(y_train, depth=NUM_CLASSES)
y_test_enc = tf.one_hot(y_test, depth=NUM_CLASSES)
# tf.one_hot([3, 2, 1, 0], depth=NUM_CLASSES)


In [129]:
# normalize
# Rescale both image sets by 1/255 (assumes 8-bit pixel intensities).
# NOTE: this rebinds x_train / x_test, so running the cell a second time
# divides the already-scaled values again.
x_train, x_test = x_train / 255.0, x_test / 255.0

In [187]:
# Simplistic models can afford a high dropout rate (0.5). This deeper stack
# uses 0.25 instead, to reduce bias.
dr = 0.25

# Widths of the hidden Dense layers, in order. Every hidden layer uses ReLU
# and is followed by a Dropout(dr) layer.
hidden_widths = (128, 128, 64, 64, 32, 32, 16, 16)

# Flatten the 28x28 input, then stack the Dense/Dropout pairs.
layer_stack = [tf.keras.layers.Flatten(input_shape=(28, 28))]
for width in hidden_widths:
    layer_stack.append(tf.keras.layers.Dense(width, activation='relu'))
    layer_stack.append(tf.keras.layers.Dropout(dr))

# Output layer: one softmax unit per digit class.
layer_stack.append(tf.keras.layers.Dense(10, activation='softmax'))

model = tf.keras.Sequential(layer_stack)

In [188]:
# Adam optimizer with every hyper-parameter spelled out explicitly.
adam_optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
)

### Use -
* binary cross-entropy -> single/two classes<br>
* categorical cross-entropy -> multiple classes (one-hot encoded labels)
* sparse categorical cross-entropy -> a convenience variant of categorical cross-entropy where you only specify the integer index of the class

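### `sparse_categorical_crossentropy` takes the integer labels (`y_train`) directly, as shown below; the matching metric for integer labels is `SparseCategoricalAccuracy` (`CategoricalAccuracy` expects one-hot labels)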

In [189]:
# Alternative setup (disabled by the `if 0` guard): train directly on the
# integer labels instead of the one-hot encoded ones.
if 0:
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        # Integer labels need the sparse accuracy metric; CategoricalAccuracy
        # takes an argmax over y_true and therefore expects one-hot targets.
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )
    model.fit(x_train, y_train, batch_size=32, epochs=5)

In [190]:
# Compile for one-hot targets: categorical cross-entropy loss, plain accuracy.
# Metrics that were tried / could be added to the list instead:
#   tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.AUC(),
#   tf.keras.metrics.Precision(), tf.keras.metrics.Recall(),
#   tf.keras.metrics.RecallAtPrecision(), tf.keras.metrics.PrecisionAtRecall()
model.compile(
    optimizer=adam_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [191]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_7 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_31 (Dense)             (None, 128)               100480    
_________________________________________________________________
dropout_24 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_32 (Dense)             (None, 128)               16512     
_________________________________________________________________
dropout_25 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_33 (Dense)             (None, 64)                8256      
_________________________________________________________________
dropout_26 (Dropout)         (None, 64)               

In [192]:
# Scratch cell: quick type checks and decay-factor arithmetic.
# Only the final expression (0.9**25) is rendered as the cell output.
y_train[0]
type(y_train_enc.numpy())
type(x_train)
0.95**50

# NOTE(review): presumably exploring learning-rate decay factors — confirm or remove.
tf.math.exp(-0.1)
0.9**10
0.9**25


/home/aman/Desktop/projects/repos/tutorials/tf_docs/site/en/tutorials/quickstart


0.0717897987691853

In [193]:
# Model checkpoints; the file name embeds the epoch number, validation
# accuracy and validation loss.
chkpt_callback = tf.keras.callbacks.ModelCheckpoint(
    "dumped_models/mnist_{epoch:04d}_{val_accuracy:.4f}_{val_loss:.2f}.hdf5"
)

# Progress bar logger, counting in steps.
progbar_callback = tf.keras.callbacks.ProgbarLogger(count_mode='steps')

# CSV log of the training run.
csv_callback = tf.keras.callbacks.CSVLogger('dumped_models/training.log')

# Early stopping with a patience of 20 epochs.
earlystopping_callback = tf.keras.callbacks.EarlyStopping(patience=20)

# LR scheduler
def custom_scheduler_func(epoch, lr):
    """Step-decay schedule for the learning rate.

    Args:
        epoch: Index of the epoch about to start.
        lr: Learning rate currently in effect.

    Returns:
        ``lr * 0.9`` when ``epoch`` is a positive multiple of 10,
        otherwise ``lr`` unchanged.
    """
    is_decay_epoch = epoch > 0 and epoch % 10 == 0
    return lr * 0.9 if is_decay_epoch else lr

# Apply the step-decay schedule at the start of every epoch (verbose logging).
lr_callback = tf.keras.callbacks.LearningRateScheduler(
    custom_scheduler_func,
    verbose=1,
)

# TensorBoard logs, written under dumped_models/logs.
tb_callback = tf.keras.callbacks.TensorBoard(
    'dumped_models/logs',
    update_freq=250,
)

# RemoteMonitor with its default settings.
remote_monitor_callback = tf.keras.callbacks.RemoteMonitor()

# Cut the learning rate by a factor of 0.2 when val_loss plateaus (patience 10).
reduce_plateau_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=10,
    verbose=1,
)

# Callbacks passed to model.fit, in this order.
callbacks = [
    chkpt_callback,
    earlystopping_callback,
    lr_callback,
    tb_callback,
    progbar_callback,
    csv_callback,
    remote_monitor_callback,
    reduce_plateau_callback,
]


In [194]:
%%time
history = model.fit(x_train, y_train_enc.numpy(), batch_size=32, epochs=50, verbose=2, validation_split=0.2
                    , callbacks=callbacks)


Epoch 00001: LearningRateScheduler reducing learning rate to 0.0010000000474974513.
Epoch 1/50
1500/1500 - 4s - loss: 1.5008 - accuracy: 0.4253 - val_loss: 0.7825 - val_accuracy: 0.7218 - lr: 0.0010

Epoch 00002: LearningRateScheduler reducing learning rate to 0.0010000000474974513.
Epoch 2/50
1500/1500 - 3s - loss: 0.9329 - accuracy: 0.6675 - val_loss: 0.5741 - val_accuracy: 0.8128 - lr: 0.0010

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0010000000474974513.
Epoch 3/50
1500/1500 - 3s - loss: 0.7482 - accuracy: 0.7510 - val_loss: 0.4532 - val_accuracy: 0.8472 - lr: 0.0010

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0010000000474974513.
Epoch 4/50
1500/1500 - 4s - loss: 0.6427 - accuracy: 0.7879 - val_loss: 0.4230 - val_accuracy: 0.8478 - lr: 0.0010

Epoch 00005: LearningRateScheduler reducing learning rate to 0.0010000000474974513.
Epoch 5/50
1500/1500 - 3s - loss: 0.5975 - accuracy: 0.8104 - val_loss: 0.3741 - val_accuracy: 0.9021 - lr: 0.0010



Epoch 00029: LearningRateScheduler reducing learning rate to 0.0008100000559352338.
Epoch 29/50
1500/1500 - 3s - loss: 0.2669 - accuracy: 0.9404 - val_loss: 0.2025 - val_accuracy: 0.9678 - lr: 8.1000e-04

Epoch 00030: LearningRateScheduler reducing learning rate to 0.0008100000559352338.
Epoch 30/50
1500/1500 - 4s - loss: 0.2562 - accuracy: 0.9425 - val_loss: 0.2285 - val_accuracy: 0.9643 - lr: 8.1000e-04

Epoch 00031: LearningRateScheduler reducing learning rate to 0.0007290000503417104.
Epoch 31/50
1500/1500 - 4s - loss: 0.2475 - accuracy: 0.9442 - val_loss: 0.2037 - val_accuracy: 0.9690 - lr: 7.2900e-04

Epoch 00032: LearningRateScheduler reducing learning rate to 0.0007290000794455409.
Epoch 32/50
1500/1500 - 3s - loss: 0.2541 - accuracy: 0.9431 - val_loss: 0.2005 - val_accuracy: 0.9666 - lr: 7.2900e-04

Epoch 00033: LearningRateScheduler reducing learning rate to 0.0007290000794455409.
Epoch 33/50
1500/1500 - 3s - loss: 0.2491 - accuracy: 0.9435 - val_loss: 0.2095 - val_accuracy:

In [178]:
# Persist the trained model under dumped_models/tmp_mnist_model
# (presumably SavedModel format, since the path has no file extension — confirm).
model.save('dumped_models/tmp_mnist_model')

INFO:tensorflow:Assets written to: dumped_models/tmp_mnist_model/assets


In [15]:
# Score the model on the full test set. The model is compiled with
# categorical_crossentropy, so it must be evaluated against the one-hot
# labels (same format as the training targets), not the integer labels.
model.evaluate(x_test, y_test_enc.numpy())



[0.1270398050546646, 0.9646999835968018]

In [16]:
# Score the model on the first test image only. One-hot labels are used
# because the model is compiled with categorical_crossentropy.
model.evaluate(x_test[:1], y_test_enc.numpy()[:1])



[4.410734163684538e-06, 1.0]

In [19]:
# Run the network on all test images; each row of y_pred holds the outputs
# of the final 10-unit softmax layer.
y_pred = model.predict(x_test)

In [21]:
# Ground-truth labels of the first ten test images, for comparison with the predictions.
y_test[:10]

array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9], dtype=uint8)

In [29]:
# Predicted digit for each of the first ten test images: the index of the
# largest softmax output in each row. (numpy is imported in the top import cell.)
class_idx = np.argmax(y_pred[:10], axis=1)
class_idx

array([7, 2, 1, 0, 4, 1, 4, 9, 6, 9])