<a href="https://colab.research.google.com/github/jvishnuvardhan/Medium_Articles/blob/master/ModelCheckpoint_Callback.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# Import modules
import tensorflow as tf
from tensorflow import keras
import os, datetime

Epochs = 10  # number of epochs for each full training run

# Handle to the MNIST dataset module that ships with Keras
mnist = tf.keras.datasets.mnist

# Unpack the train and test splits returned by load_data()
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the images by 1/255 (presumably maps 0-255 pixel values into [0, 1])
x_train = x_train / 255.0
x_test = x_test / 255.0

# Define model
def create_model():
  """Build and compile the dense classifier used throughout this notebook.

  The network flattens a 28x28 input, applies a 512-unit ReLU layer
  followed by dropout with rate 0.2, and ends in a 10-unit softmax layer.
  It is compiled with the Adam optimizer, sparse categorical cross-entropy
  loss, and the accuracy metric.

  Returns:
    A newly constructed, compiled `tf.keras.models.Sequential` model.
  """
  layers = [
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax'),
  ]
  model = tf.keras.models.Sequential(layers)

  # Configure optimizer, loss and reported metric
  model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
  )
  return model

# Instantiate the model that the training cells below will fit
model = create_model()



In [12]:
# ModelCheckpoint with a fixed path: every epoch writes to the same
# location, so each save replaces the previous checkpoint.
checkpoint_path_1 = "training_1/cp.ckpt"
checkpoint_dir_1 = os.path.dirname(checkpoint_path_1)

# Weights-only checkpoint callback, triggered once per epoch
checkpoint_callback_1 = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path_1,
    save_weights_only=True,
    save_freq='epoch',
    verbose=1,
)

# Fit on the training split, using the test split as validation data
model.fit(
    x_train,
    y_train,
    epochs=Epochs,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint_callback_1],
)

Epoch 1/10
Epoch 00001: saving model to training_1/cp.ckpt
Epoch 2/10
Epoch 00002: saving model to training_1/cp.ckpt
Epoch 3/10
Epoch 00003: saving model to training_1/cp.ckpt
Epoch 4/10
Epoch 00004: saving model to training_1/cp.ckpt
Epoch 5/10
Epoch 00005: saving model to training_1/cp.ckpt
Epoch 6/10
Epoch 00006: saving model to training_1/cp.ckpt
Epoch 7/10
Epoch 00007: saving model to training_1/cp.ckpt
Epoch 8/10
Epoch 00008: saving model to training_1/cp.ckpt
Epoch 9/10
Epoch 00009: saving model to training_1/cp.ckpt
Epoch 10/10
Epoch 00010: saving model to training_1/cp.ckpt


<tensorflow.python.keras.callbacks.History at 0x7f0139214c88>

In [13]:
# Build a new model with the same architecture (weights not yet restored)
model_1 = create_model()

# Restore the weights saved at checkpoint_path_1
model_1.load_weights(checkpoint_path_1)

# Score the restored model on the test split and report its accuracy
loss, acc = model_1.evaluate(x_test, y_test, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))

313/313 - 0s - loss: 0.0731 - accuracy: 0.9820
Restored model, accuracy: 98.20%


In [14]:
# ModelCheckpoint with the epoch number embedded in the file name, so each
# epoch gets its own checkpoint instead of replacing the previous one.
checkpoint_path_2 = "training_2/cp-{epoch:04d}.ckpt"
# Alternative naming that also records the validation loss:
# checkpoint_path_2 = "training_2/cp-{epoch:04d}-val_loss-{val_loss:.2f}.ckpt"
checkpoint_dir_2 = os.path.dirname(checkpoint_path_2)

# Weights-only checkpoint callback, triggered once per epoch
checkpoint_callback_2 = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path_2,
    save_weights_only=True,
    save_freq='epoch',
    verbose=1,
)

# Note: `model` is the instance already fitted above, so this call
# continues training from its current weights.
model.fit(
    x_train,
    y_train,
    epochs=Epochs,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint_callback_2],
)

Epoch 1/10
Epoch 00001: saving model to training_2/cp-0001.ckpt
Epoch 2/10
Epoch 00002: saving model to training_2/cp-0002.ckpt
Epoch 3/10
Epoch 00003: saving model to training_2/cp-0003.ckpt
Epoch 4/10
Epoch 00004: saving model to training_2/cp-0004.ckpt
Epoch 5/10
Epoch 00005: saving model to training_2/cp-0005.ckpt
Epoch 6/10
Epoch 00006: saving model to training_2/cp-0006.ckpt
Epoch 7/10
Epoch 00007: saving model to training_2/cp-0007.ckpt
Epoch 8/10
Epoch 00008: saving model to training_2/cp-0008.ckpt
Epoch 9/10
Epoch 00009: saving model to training_2/cp-0009.ckpt
Epoch 10/10
Epoch 00010: saving model to training_2/cp-0010.ckpt


<tensorflow.python.keras.callbacks.History at 0x7f0138944668>

In [15]:
# Build a new model with the same architecture (weights not yet restored)
model_2 = create_model()

# Look up the most recent checkpoint recorded in the checkpoint directory
latest = tf.train.latest_checkpoint(checkpoint_dir_2)

# latest_checkpoint returns None when no checkpoint is found; fail with a
# clear message instead of passing None on to load_weights.
if latest is None:
  raise FileNotFoundError(
      "No checkpoint found in {!r}; run the training cell first.".format(
          checkpoint_dir_2))

# Restore the weights from that checkpoint
model_2.load_weights(latest)

# Score the restored model on the test split and report its accuracy
loss, acc = model_2.evaluate(x_test, y_test, verbose=2)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))

313/313 - 0s - loss: 0.1098 - accuracy: 0.9806
Restored model, accuracy: 98.06%


In [16]:
###### Checkpointing every 100th batch
# The running training loss is embedded in the file name. Saves whose loss
# formats to the same four decimals share a name and overwrite each other.
checkpoint_path_3 = "training_3/ckpt-loss={loss:.4f}"
checkpoint_dir_3 = os.path.dirname(checkpoint_path_3)

# Weights-only checkpoint callback; an integer save_freq counts batches,
# so weights are saved every 100 batches rather than once per epoch.
checkpoint_callback_3 = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path_3,
    save_weights_only=True,
    save_freq=100,
    verbose=1,
)

# A single epoch is enough to show several batch-level saves.
# Note: `model` is the instance already fitted above, so this call
# continues training from its current weights.
model.fit(
    x_train,
    y_train,
    epochs=1,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint_callback_3],
)

  85/1875 [>.............................] - ETA: 5s - loss: 0.0139 - accuracy: 0.9934
Epoch 00001: saving model to training_3/ckpt-loss=0.0148
 193/1875 [==>...........................] - ETA: 5s - loss: 0.0112 - accuracy: 0.9950
Epoch 00001: saving model to training_3/ckpt-loss=0.0114
 299/1875 [===>..........................] - ETA: 5s - loss: 0.0110 - accuracy: 0.9952
Epoch 00001: saving model to training_3/ckpt-loss=0.0110
 388/1875 [=====>........................] - ETA: 4s - loss: 0.0113 - accuracy: 0.9950
Epoch 00001: saving model to training_3/ckpt-loss=0.0110
Epoch 00001: saving model to training_3/ckpt-loss=0.0107
Epoch 00001: saving model to training_3/ckpt-loss=0.0102
Epoch 00001: saving model to training_3/ckpt-loss=0.0100
Epoch 00001: saving model to training_3/ckpt-loss=0.0112
Epoch 00001: saving model to training_3/ckpt-loss=0.0121
Epoch 00001: saving model to training_3/ckpt-loss=0.0120
Epoch 00001: saving model to training_3/ckpt-loss=0.0121
Epoch 00001: saving model

<tensorflow.python.keras.callbacks.History at 0x7f013abf7fd0>

In [17]:
# Saving only the best model (by validation accuracy)
checkpoint_path_4 = "training_4/cp-{epoch:04d}.ckpt"
checkpoint_dir_4 = os.path.dirname(checkpoint_path_4)

# Weights-only checkpoint callback, evaluated once per epoch. With
# save_best_only=True a checkpoint is written only when the monitored
# val_accuracy improves on the best value seen so far; mode='auto' lets
# Keras infer the improvement direction from the metric name.
checkpoint_callback_4 = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path_4,
    monitor='val_accuracy',
    mode='auto',
    save_best_only=True,
    save_weights_only=True,
    save_freq='epoch',
    verbose=1,
)

# Note: `model` is the instance already fitted above, so this call
# continues training from its current weights.
model.fit(
    x_train,
    y_train,
    epochs=Epochs,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint_callback_4],
)

Epoch 1/10
Epoch 00001: val_accuracy improved from -inf to 0.98200, saving model to training_4/cp-0001.ckpt
Epoch 2/10
Epoch 00002: val_accuracy did not improve from 0.98200
Epoch 3/10
Epoch 00003: val_accuracy improved from 0.98200 to 0.98250, saving model to training_4/cp-0003.ckpt
Epoch 4/10
Epoch 00004: val_accuracy improved from 0.98250 to 0.98550, saving model to training_4/cp-0004.ckpt
Epoch 5/10
Epoch 00005: val_accuracy did not improve from 0.98550
Epoch 6/10
Epoch 00006: val_accuracy did not improve from 0.98550
Epoch 7/10
Epoch 00007: val_accuracy did not improve from 0.98550
Epoch 8/10
Epoch 00008: val_accuracy did not improve from 0.98550
Epoch 9/10
Epoch 00009: val_accuracy did not improve from 0.98550
Epoch 10/10
Epoch 00010: val_accuracy did not improve from 0.98550


<tensorflow.python.keras.callbacks.History at 0x7f013abc0c88>

In [23]:
# Saving weights after each epoch (h5 format); the epoch number in the
# file name gives every epoch its own file in the working directory.
checkpoint_path_5 = "Epoch_{epoch:04d}_weights.h5"

# Weights-only checkpoint callback, triggered once per epoch
checkpoint_callback_5 = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path_5,
    save_weights_only=True,
    save_freq='epoch',
    verbose=1,
)

# Note: `model` is the instance already fitted above, so this call
# continues training from its current weights.
model.fit(
    x_train,
    y_train,
    epochs=Epochs,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint_callback_5],
)

Epoch 1/10
Epoch 00001: saving model to Epoch_0001_weights.h5
Epoch 2/10
Epoch 00002: saving model to Epoch_0002_weights.h5
Epoch 3/10
Epoch 00003: saving model to Epoch_0003_weights.h5
Epoch 4/10
Epoch 00004: saving model to Epoch_0004_weights.h5
Epoch 5/10
Epoch 00005: saving model to Epoch_0005_weights.h5
Epoch 6/10
Epoch 00006: saving model to Epoch_0006_weights.h5
Epoch 7/10
Epoch 00007: saving model to Epoch_0007_weights.h5
Epoch 8/10
Epoch 00008: saving model to Epoch_0008_weights.h5
Epoch 9/10
Epoch 00009: saving model to Epoch_0009_weights.h5
Epoch 10/10
Epoch 00010: saving model to Epoch_0010_weights.h5


<tensorflow.python.keras.callbacks.History at 0x7f013aaffdd8>