In [None]:
"""
What? Checkpointing in Keras

Deep learning models can take hours, days or even weeks to train and if a training run is stopped unexpectedly,
you can lose a lot of work. Checkpointing offers a solution.


"""

In [2]:
### Import python modules
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
import numpy

In [None]:
"""
Application checkpointing is a fault tolerance technique for long running processes. It is an approach where a 
snapshot of the state of the system is taken in case of system failure. If there is a problem, not all is lost.
The checkpoint may be used directly, or used as the starting point for a new run, picking up where it left off. 
When training deep learning models, the checkpoint captures the weights of the model. These weights can be used
to make predictions as-is, or used as the basis for ongoing training.
"""

### Checkpoint the weights WHEN validation accuracy IMPROVES

In [25]:
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# load pima indians dataset
dataset = numpy.loadtxt("../DATASETS/pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]

# create model
model = Sequential()
model.add(Dense(12, input_dim=8, kernel_initializer = "uniform" , activation= "relu" ))
model.add(Dense(8, kernel_initializer = "uniform" , activation= "relu" ))
model.add(Dense(1, kernel_initializer= "uniform" , activation= "sigmoid" ))

# Compile model
model.compile(loss= "binary_crossentropy" , optimizer= "adam" , metrics=[ "accuracy" ])

# checkpoint
# OPTION #1 - IF YOU WANT TO NOW WHAT HAPPENS in each step
#filepath = "../OUTPUT/weights-improvement_EPOCH_-{epoch:02d}-_ACURACY_{accuracy:.4f}.hdf5"
# OPTION #2 -  rewriting the same file
filepath = "../OUTPUT/weights-best.hdf5"

#checkpoint = ModelCheckpoint(filepath, monitor = "val_acc" , verbose=1, save_best_only=True, mode= max )
#checkpoint = ModelCheckpoint(filepath, monitor = "val_acc" , verbose=1, save_best_only=True )

checkpoint = ModelCheckpoint(filepath, monitor = "accuracy" , save_weights_only=True, verbose=1, save_best_only=True )

callbacks_list = [checkpoint]
print(callbacks_list)



# Fit the model
model.fit(X, Y, validation_split=0.33, epochs = 150, batch_size=10, callbacks=callbacks_list, verbose=0)

[<tensorflow.python.keras.callbacks.ModelCheckpoint object at 0x14dc6d820>]

Epoch 00001: accuracy improved from -inf to 0.63230, saving model to ../OUTPUT/weights-best.hdf5

Epoch 00002: accuracy improved from 0.63230 to 0.64008, saving model to ../OUTPUT/weights-best.hdf5

Epoch 00003: accuracy did not improve from 0.64008

Epoch 00004: accuracy improved from 0.64008 to 0.65370, saving model to ../OUTPUT/weights-best.hdf5

Epoch 00005: accuracy improved from 0.65370 to 0.65564, saving model to ../OUTPUT/weights-best.hdf5

Epoch 00006: accuracy improved from 0.65564 to 0.66342, saving model to ../OUTPUT/weights-best.hdf5

Epoch 00007: accuracy did not improve from 0.66342

Epoch 00008: accuracy improved from 0.66342 to 0.67704, saving model to ../OUTPUT/weights-best.hdf5

Epoch 00009: accuracy did not improve from 0.67704

Epoch 00010: accuracy did not improve from 0.67704

Epoch 00011: accuracy improved from 0.67704 to 0.68093, saving model to ../OUTPUT/weights-best.hdf5

Epoch 00012


Epoch 00137: accuracy did not improve from 0.78016

Epoch 00138: accuracy did not improve from 0.78016

Epoch 00139: accuracy did not improve from 0.78016

Epoch 00140: accuracy did not improve from 0.78016

Epoch 00141: accuracy improved from 0.78016 to 0.78210, saving model to ../OUTPUT/weights-best.hdf5

Epoch 00142: accuracy did not improve from 0.78210

Epoch 00143: accuracy did not improve from 0.78210

Epoch 00144: accuracy did not improve from 0.78210

Epoch 00145: accuracy did not improve from 0.78210

Epoch 00146: accuracy did not improve from 0.78210

Epoch 00147: accuracy did not improve from 0.78210

Epoch 00148: accuracy did not improve from 0.78210

Epoch 00149: accuracy did not improve from 0.78210

Epoch 00150: accuracy did not improve from 0.78210


<tensorflow.python.keras.callbacks.History at 0x14dcbc6a0>

In [None]:
"""
This is a very simple checkpointing strategy. It may create a lot of unnecessary checkpoint files if the validation 
accuracy moves up and down over training epochs. Nevertheless, it will ensure that you have a snapshot of the best 
model discovered during your run.
"""

### Loading a Saved Neural Network Model

In [None]:
"""
The checkpoint only includes the model weights. It assumes you know the network structure. This too can be serialize
to file in JSON or YAML format. In the example below, the model structure is known and the best weights are loaded 
from the previous experiment, stored in the working directory in the weights.best.hdf5 file. The model is then used
to make predictions on the entire dataset.
"""

In [28]:
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# create model
model = Sequential()
model.add(Dense(12, input_dim=8, kernel_initializer = "uniform" , activation= "relu" ))
model.add(Dense(8, kernel_initializer = "uniform" , activation= "relu" ))
model.add(Dense(1, kernel_initializer= "uniform" , activation= "sigmoid" ))

# load weights
model.load_weights("../OUTPUT/weights-best.hdf5")

# Compile model (required to make predictions)
model.compile(loss= "binary_crossentropy" , optimizer = "adam" , metrics=[ "accuracy" ])
print("Created model and loaded weights from file")

# load pima indians dataset
dataset = numpy.loadtxt("../DATASETS/pima-indians-diabetes.csv", delimiter=",")

# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]

# estimate accuracy on whole dataset using loaded weights
scores = model.evaluate(X, Y, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

Created model and loaded weights from file
accuracy: 75.91%


- https://machinelearningmastery.com/check-point-deep-learning-models-keras/