<a href="https://colab.research.google.com/github/timothyvanco/machineLearning/blob/master/ML_Learning/checkpoint_models/cifar10_checkpoint_improvements_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from sklearn.preprocessing import LabelBinarizer

# enable to checkpoint and serialize network to disk whenever find an improvement in model performance
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.datasets import cifar10
import argparse
import os

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras import backend as K

In [0]:
class MiniVGGNet:
    @staticmethod
    def build(width, height, depth, classes):
        # initialize model along with input shape to be "channeôs last"
        model = Sequential()
        inputShape = (height, width, depth)
        channelDimension = -1                           # -1 = last ordering

        # if "channels first" update input shape and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            channelDimension = 1                        # batch normalization operatesover channels - in order to apply
                                                        # BN need to know which axis to normalize over, 1 = first order


        # first layer - (CONV => RELU => BN) * 2 => POOL => DO
        model.add(Conv2D(32, (3, 3), padding="same", input_shape = inputShape))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=channelDimension))

        model.add(Conv2D(32, (3, 3), padding="same", input_shape = inputShape))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=channelDimension))

        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))    # node from POOL layer will be randomly disconnected from next layer with prob 25%

        # second layer - (CONV => RELU => BN) * 2 => POOL => DO
        model.add(Conv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=channelDimension))

        model.add(Conv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=channelDimension))

        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))


        # FC => RELU layers
        model.add(Flatten())
        model.add(Dense(512))                   # 512 nodes
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))                 # increasing probability to 50%

        model.add(Dense(classes))
        model.add(Activation("softmax"))

        return model

In [0]:
# will store weights during training process
#ap = argparse.ArgumentParser()
#ap.add_argument("-w", "--weights", required=True, help="path to weights directory")
#args = vars(ap.parse_args())

In [3]:
# load dataset - training and testing data, then scale it to [0, 1]
print("[INFO] loading CIFAR-10 data...")
((trainX, trainY), (testX, testY)) = cifar10.load_data()
trainX = trainX.astype("float") / 255.0
testX = testX.astype("float") / 255.0

# convert labels from integers to vectors
labBin = LabelBinarizer()
trainY = labBin.fit_transform(trainY)
testY = labBin.transform(testY)

# initialize optimizer and model
print("[INFO] compiling model...")
opt = SGD(lr=0.01, decay=0.01/40, momentum=0.9, nesterov=True)
model = MiniVGGNet.build(width=32, height=32, depth=3, classes=10)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

[INFO] loading CIFAR-10 data...
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[INFO] compiling model...
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [30]:
# Load the Drive helper and mount
from google.colab import drive
filePath = "/content/drive"
drive.mount(filePath)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
model.save_weights('my_model_weights.h5')
model.save('my_model.h5')

In [0]:
my_callbacks = [
    EarlyStopping(patience=4, verbose=1),
    ReduceLROnPlateau(factor=0.1, patience=3, min_lr=0.00001, verbose=1),
    ModelCheckpoint(filepath = filePath + '_my_model.h5', monitor="val_loss", mode="min", verbose=1, save_best_only=True, save_weights_only=False) 
    ]

In [0]:
my_callbacks = [ModelCheckpoint(filepath = filePath + '_my_model.h5', monitor="val_loss", mode="min", verbose=1, save_best_only=True, save_weights_only=False)]

In [36]:
# train network
print("[INFO] training network...")
H = model.fit(trainX, trainY, validation_data=(testX, testY), batch_size=64, epochs=40, callbacks=my_callbacks, verbose=2)

[INFO] training network...
Train on 50000 samples, validate on 10000 samples
Epoch 1/40

Epoch 00001: val_loss improved from inf to 0.55238, saving model to /content/drive_my_model.h5
50000/50000 - 18s - loss: 0.4369 - acc: 0.8441 - val_loss: 0.5524 - val_acc: 0.8109
Epoch 2/40

Epoch 00002: val_loss did not improve from 0.55238
50000/50000 - 18s - loss: 0.4362 - acc: 0.8449 - val_loss: 0.5525 - val_acc: 0.8112
Epoch 3/40

Epoch 00003: val_loss did not improve from 0.55238
50000/50000 - 18s - loss: 0.4322 - acc: 0.8476 - val_loss: 0.5524 - val_acc: 0.8110
Epoch 4/40

Epoch 00004: val_loss did not improve from 0.55238
50000/50000 - 18s - loss: 0.4303 - acc: 0.8454 - val_loss: 0.5528 - val_acc: 0.8113
Epoch 5/40

Epoch 00005: val_loss improved from 0.55238 to 0.55164, saving model to /content/drive_my_model.h5
50000/50000 - 18s - loss: 0.4328 - acc: 0.8470 - val_loss: 0.5516 - val_acc: 0.8114
Epoch 6/40

Epoch 00006: val_loss did not improve from 0.55164
50000/50000 - 18s - loss: 0.4311 