In [1]:
import numpy as np
import matplotlib.pyplot as plt
import keras
import keras.backend as K

Using TensorFlow backend.


In [2]:
# Load CIFAR-10 through Keras (fetched and cached by Keras on first use).
(X_train, Y_train), (X_test, Y_test) = keras.datasets.cifar10.load_data()
# Scale the pixel values by 1/255. Cast to float32 first: dividing the integer
# image arrays directly promotes them to float64, which doubles the memory
# held by both image arrays without adding any useful precision.
X_train, X_test = X_train.astype('float32')/255, X_test.astype('float32')/255

In [3]:
# Network input: one image, with the shape taken from the training data.
X = X_input = keras.layers.Input(X_train.shape[1:])
# Batch normalisation is applied directly to the input before the first conv.
X = keras.layers.BatchNormalization()(X)

# Three residual stages that differ only in their number of filters.
# Each stage is:
#   conv -> BN -> ReLU -> conv -> add(output of the stage's first conv)
#        -> BN -> ReLU -> max-pool
# The skip branch starts after the first convolution of the stage, so both
# inputs of the Add layer have the same number of channels.
for n_filters in (16, 32, 64):
    X = X_skip = keras.layers.Conv2D(
        n_filters, (3,3), padding='same', kernel_initializer='he_normal')(X)
    X = keras.layers.BatchNormalization()(X)
    X = keras.layers.Activation('relu')(X)
    X = keras.layers.Conv2D(
        n_filters, (3,3), padding='same', kernel_initializer='he_normal')(X)
    X = keras.layers.Add()([X, X_skip])
    X = keras.layers.BatchNormalization()(X)
    X = keras.layers.Activation('relu')(X)
    X = keras.layers.MaxPooling2D()(X)

# Final stage without a skip connection: two conv -> BN -> ReLU units with
# 128 filters. The second convolution uses 'valid' padding, so it shrinks the
# feature map instead of preserving its size.
for conv_padding in ('same', 'valid'):
    X = keras.layers.Conv2D(
        128, (3,3), padding=conv_padding, kernel_initializer='he_normal')(X)
    X = keras.layers.BatchNormalization()(X)
    X = keras.layers.Activation('relu')(X)
X = keras.layers.MaxPooling2D()(X)

# Classifier head: flatten and emit one softmax unit per class. The class
# count is derived from the labels as (largest label value + 1).
X = keras.layers.Flatten()(X)
X = keras.layers.Dense(np.max(Y_train)+1, activation='softmax')(X)

M = keras.Model(X_input, X)
# Sparse cross-entropy is used because the labels are passed as class ids,
# not one-hot vectors.
M.compile(optimizer='nadam', loss='sparse_categorical_crossentropy', metrics=['acc'])
M.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 32, 32, 3)    12          input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 32, 32, 16)   448         batch_normalization_1[0][0]      
__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 32, 32, 16)   64          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation

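### Search for the best maximum learning rate for cosine annealing

Train for up to 10 epochs while raising the learning rate linearly from 0.05 to 0.5, then keep the rate used in the epoch with the lowest training loss.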

In [4]:
# Learning-rate search: train briefly while the rate grows every epoch, and
# record the per-epoch losses in `hist` so a good maximum rate can be picked.
LR_SEARCH_EPOCHS = 10   # number of epochs (and of rate steps) in the search
LR_SEARCH_PEAK = 0.5    # rate reached in the last search epoch


def lr_search_schedule(epoch, lr):
    """Return the learning rate for the 0-based `epoch` of the search run.

    The rate grows linearly: LR_SEARCH_PEAK/LR_SEARCH_EPOCHS in the first
    epoch, LR_SEARCH_PEAK in the last one. The current rate `lr` supplied by
    the callback is ignored.
    """
    return LR_SEARCH_PEAK*((epoch+1)/LR_SEARCH_EPOCHS)


# NOTE(review): this run updates the weights of `M` in place and validates on
# the test split; any later training of `M` continues from these weights.
hist = M.fit(
    X_train, Y_train,
    validation_data=(X_test, Y_test),
    batch_size=64,
    epochs=LR_SEARCH_EPOCHS,
    callbacks=[
        keras.callbacks.LearningRateScheduler(lr_search_schedule, verbose=1),
        # Stop the search early once the training loss has stopped improving.
        keras.callbacks.EarlyStopping(monitor='loss', patience=2, verbose=1),
    ],
)

Train on 50000 samples, validate on 10000 samples
Epoch 1/10

Epoch 00001: LearningRateScheduler setting learning rate to 0.05.
Epoch 2/10

Epoch 00002: LearningRateScheduler setting learning rate to 0.1.
Epoch 3/10

Epoch 00003: LearningRateScheduler setting learning rate to 0.15.
Epoch 4/10

Epoch 00004: LearningRateScheduler setting learning rate to 0.2.
Epoch 5/10

Epoch 00005: LearningRateScheduler setting learning rate to 0.25.
Epoch 6/10

Epoch 00006: LearningRateScheduler setting learning rate to 0.3.
Epoch 7/10

Epoch 00007: LearningRateScheduler setting learning rate to 0.35.
Epoch 8/10

Epoch 00008: LearningRateScheduler setting learning rate to 0.4.
Epoch 9/10

Epoch 00009: LearningRateScheduler setting learning rate to 0.45.
Epoch 10/10

Epoch 00010: LearningRateScheduler setting learning rate to 0.5.


In [5]:
# 0-based index of the search epoch that finished with the lowest training loss.
best_epoch = np.argmin(hist.history['loss'])
# Recompute the rate the search schedule used in that epoch
# (same formula as the schedule: 0.5*((epoch+1)/10)).
max_lr = 0.5*((best_epoch+1)/10)
max_lr

0.45

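### Cosine annealing with restarts

Within each cycle the learning rate decays along a cosine curve from `max_lr` down to `max_lr/4`, then jumps back to `max_lr`. Cycle lengths double after every restart (4, 8, 16 and 32 epochs, 60 epochs in total).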

In [6]:
def cosine_annealing(i, lr_min, lr_max):
    """Learning rate for step `i` of a cosine schedule with restarts.

    Steps are grouped into cycles whose length doubles each time: the first
    cycle covers i = 0..3, the next i = 4..11, then i = 12..27, and so on.
    Inside a cycle the rate follows a quarter cosine wave, starting at
    `lr_max` on the cycle's first step and reaching `lr_min` on its last.

    Args:
        i: step index; in this notebook the 0-based epoch number. Expected to
            be >= 0.
        lr_min: rate at the end of every cycle.
        lr_max: rate at the start of every cycle.

    Returns:
        The learning rate for step `i` as a NumPy float.
    """
    # Shift by 4 so that cycle boundaries fall on powers of two (4, 8, 16, ...).
    step = i + np.power(2,2)
    exponent = np.floor(np.log2(step))
    # First and last (shifted) step of the cycle that contains `step`.
    cycle_first = np.power(2,exponent)
    cycle_last = np.power(2,exponent+1)-1
    # Position inside the cycle: 0 on its first step, 1 on its last.
    progress = (step-cycle_first)/(cycle_last-cycle_first)
    # cos goes from 1 to 0 over [0, pi/2], scaling the rate from lr_max to lr_min.
    return lr_min + np.cos(progress*(np.pi/2))*(lr_max-lr_min)
def annealed_lr(epoch, lr):
    """Learning rate for the 0-based `epoch` of the annealing run.

    Delegates to `cosine_annealing`, with the rate moving between `max_lr`
    and a quarter of `max_lr`. The current rate `lr` supplied by the callback
    is ignored.
    """
    return cosine_annealing(epoch, max_lr*0.25, max_lr)


# 60 epochs = 4 + 8 + 16 + 32, i.e. exactly four annealing cycles.
M.fit(
    X_train, Y_train,
    validation_data=(X_test, Y_test),
    batch_size=64,
    epochs=60,
    callbacks=[
        keras.callbacks.LearningRateScheduler(annealed_lr, verbose=1),
    ],
)

Train on 50000 samples, validate on 10000 samples
Epoch 1/60

Epoch 00001: LearningRateScheduler setting learning rate to 0.45.
Epoch 2/60

Epoch 00002: LearningRateScheduler setting learning rate to 0.40478357377724805.
Epoch 3/60

Epoch 00003: LearningRateScheduler setting learning rate to 0.28125000000000006.
Epoch 4/60

Epoch 00004: LearningRateScheduler setting learning rate to 0.11250000000000002.
Epoch 5/60

Epoch 00005: LearningRateScheduler setting learning rate to 0.45.
Epoch 6/60

Epoch 00006: LearningRateScheduler setting learning rate to 0.4415381703613655.
Epoch 7/60

Epoch 00007: LearningRateScheduler setting learning rate to 0.41657699291706646.
Epoch 8/60

Epoch 00008: LearningRateScheduler setting learning rate to 0.37636812533296005.
Epoch 9/60

Epoch 00009: LearningRateScheduler setting learning rate to 0.3229278081273226.
Epoch 10/60

Epoch 00010: LearningRateScheduler setting learning rate to 0.2589357619521759.
Epoch 11/60

Epoch 00011: LearningRateScheduler sett

Epoch 39/60

Epoch 00039: LearningRateScheduler setting learning rate to 0.40759198294879645.
Epoch 40/60

Epoch 00040: LearningRateScheduler setting learning rate to 0.39891743690447845.
Epoch 41/60

Epoch 00041: LearningRateScheduler setting learning rate to 0.38950766140745574.
Epoch 42/60

Epoch 00042: LearningRateScheduler setting learning rate to 0.3793868112164733.
Epoch 43/60

Epoch 00043: LearningRateScheduler setting learning rate to 0.36858086640881693.
Epoch 44/60

Epoch 00044: LearningRateScheduler setting learning rate to 0.357117565689828.
Epoch 45/60

Epoch 00045: LearningRateScheduler setting learning rate to 0.34502633518804426.
Epoch 46/60

Epoch 00046: LearningRateScheduler setting learning rate to 0.33233821291875004.
Epoch 47/60

Epoch 00047: LearningRateScheduler setting learning rate to 0.3190857691098362.
Epoch 48/60

Epoch 00048: LearningRateScheduler setting learning rate to 0.30530302259449243.
Epoch 49/60

Epoch 00049: LearningRateScheduler setting learning

<keras.callbacks.History at 0x7f3fe858cdd8>