In [None]:
import tensorflow as tf
import numpy as np
import time, datetime
import matplotlib.pyplot as plt
import pickle

# constants definition
# -- naming / output files
CNN_NAME = 'custom_cnn'    # prefix of the model, weights and history file names

# -- problem size
NUM_CLASSES = 10           # width of the one-hot labels and of the softmax layer

# -- training hyper-parameters
EPOCHS = 1                 # epochs passed to model.fit (also part of the file names)
WEIGHT_DECAY = 1e-4        # L2 factor handed to build_model
BATCH_SIZE_TRAIN, BATCH_SIZE_TEST = 64, 128   # batch sizes for fit / evaluate


# build the neural net model
def build_model(weight_decay=1e-4, input_shape=None, num_classes=None):
  """Build the (uncompiled) convolutional classifier.

  The network consists of three stages. Each stage is two units of
  Conv2D(3x3, 'same' padding, L2-regularized) -> ELU -> BatchNormalization,
  followed by 2x2 max pooling and dropout. The stages use 32, 64 and 128
  filters with dropout rates 0.2, 0.3 and 0.4 respectively. The stages are
  followed by Flatten and a softmax Dense layer.

  Args:
    weight_decay: L2 regularization factor applied to every Conv2D kernel.
    input_shape: Shape of a single input sample (no batch dimension). When
      None, the shape is read from the module-level `x_train` array, which
      is what this function did before the parameter existed; in that case
      `x_train` must already be defined when the function is called.
    num_classes: Number of units of the softmax output layer. When None,
      the module-level NUM_CLASSES constant is used.

  Returns:
    A tf.keras.Sequential model. Compiling it is left to the caller.
  """
  if input_shape is None:
    # Backward-compatible fallback to the notebook's global training set.
    input_shape = x_train.shape[1:]
  if num_classes is None:
    num_classes = NUM_CLASSES

  model = tf.keras.Sequential()

  def add_conv_unit(filters, **conv_kwargs):
    # Append one Conv2D -> ELU -> BatchNormalization unit to the model.
    model.add(tf.keras.layers.Conv2D(
        filters, (3, 3), padding='same',
        kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
        **conv_kwargs))
    model.add(tf.keras.layers.Activation('elu'))
    model.add(tf.keras.layers.BatchNormalization())

  # Only the very first layer of the network declares the input shape.
  first_unit_kwargs = {'input_shape': input_shape}
  for filters, dropout_rate in ((32, 0.2), (64, 0.3), (128, 0.4)):
    add_conv_unit(filters, **first_unit_kwargs)
    first_unit_kwargs = {}
    add_conv_unit(filters)
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    model.add(tf.keras.layers.Dropout(dropout_rate))

  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

  return model


# schedule the learning rate according to which epoch it is
def lr_schedule(epoch):
  """Return the learning rate to use for the given epoch.

  Args:
    epoch: Epoch index supplied by the LearningRateScheduler callback.

  Returns:
    0.001 while epoch <= 75, 0.0005 for 75 < epoch <= 100, and 0.0003 once
    epoch > 100.
  """
  # The highest threshold has to be tested first. With `epoch > 75` checked
  # before `epoch > 100`, the second branch was unreachable and the 0.0003
  # rate was never applied.
  if epoch > 100:
    return 0.0003
  if epoch > 75:
    return 0.0005
  return 0.001

In [None]:
# load cifar10 dataset and cast both image arrays to float32
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train, x_test = x_train.astype('float32'), x_test.astype('float32')

# apply z-score: one mean/std computed over every axis of the training images,
# then applied unchanged to both the training and the test images
mean = np.mean(x_train, axis=(0, 1, 2, 3))
std = np.std(x_train, axis=(0, 1, 2, 3))
x_train, x_test = (
    (images - mean) / (std + 1e-7)   # small epsilon guards against a zero std
    for images in (x_train, x_test)
)

# convert the integer labels to one-hot vectors of width NUM_CLASSES
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, NUM_CLASSES)
    for labels in (y_train, y_test)
)

# build the network and print its layer summary
model = build_model(weight_decay=WEIGHT_DECAY)
model.summary()

# data augmentation: random rotations, shifts and horizontal flips
augmentation_options = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
data_generator = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)
data_generator.fit(x_train)

# set optimizer and compile model
# NOTE(review): `decay` looks like an argument of the older Keras optimizer
# API - confirm it is still accepted by the installed TensorFlow version.
opt_rms = tf.keras.optimizers.RMSprop(learning_rate=0.001, decay=1e-6)
model.compile(
    optimizer=opt_rms,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# training: fit on augmented batches, validating on the test split each epoch
start = time.time()

timestamp_format = "%Y-%m-%d %H:%M:%S"   # timestamps are printed in UTC (gmtime)
print("\tStart training [", time.strftime(timestamp_format, time.gmtime()), "]\n")

augmented_batches = data_generator.flow(x_train, y_train, batch_size=BATCH_SIZE_TRAIN)
lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_schedule)
train_history = model.fit(
    augmented_batches,
    # whole batches only: the remainder of the integer division is dropped
    steps_per_epoch=x_train.shape[0] // BATCH_SIZE_TRAIN,
    epochs=EPOCHS,
    validation_data=(x_test, y_test),
    callbacks=[lr_callback],
    verbose=1,
)

print("\n\tEnd training [", time.strftime(timestamp_format, time.gmtime()), "]")

end = time.time()
elapsed_seconds = round(end - start, 0)
print("\n\tTotal training time:", datetime.timedelta(seconds=elapsed_seconds))

In [None]:
# testing: evaluate on the test split; scores[0] is the loss and scores[1] the
# accuracy metric the model was compiled with
scores = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE_TEST, verbose=1)
test_loss, test_accuracy = scores[0], scores[1]
print('\nTest result: %.3f loss: %.3f' % (test_accuracy * 100, test_loss))

In [None]:
# save to disk: architecture as JSON, weights as HDF5, training history as pickle
model_path = f"{CNN_NAME}_{EPOCHS}_model.json"
weights_path = f"{CNN_NAME}_{EPOCHS}_weights.h5"
history_path = f"{CNN_NAME}_{EPOCHS}_history.sav"

with open(model_path, 'w') as model_file:
  model_file.write(model.to_json())

model.save_weights(weights_path)

with open(history_path, 'wb') as history_file:
  # only the plain per-epoch metrics dict is stored, not the History object
  pickle.dump(train_history.history, history_file)

In [None]:
# load history
# The file is opened in a `with` block so the handle is closed right after
# loading (the previous `pickle.load(open(...))` never closed it).
# NOTE: unpickling can execute arbitrary code - only load history files that
# were written by this notebook.
with open(f"{CNN_NAME}_{EPOCHS}_history.sav", "rb") as history_file:
  history = pickle.load(history_file)

plt.figure(figsize=(15.0, 9.0))
plt.xlabel('Epoch')

# alternative view: loss curves instead of accuracy curves
#plt.plot(history['loss'])
#plt.plot(history['val_loss'])
#plt.title('Custom CNN loss')
#plt.ylabel('Loss')
#plt.legend(['Train', 'Validation'], loc='upper center')

# accuracy per epoch on the training data and on the validation data
plt.plot(history['accuracy'])
plt.plot(history['val_accuracy'])
plt.title('Custom CNN accuracy')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='lower center')

plt.show()

In [None]:
# show gpu info colab
# gpu_info = !nvidia-smi
# gpu_info = '\n'.join(gpu_info)
# if gpu_info.find('failed') >= 0:
#  print('Not connected to a GPU')
# else:
#   print(gpu_info)