In [None]:
'''
This notebook trains and evaluates AlexNet/VGG16 deep CNNs on the ImageNet dataset, with and without kerception/kervolution layers.
'''

In [None]:
from __future__ import print_function
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import cifar10
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
from tf_keras_kervolution_2d import KernelConv2D      # Kervolution
from tf_keras_kervolution_2d import LinearKernel      # Equivalent to normal convolution
from tf_keras_kervolution_2d import L1Kernel          # Manhattan distance
from tf_keras_kervolution_2d import L2Kernel          # Euclidean distance
from tf_keras_kervolution_2d import PolynomialKernel  # Polynomial
from tf_keras_kervolution_2d import GaussianKernel    # Gaussin / RBF
from tf_keras_kervolution_2d import SigmoidKernel     # Sigmoid
from models.Alexnet import Alexnet

In [None]:
# Commands needed for GPU program
# Build a TF1-compat session config with gpu_options.allow_growth enabled and
# open an InteractiveSession with it.
# NOTE(review): allow_growth is expected to make TensorFlow claim GPU memory on
# demand rather than all at once - confirm against the installed TF version.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

In [None]:
# Training parameters
batch_size = 128  # orig paper trained all networks with batch_size=128
epochs = 200  # lr_schedule's decay thresholds (80/120/160/180) all fall inside this range
data_augmentation = False  # False: plain model.fit; True: ImageDataGenerator pipeline
# Used both for the one-hot label width and for the model's output size.
# NOTE(review): full ImageNet has 1000 classes - confirm 10 matches the labels actually loaded.
num_classes = 10
# Model version
# version = 1 (Alexnet), version = 2 (VGG16)
version = 1

In [None]:
# Load data.
# The ImageNet loader was never filled in: the original line was an unfinished
# assignment, which is a SyntaxError. Until a real loader is plugged in, keep
# the cell parseable and fail with an explicit message instead.
def load_imagenet():
    """Return ((x_train, y_train), (x_test, y_test)) for this experiment.

    Placeholder: replace the body with the project's actual data pipeline.
    Later cells call .astype('float32') and .shape on the image arrays and pass
    the labels to keras.utils.to_categorical, so the returned objects must
    support those operations.

    # Raises
        NotImplementedError: always, until a loader is implemented.
    """
    raise NotImplementedError(
        'ImageNet loading is not implemented: make load_imagenet() return '
        '((x_train, y_train), (x_test, y_test)).')


(x_train, y_train), (x_test, y_test) = load_imagenet()

In [None]:
# Input image dimensions.
# Per-sample shape: every axis of x_train after the leading (sample) axis.
input_shape = x_train.shape[1:]

In [None]:
# Normalize data.
# Cast both image arrays to float32 and divide by 255.
# NOTE: this rebinds x_train / x_test, so re-running the cell rescales again.
x_train, x_test = (
    images.astype('float32') / 255
    for images in (x_train, x_test)
)

In [None]:
# Sanity check: report array shapes and the number of train / test samples.
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print('y_train shape:', y_train.shape)

In [None]:
# Convert class vectors to binary class matrices.
# Both label arrays are one-hot encoded to num_classes columns.
# NOTE: this rebinds y_train / y_test, so the cell must only be run once per load.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

In [None]:
# Label array shape after the one-hot conversion.
print('y_train shape:',y_train.shape)

In [None]:
def lr_schedule(epoch):
    """Step-wise learning rate schedule.

    Starts at 1e-3 and switches to a smaller rate once the epoch index
    exceeds 80, 120, 160 and 180. Intended to be handed to a
    LearningRateScheduler callback, which calls it once per epoch.
    The chosen rate is also printed.

    # Arguments
        epoch (int): The number of epochs

    # Returns
        lr (float32): learning rate
    """
    base_lr = 1e-3
    # (epoch threshold, multiplier on base_lr), checked from the latest stage backwards
    # so the first match is the stage the epoch belongs to.
    decay_stages = (
        (180, 0.5e-3),
        (160, 1e-3),
        (120, 1e-2),
        (80, 1e-1),
    )
    lr = base_lr
    for threshold, factor in decay_stages:
        if epoch > threshold:
            lr = base_lr * factor
            break
    print('Learning rate: ', lr)
    return lr

In [None]:
class Kerception_blockC(tf.keras.layers.Layer):
    '''
    Kerception block: five parallel 3x3 kervolution branches, concatenated.

    The same input is fed to every branch (all use padding='same') and the
    branch outputs are concatenated along axis 3, giving
    1 + 3 + 3 + 6 + 3 = 16 output channels:
        LinearKernel                                -> 1 filter
        SigmoidKernel                               -> 3 filters
        GaussianKernel (gamma=1.0, trainable gamma) -> 3 filters
        PolynomialKernel (p=3, trainable c)         -> 6 filters
        PolynomialKernel (p=5, trainable c)         -> 3 filters

    NOTE(review): this block was originally described as using the ratio
    [0.1, 0.1, 0.2, 0.3, 0.3] over 16 filters with the rule
    "allocation = floor(total_filters*ratio), then hand the remainder out one
    filter at a time starting from the last kernel". The hard-coded counts
    above do not follow from that ratio - confirm which one is intended.

    # Arguments
        **kwargs: standard Keras layer keyword arguments (e.g. name, dtype,
            trainable), forwarded to tf.keras.layers.Layer.
    '''
    def __init__(self, **kwargs):
        # Forward the base-layer kwargs: without this the layer cannot be
        # named, and the default from_config (cls(**config)) raises TypeError.
        super(Kerception_blockC, self).__init__(**kwargs)
        self.kernel_fn1 = LinearKernel()
        self.kconv1 = KernelConv2D(filters=1, kernel_size=3, padding='same', kernel_function=self.kernel_fn1)
        self.kernel_fn2 = SigmoidKernel()
        self.kconv2 = KernelConv2D(filters=3, kernel_size=3, padding='same', kernel_function=self.kernel_fn2)
        self.kernel_fn3 = GaussianKernel(gamma=1.0, trainable_gamma=True, initializer='he_normal')
        self.kconv3 = KernelConv2D(filters=3, kernel_size=3, padding='same', kernel_function=self.kernel_fn3)
        self.kernel_fn4 = PolynomialKernel(p=3, trainable_c=True, initializer='he_normal')
        self.kconv4 = KernelConv2D(filters=6, kernel_size=3, padding='same', kernel_function=self.kernel_fn4)
        self.kernel_fn5 = PolynomialKernel(p=5, trainable_c=True, initializer='he_normal')
        self.kconv5 = KernelConv2D(filters=3, kernel_size=3, padding='same', kernel_function=self.kernel_fn5)

    def call(self, x):
        '''Apply every kervolution branch to x and concatenate the results on axis 3.'''
        x1 = self.kconv1(x)
        x2 = self.kconv2(x)
        x3 = self.kconv3(x)
        x4 = self.kconv4(x)
        x5 = self.kconv5(x)

        # axis 3 is the channel axis for 4-D channels-last tensors
        # (assumed input layout - TODO confirm).
        return tf.keras.layers.concatenate([x1, x2, x3, x4, x5], axis = 3)

In [None]:
def dynamic_scheduling(num_filters, ratios=(0.1, 0.1, 0.2, 0.3, 0.3)):
    '''
    Return the number of filters to give each kernel function in a Kerception block.

    Allocation rule:
      1. allocation[i] = floor(num_filters * ratios[i])
      2. remainder = num_filters - sum(allocation)
      3. hand the remainder out one filter at a time, starting from the last
         kernel and moving backwards, until nothing is left.
    NOTE(review): the hand-out order (last kernel first) and stopping at
    remainder == 0 follow the textual description of the rule - confirm.

    Input:
    num_filters: total number of filters (non-negative int).
    ratios: share of the filters for each kernel function; expected to be
        non-negative and to sum to 1. Defaults to [0.1, 0.1, 0.2, 0.3, 0.3].
    Output:
    filter_num: Number of filters for each kernel function (list of int, same
        length as ratios, summing to num_filters when ratios sum to 1).
    Raises:
    ValueError: if num_filters is negative or ratios is empty.
    '''
    if num_filters < 0:
        raise ValueError('num_filters must be non-negative, got {}'.format(num_filters))
    if len(ratios) == 0:
        raise ValueError('ratios must contain at least one entry')

    # int() truncates towards zero, which equals floor for non-negative products.
    filter_num = [int(num_filters * ratio) for ratio in ratios]
    remainder = num_filters - sum(filter_num)

    index = 1
    while remainder > 0:
        filter_num[-index] += 1
        remainder -= 1
        # Wrap around so ratios that under-allocate by more than len(ratios) still terminate.
        index = index % len(filter_num) + 1
    return filter_num

In [None]:
# Build the network selected by `version`.
if version == 1:
    # Use the per-sample shape taken from the loaded data (input_shape) instead
    # of a hard-coded (224, 224, 3), so the model always matches x_train.
    model = Alexnet(input_shape=input_shape, output_shape=num_classes)
elif version == 2:
    # Fail here rather than with a NameError on `model` in the next cell.
    raise NotImplementedError('version = 2 (VGG16) is not implemented in this notebook yet.')
else:
    raise ValueError('Unknown model version: {} (expected 1 or 2).'.format(version))

In [None]:
# Print the summary of the model built in the previous cell.
model.summary()

In [None]:
# Compile with categorical cross-entropy and track top-1 and top-5 accuracy.
# The labels are one-hot encoded (keras.utils.to_categorical), so the top-k
# metric must be the categorical variant; SparseTopKCategoricalAccuracy expects
# integer class ids. The metric keeps the name
# 'sparse_top_k_categorical_accuracy' so the history keys read by the plotting
# and reporting cells stay valid.
# Note: lr_schedule(0) also prints the initial learning rate.
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(learning_rate=lr_schedule(0)),
              metrics=['accuracy',
                       tf.keras.metrics.TopKCategoricalAccuracy(
                           k=5, name='sparse_top_k_categorical_accuracy')])

In [None]:
# Per-epoch learning-rate callback driven by lr_schedule.
lr_scheduler = LearningRateScheduler(lr_schedule)

# Plateau-based reducer: factor sqrt(0.1), patience of 5 epochs, no cooldown,
# and a floor of 0.5e-6 on the learning rate.
# NOTE(review): LearningRateScheduler presumably re-applies lr_schedule at every
# epoch and so overrides whatever this callback set - confirm both are intended.
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)

In [None]:
# Callbacks handed to training: the plateau reducer first, then the epoch schedule.
callbacks = [lr_reducer, lr_scheduler]

In [None]:
# Run training, with or without data augmentation.
# Either way `history` holds the History object used by the plotting cells, and
# the test split doubles as the validation set.
if not data_augmentation:
    print('Not using data augmentation.')
    history = model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True,
              callbacks=callbacks)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        # set input mean to 0 over the dataset
        featurewise_center=False,
        # set each sample mean to 0
        samplewise_center=False,
        # divide inputs by std of dataset
        featurewise_std_normalization=False,
        # divide each input by its std
        samplewise_std_normalization=False,
        # apply ZCA whitening
        zca_whitening=False,
        # epsilon for ZCA whitening
        zca_epsilon=1e-06,
        # randomly rotate images in the range (deg 0 to 180)
        rotation_range=0,
        # randomly shift images horizontally
        width_shift_range=0.1,
        # randomly shift images vertically
        height_shift_range=0.1,
        # set range for random shear
        shear_range=0.,
        # set range for random zoom
        zoom_range=0.,
        # set range for random channel shifts
        channel_shift_range=0.,
        # set mode for filling points outside the input boundaries
        fill_mode='nearest',
        # value used for fill_mode = "constant"
        cval=0.,
        # randomly flip images horizontally
        horizontal_flip=True,
        # randomly flip images vertically
        vertical_flip=False,
        # set rescaling factor (applied before any other transformation)
        rescale=None,
        # set function that will be applied on each input
        preprocessing_function=None,
        # image data format, either "channels_first" or "channels_last"
        data_format=None,
        # fraction of images reserved for validation (strictly between 0 and 1)
        validation_split=0.0)

    # Compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    # Model.fit accepts generators directly; Model.fit_generator is deprecated.
    # The batch size is set on the generator, so it is not passed to fit().
    history = model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
                        validation_data=(x_test, y_test),
                        epochs=epochs, verbose=1, workers=4,
                        callbacks=callbacks)

In [None]:
# Plot results:
# Top-1 accuracy per epoch, training vs. validation, on figure 1.
plt.figure(1)
for history_key, curve_label in (('accuracy', 'Training Accuracy'),
                                 ('val_accuracy', 'Validation Accuracy')):
    plt.plot(history.history[history_key], label=curve_label, linewidth=2, markersize=8)
plt.title('Imagenet')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc="upper left")
plt.grid()
plt.show()

In [None]:
# Top-5 accuracy per epoch, training vs. validation, on figure 2.
plt.figure(2)
for history_key, curve_label in (
        ('sparse_top_k_categorical_accuracy', 'Training Top-5 Accuracy'),
        ('val_sparse_top_k_categorical_accuracy', 'Validation Top-5 Accuracy')):
    plt.plot(history.history[history_key], label=curve_label, linewidth=2, markersize=8)
plt.title('Imagenet')
plt.xlabel('Epoch')
plt.ylabel('Top-5 Accuracy')
plt.legend(loc="upper left")
plt.grid()
plt.show()

In [None]:
# Highest validation top-1 accuracy reached over all recorded epochs.
print("Best validation top-1 accuracy is:{}".format(max(history.history['val_accuracy'])))

In [None]:
# Highest validation top-5 accuracy reached over all recorded epochs.
print("Best validation top-5 accuracy is:{}".format(max(history.history['val_sparse_top_k_categorical_accuracy'])))