In [1]:
# Importing section

import keras 
import tensorflow as tf
import numpy as np
import time

from keras.datasets import cifar10
from keras.utils import np_utils
from keras.layers import Conv2D, Dense, DepthwiseConv2D, Flatten, Dropout, Activation, MaxPooling2D
from keras.models import Sequential
from keras.optimizers import SGD
from keras.constraints import NonNeg
from keras import backend as K

import matplotlib.pyplot as plt

Using TensorFlow backend.


## Loading Data

In [2]:
# Fetch the CIFAR-10 train/test split through the Keras dataset helper.
(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()

# Cast the images to float32 and divide by 255 in a single step.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# Turn the integer class labels into one-hot rows for categorical cross-entropy.
Y_train = np_utils.to_categorical(Y_train)
Y_test = np_utils.to_categorical(Y_test)

In [3]:
class hw_net_1(keras.layers.Layer):
    """Five stacked depthwise-convolutional highway blocks.

    Every block maps its input ``x`` to

        C = sigmoid(carry_i(x))
        H = relu(hw_i(x))
        y = H * C + x * (1 - C)

    where ``hw_i`` and ``carry_i`` are 2x2, stride-1, 'same'-padded
    ``DepthwiseConv2D`` layers with ``depth_multiplier = 1``, so the output
    keeps the shape of the input.

    Args:
        num_channels: stored on the instance; not used by the layer itself.
        list_kernels: stored on the instance; not used by the layer itself.
    """

    def __init__(self, num_channels = 3, list_kernels = []):
        super(hw_net_1, self).__init__()
        self.num_channels = num_channels
        self.list_kernels = list_kernels

        # Gate biases start at -3, so the sigmoid gate is small at
        # initialisation and each block leans toward passing its input through.
        bias_initializer = keras.initializers.Constant(value = -3.)

        def depthwise(**extra):
            # All sub-layers share the same geometry; only the gate bias differs.
            return DepthwiseConv2D(kernel_size = (2, 2), strides = (1, 1), padding = 'same',
                                   depth_multiplier = 1, data_format = 'channels_last', **extra)

        # Attribute names and assignment order are kept as before so that
        # sub-layer tracking (and therefore weight ordering) is unchanged.
        self.hw_1 = depthwise()
        self.carry_1 = depthwise(bias_initializer = bias_initializer)
        self.hw_2 = depthwise()
        self.carry_2 = depthwise(bias_initializer = bias_initializer)
        self.hw_3 = depthwise()
        self.carry_3 = depthwise(bias_initializer = bias_initializer)
        self.hw_4 = depthwise()
        self.carry_4 = depthwise(bias_initializer = bias_initializer)
        self.hw_5 = depthwise()
        self.carry_5 = depthwise(bias_initializer = bias_initializer)

    def _highway_step(self, x, hw_layer, carry_layer):
        # One highway block: blend the transformed signal with the block's own input.
        gate = tf.keras.activations.sigmoid(carry_layer(x))
        transformed = tf.keras.activations.relu(hw_layer(x))
        gated_transform = tf.multiply(transformed, gate)
        gated_input = tf.multiply(x, (1 - gate))
        return tf.add(gated_transform, gated_input)

    def call(self, states):
        """Apply the five highway blocks in sequence and return the final tensor."""
        blocks = (
            (self.hw_1, self.carry_1),
            (self.hw_2, self.carry_2),
            (self.hw_3, self.carry_3),
            (self.hw_4, self.carry_4),
            (self.hw_5, self.carry_5),
        )
        out = states
        # Each block's pass-through term uses that block's own input. The
        # previous code added block 1's pass-through term in block 4 by mistake.
        for hw_layer, carry_layer in blocks:
            out = self._highway_step(out, hw_layer, carry_layer)
        return out

In [4]:
# Smoke test: call a standalone hw_net_1 layer on an all-ones float32 batch of
# shape (16, 32, 32, 3) and display the resulting tensor.
p = hw_net_1()
prova = tf.constant(np.ones((16, 32, 32, 3), dtype = np.float32))
p(prova)

<tf.Tensor 'hw_net_1_1/Add_4:0' shape=(16, 32, 32, 3) dtype=float32>

In [5]:
class hw_net_2(keras.layers.Layer):
    """Four stacked depthwise-convolutional highway blocks.

    Every block maps its input ``x`` to

        C = sigmoid(carry_i(x))
        H = relu(hw_i(x))
        y = H * C + x * (1 - C)

    where ``hw_i`` and ``carry_i`` are 2x2, stride-1, 'same'-padded
    ``DepthwiseConv2D`` layers with ``depth_multiplier = 1``, so the output
    keeps the shape of the input.

    Args:
        num_channels: stored on the instance; not used by the layer itself.
        list_kernels: stored on the instance; not used by the layer itself.
    """

    def __init__(self, num_channels = 3, list_kernels = []):
        super(hw_net_2, self).__init__()
        self.num_channels = num_channels
        self.list_kernels = list_kernels

        # Gate biases start at -3, so the sigmoid gate is small at
        # initialisation and each block leans toward passing its input through.
        bias_initializer = keras.initializers.Constant(value=-3.)

        def depthwise(**extra):
            # All sub-layers share the same geometry; only the gate bias differs.
            return DepthwiseConv2D(kernel_size = (2, 2), strides = (1, 1), padding = 'same',
                                   depth_multiplier = 1, data_format = 'channels_last', **extra)

        # Attribute names and assignment order are kept as before so that
        # sub-layer tracking (and therefore weight ordering) is unchanged.
        self.hw_1 = depthwise()
        self.carry_1 = depthwise(bias_initializer = bias_initializer)
        self.hw_2 = depthwise()
        self.carry_2 = depthwise(bias_initializer = bias_initializer)
        self.hw_3 = depthwise()
        self.carry_3 = depthwise(bias_initializer = bias_initializer)
        self.hw_4 = depthwise()
        self.carry_4 = depthwise(bias_initializer = bias_initializer)

    def _highway_step(self, x, hw_layer, carry_layer):
        # One highway block: blend the transformed signal with the block's own input.
        gate = tf.keras.activations.sigmoid(carry_layer(x))
        transformed = tf.keras.activations.relu(hw_layer(x))
        gated_transform = tf.multiply(transformed, gate)
        gated_input = tf.multiply(x, (1 - gate))
        return tf.add(gated_transform, gated_input)

    def call(self, states):
        """Apply the four highway blocks in sequence and return the final tensor."""
        blocks = (
            (self.hw_1, self.carry_1),
            (self.hw_2, self.carry_2),
            (self.hw_3, self.carry_3),
            (self.hw_4, self.carry_4),
        )
        out = states
        # Each block's pass-through term uses that block's own input. The
        # previous code added block 1's pass-through term in the last block by mistake.
        for hw_layer, carry_layer in blocks:
            out = self._highway_step(out, hw_layer, carry_layer)
        return out

In [6]:
class hw_net_3(keras.layers.Layer):
    """Three stacked depthwise-convolutional highway blocks.

    Block ``i`` turns its input ``x`` into
    ``relu(hw_i(x)) * g + x * (1 - g)`` with ``g = sigmoid(carry_i(x))``.
    """

    def __init__(self):
        super(hw_net_3, self).__init__()

        # Constant -3 bias for every gate convolution.
        gate_bias_init = keras.initializers.Constant(value=-3.)
        # Geometry shared by every depthwise convolution in this layer.
        shared_conv_args = dict(kernel_size = (2, 2), strides = (1, 1), padding = 'same',
                                depth_multiplier = 1, data_format = 'channels_last')

        self.hw_1 = DepthwiseConv2D(**shared_conv_args)
        self.carry_1 = DepthwiseConv2D(bias_initializer = gate_bias_init, **shared_conv_args)
        self.hw_2 = DepthwiseConv2D(**shared_conv_args)
        self.carry_2 = DepthwiseConv2D(bias_initializer = gate_bias_init, **shared_conv_args)
        self.hw_3 = DepthwiseConv2D(**shared_conv_args)
        self.carry_3 = DepthwiseConv2D(bias_initializer = gate_bias_init, **shared_conv_args)

    def call(self, states):
        """Run the input through the three highway blocks in order."""
        stages = (
            (self.hw_1, self.carry_1),
            (self.hw_2, self.carry_2),
            (self.hw_3, self.carry_3),
        )
        current = states
        for transform_conv, gate_conv in stages:
            # Gate first, then the transformed signal, then the two gated terms.
            gate = tf.keras.activations.sigmoid(gate_conv(current))
            candidate = tf.keras.activations.relu(transform_conv(current))
            gated_candidate = tf.multiply(candidate, gate)
            gated_input = tf.multiply(current, (1 - gate))
            current = tf.add(gated_candidate, gated_input)

        return current

In [7]:
def Model_highway(weights = None):
    """Assemble the convolution + highway classifier as a Sequential model.

    The network takes 32x32x3 inputs, runs three Conv2D -> highway -> ReLU ->
    2x2 max-pool stages (16, 32 and 64 filters), then flattens into a
    500-unit ReLU dense layer and a 10-way softmax, with dropout of 0.25
    before each dense layer.

    Args:
        weights: optional path handed to ``model.load_weights``; nothing is
            loaded when it is falsy.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    model = Sequential()

    # Every pooling layer uses the same configuration.
    pool_args = dict(pool_size=(2, 2), strides=(2, 2), padding="valid", data_format='channels_last')

    stack = [
        # Stage 1
        Conv2D(16, kernel_size = (3, 3), padding = 'same', name = 'conv_1', input_shape = ( 32, 32, 3)),
        hw_net_1(),
        Activation('relu'),
        MaxPooling2D(**pool_args),
        # Stage 2
        Conv2D(32, kernel_size = (3, 3), padding = 'same', name = 'conv_3'),
        hw_net_2(),
        Activation('relu'),
        MaxPooling2D(**pool_args),
        # Stage 3
        Conv2D(64, kernel_size = (3, 3), padding = 'same', name = 'conv_4'),
        hw_net_3(),
        Activation('relu'),
        MaxPooling2D(**pool_args),
        # Classifier head
        Flatten(),
        Dropout(0.25),
        Dense(500, name = 'dense_1', activation = 'relu'),
        Dropout(0.25),
        Dense(10, name = 'dense_2'),
        Activation('softmax'),
    ]
    for layer in stack:
        model.add(layer)

    if weights:
        model.load_weights(weights)

    return model

In [8]:
# Training hyperparameters read by the optimizer and fit cells further down.
learning_rate = 0.009
# NOTE: despite its name, this value is passed as SGD's `decay` argument
# (learning-rate decay), not as an L2 weight penalty.
weight_decay = 1e-6
# NOTE(review): 0.09 is unusually small for momentum (0.9 is the common
# choice) — confirm this value is intended.
momentum = 0.09
epochs = 25
batch_size = 64

In [9]:
# Build a freshly initialised network and snapshot its untrained weights to disk.
model = Model_highway()
model.save_weights('model_weights_cifar10_cnn.h5')

# Nesterov SGD configured from the hyperparameter cell.
optimizer_no_norm = SGD(
    lr = learning_rate,
    decay = weight_decay,
    momentum = momentum,
    nesterov = True,
)
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = optimizer_no_norm,
    metrics = ['accuracy'],
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_1 (Conv2D)              (None, 32, 32, 16)        448       
_________________________________________________________________
hw_net_1_2 (hw_net_1)        (None, 32, 32, 16)        800       
_________________________________________________________________
activation_1 (Activation)    (None, 32, 32, 16)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 16)        0         
_________________________________________________________________
conv_3 (Conv2D)              (None, 16, 16, 32)        4640      
_________________________________________________________________
hw_net_2_1 (hw_net_2)        (None, 16, 16, 32)        1280      
_________________________________________________________________
activation_2 (Activation)    (None, 16, 16, 32)       

In [10]:
# Train the model and report the wall-clock duration of the fit call.
# The test split is used as the validation set.
start = time.time()
history = model.fit(
    X_train,
    Y_train,
    validation_data = (X_test, Y_test),
    epochs = epochs,
    batch_size = batch_size,
    verbose = 1,
)
end = time.time()
print("time elapsed = {}".format(end - start))

Train on 50000 samples, validate on 10000 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
time elapsed = 232.37295579910278


In [52]:
# Call the standalone hw_net_1 instance `p` on the all-ones batch `prova`
# again (both are defined in an earlier cell).
# NOTE(review): this cell's execution count (52) is out of sequence with the
# cells above — confirm it is still wanted after a Restart & Run All.
p(prova)

<tf.Tensor 'hw_net_1_11_1/Add_4:0' shape=(16, 32, 32, 3) dtype=float32>