In [1]:
# Implementation of AlexNet with TF 2.0

# Reference: https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf

# Remarks: The original implementation makes use of 2 GPUs.
#          Due to the lack of resources, this implementation is designed to work on a single CPU.

In [94]:
import tensorflow as tf
from tensorflow.keras import layers, models

import numpy as np

# Report the TensorFlow version and the version of its bundled Keras API,
# one per line, so the environment used for this notebook is recorded.
for version_string in (tf.__version__, tf.keras.__version__):
    print(version_string)

#TODO: rewrite a version in TF2.0

2.0.0-rc0
2.2.4-tf


In [95]:
### Get the data

In [96]:
### Pre-process the data (data augmentation)

In [129]:
### Create a custom LRN (Local Response Normalization) layer in Keras

class LRN_layer(tf.keras.layers.Layer):
    ''' Local Response Normalization (LRN) across channels, as described in the AlexNet paper.

        Each activation is divided by a term built from the squared activations
        of the adjacent channels at the same spatial position:

            b[i] = a[i] / (k + alpha * sum_j(a[j] ** 2)) ** beta

        where j runs over the channels i - n//2 ... i + n//2 (clipped to the
        valid channel range).

        The input must be a 4-D floating point tensor with the channels on the
        LAST axis, i.e. (batch, height, width, channels), which is the Keras
        default `channels_last` layout.

        Arguments:
            k: additive bias of the normalization term (keeps the denominator
               away from 0 when the activations are all 0, e.g. after a ReLU).
            n: size of the channel window; n // 2 channels are taken on each
               side of the normalized channel.
            alpha: scale applied to the sum of squares.
            beta: exponent applied to the normalization term.
            **kwargs: forwarded to `tf.keras.layers.Layer` (name, dtype, ...).
    '''

    def __init__(self, k = 2, n = 5, alpha = 10**(-4), beta = 0.75, **kwargs):
        # Initialize the base layer first so that Keras attribute tracking is ready
        super(LRN_layer, self).__init__(**kwargs)
        self.k = k
        self.n = n
        self.alpha = alpha
        self.beta = beta

    def call(self, x):
        ''' Apply the normalization on x.

            Arguments:
                x: 4-D tensor of shape (batch, height, width, channels).

            Returns:
                A tensor with the same shape and dtype as x.
        '''
        # The built-in op computes exactly the formula above in a single
        # vectorized kernel; it replaces the per-channel / per-pixel Python
        # loops, which relied on undefined dimensions and on a channels-first
        # slicing that did not match the channels-last tensors produced by Conv2D.
        return tf.nn.local_response_normalization(
            x,
            depth_radius = int(self.n) // 2,   # half-width of the channel window
            bias = float(self.k),              # op attributes are floats
            alpha = float(self.alpha),
            beta = float(self.beta))

    def compute_output_shape(self, input_shape):
        # The LRN layer does not change the output shape
        return input_shape

    def get_config(self):
        ''' Return the constructor arguments so that a model using this layer
            can be saved and re-created from its config.
        '''
        config = super(LRN_layer, self).get_config()
        config.update({
            'k': self.k,
            'n': self.n,
            'alpha': self.alpha,
            'beta': self.beta,
        })
        return config

In [133]:
### Build the model

# Architecture (single-tower version of AlexNet, for 227 x 227 x 3 inputs).
# In the paper, response normalization + max-pooling follow the FIRST and SECOND
# convolutions, and max-pooling alone follows the fifth one. Convolutions 2 to 5
# are zero-padded so that they preserve the spatial size.
#
# L1: CONV(11 x 11 x 3 x 96, stride = 4) -> ReLU -> Response normalization -> MaxPooling(3 x 3, stride = 2)
# L2: CONV(5 x 5 x 96 x 256, same padding) -> ReLU -> Response normalization -> MaxPooling(3 x 3, stride = 2)
# L3: CONV(3 x 3 x 256 x 384, same padding) -> ReLU
# L4: CONV(3 x 3 x 384 x 384, same padding) -> ReLU
# L5: CONV(3 x 3 x 384 x 256, same padding) -> ReLU -> MaxPooling(3 x 3, stride = 2)
#
# Flatten (6 x 6 x 256 = 9216 features)
# L6: FC(4096) -> ReLU -> Dropout(0.5)
# L7: FC(4096) -> ReLU -> Dropout(0.5)
# L8: FC(1000) -> Softmax

# Instantiate a LRN_layer
#lrn_layer = LRN_layer()
#print(lrn_layer(tf.ones((10, 3, 5, 5))))

model = tf.keras.Sequential()
# Layer 1: 227 x 227 x 3 -> 55 x 55 x 96 -> (pooling) 27 x 27 x 96
model.add(layers.Conv2D(96, (11, 11), strides = 4, activation = 'relu', input_shape = (227, 227, 3)))
model.add(LRN_layer())
model.add(layers.MaxPooling2D(pool_size=(3, 3), strides=2))

# Layer 2: 27 x 27 x 96 -> 27 x 27 x 256 -> (pooling) 13 x 13 x 256
model.add(layers.Conv2D(256, (5, 5), padding = 'same', activation = 'relu'))
model.add(LRN_layer())
model.add(layers.MaxPooling2D(pool_size=(3, 3), strides=2))

# Layer 3: 13 x 13 x 256 -> 13 x 13 x 384
model.add(layers.Conv2D(384, (3, 3), padding = 'same', activation = 'relu'))

# Layer 4: 13 x 13 x 384 -> 13 x 13 x 384
model.add(layers.Conv2D(384, (3, 3), padding = 'same', activation = 'relu'))

# Layer 5: 13 x 13 x 384 -> 13 x 13 x 256 -> (pooling) 6 x 6 x 256
model.add(layers.Conv2D(256, (3, 3), padding = 'same', activation = 'relu'))
model.add(layers.MaxPooling2D(pool_size=(3, 3), strides=2))

# Flatten the feature maps before the fully connected layers
model.add(layers.Flatten())
# Layer 6:
model.add(layers.Dense(4096, activation = 'relu'))
model.add(layers.Dropout(0.5))

# Layer 7:
model.add(layers.Dense(4096, activation = 'relu'))
model.add(layers.Dropout(0.5))

# Layer 8:
model.add(layers.Dense(1000, activation = 'softmax'))

In [None]:
### Training time vs Test time (average cropped images)