# AlexNet

Paper: [ImageNet Classification with Deep Convolutional Neural Networks - Krizhevsky et al](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)

![architecture](https://cdn-images-1.medium.com/max/1200/1*wzflNwJw9QkjWWvTosXhNw.png)

In [1]:
from keras.layers import Input, Conv2D, MaxPooling2D, Concatenate, Flatten, Dense, Dropout
from keras.models import Model
from keras import backend as K

Using TensorFlow backend.


In [2]:
# Two-track AlexNet, mirroring the two-GPU layout of the original paper.
#
# The paper states the input size is 224x224x3, but this appears to be a
# mistake: with an 11x11 kernel and stride 4 (no padding) it is 227x227x3
# that yields the 55x55 feature maps of the first conv layer.
inputs = Input(shape=(227, 227, 3))

# Separate the network into two tracks for historical purposes;
# today you would just add the second track as extra filters.
# Track 1: conv1 -> pool -> conv2 -> pool
x1 = Conv2D(filters=48, kernel_size=(11, 11), strides=(4, 4), activation='relu')(inputs)
x1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x1)
x1 = Conv2D(filters=128, kernel_size=(5, 5), padding='same', activation='relu')(x1)
x1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x1)

# Track 2: identical to track 1. The first conv must carry the ReLU as well;
# Conv2D is linear when no activation is given.
x2 = Conv2D(filters=48, kernel_size=(11, 11), strides=(4, 4), activation='relu')(inputs)
x2 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x2)
x2 = Conv2D(filters=128, kernel_size=(5, 5), padding='same', activation='relu')(x2)
x2 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x2)

# The tracks exchange information here: the third conv layer of each track
# reads the channels of both tracks.
x = Concatenate(axis=-1)([x1, x2])

# Track 1: conv3 (sees both tracks) -> conv4 -> conv5 -> pool
x1 = Conv2D(filters=192, kernel_size=(3, 3), padding='same', activation='relu')(x)
x1 = Conv2D(filters=192, kernel_size=(3, 3), padding='same', activation='relu')(x1)
x1 = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(x1)
x1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x1)
x1 = Flatten()(x1)

# Track 2: conv3 (sees both tracks) -> conv4 -> conv5 -> pool
x2 = Conv2D(filters=192, kernel_size=(3, 3), padding='same', activation='relu')(x)
x2 = Conv2D(filters=192, kernel_size=(3, 3), padding='same', activation='relu')(x2)
x2 = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(x2)
x2 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(x2)
x2 = Flatten()(x2)

x = Concatenate(axis=-1)([x1, x2])

# Each fully connected layer is split into two 2048-unit halves; both halves
# read the full concatenated input.
x1 = Dense(2048, activation='relu')(x)
x2 = Dense(2048, activation='relu')(x)
x = Concatenate(axis=-1)([x1, x2])
x = Dropout(rate=0.5)(x)

x1 = Dense(2048, activation='relu')(x)
x2 = Dense(2048, activation='relu')(x)
x = Concatenate(axis=-1)([x1, x2])
x = Dropout(rate=0.5)(x)

# 1000-way softmax classifier head.
x = Dense(1000, activation='softmax')(x)

alexnet = Model(inputs=inputs, outputs=x)
alexnet.compile(optimizer='sgd', loss='categorical_crossentropy')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [3]:
# Print the per-layer table (output shape, parameter count, connections).
alexnet.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 227, 227, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 55, 55, 48)   17472       input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 55, 55, 48)   17472       input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 27, 27, 48)   0           conv2d_1[0][0]                   
__________________________________________________________________________________________________
max_poolin

In [4]:
# Drop the previous model's backend state before building a second network.
K.clear_session()

inputs = Input(shape=(227, 227, 3))

# Present-day, single-track formulation of the same architecture.
# Its parameter count is higher than in the two-track version because the
# original AlexNet does not concatenate the tracks after every conv layer.
feature_layers = [
    Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), activation='relu'),
    MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    Conv2D(filters=256, kernel_size=(5, 5), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu'),
    Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu'),
    Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
    Flatten(),
]

classifier_layers = [
    Dense(4096, activation='relu'),
    Dropout(rate=0.5),
    Dense(4096, activation='relu'),
    Dropout(rate=0.5),
    Dense(1000, activation='softmax'),
]

# Thread the input tensor through the layers in declaration order.
x = inputs
for layer in feature_layers + classifier_layers:
    x = layer(x)

alexnet = Model(inputs=inputs, outputs=x)
alexnet.compile(optimizer='sgd', loss='categorical_crossentropy')

In [5]:
# Print the per-layer table (output shape, parameter count) of the model built above.
alexnet.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 227, 227, 3)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 55, 55, 96)        34944     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 27, 27, 96)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 27, 27, 256)       614656    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 256)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 13, 13, 384)       885120    
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 13, 13, 384)       1327488   
__________