In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

[![View on GitHub][github-badge]][github-basic] [![Open In Colab][colab-badge]][colab-basic] [![Open in Binder][binder-badge]][binder-basic]

[github-badge]: https://img.shields.io/badge/View-on%20GitHub-blue?logo=GitHub
[colab-badge]: https://colab.research.google.com/assets/colab-badge.svg
[binder-badge]: https://static.mybinder.org/badge_logo.svg

[github-basic]: Basic_AlexNet_in_Keras.ipynb
[colab-basic]: https://colab.research.google.com/github/mbrukman/reimplementing-ml-papers/blob/main/alexnet/Basic_AlexNet_in_Keras.ipynb
[binder-basic]: https://mybinder.org/v2/gh/mbrukman/reimplementing-ml-papers/main?filepath=alexnet/Basic_AlexNet_in_Keras.ipynb

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import Input, Sequential
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPool2D
from matplotlib import pyplot as plt

For simplicity, this implementation does not split the network across 2 GPUs as described in the original paper. It also omits the paper's local response normalization layers (see the `TODO`s in the model below for where they should appear).

In [None]:
# Build the AlexNet model incrementally: start from an empty, named Sequential
# container and append the layers one `add` call at a time, in network order.
model = Sequential(name="AlexNet")
model.add(Input(shape=(227, 227, 3)))

# Convolutional stack: five convolutions with three max-pooling steps.
model.add(Conv2D(96, (11, 11), strides=(4, 4), padding='valid',
                 activation='relu', name='Conv1'))
# TODO: add response normalization layer 1 here.
model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='valid',
                    name='MaxPool1'))

model.add(Conv2D(256, (5, 5), padding='same', activation='relu', name='Conv2'))
# TODO: add response normalization layer 2 here.
model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='valid',
                    name='MaxPool2'))

model.add(Conv2D(384, (3, 3), padding='same', activation='relu', name='Conv3'))
model.add(Conv2D(384, (3, 3), padding='same', activation='relu', name='Conv4'))
model.add(Conv2D(256, (3, 3), padding='same', activation='relu', name='Conv5'))
model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='valid',
                    name='MaxPool3'))

# Classifier head: flatten, two dropout-regularized dense layers, then a
# 1000-way softmax output.
model.add(Flatten(name="Flatten"))
model.add(Dense(4096, activation='relu', name="Dense1"))
model.add(Dropout(0.5, name="Dropout1"))
model.add(Dense(4096, activation='relu', name="Dense2"))
model.add(Dropout(0.5, name="Dropout2"))
model.add(Dense(1000, activation='softmax', name="Output"))

# Print the per-layer output shapes and parameter counts.
model.summary()

Model: "AlexNet"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Conv1 (Conv2D)              (None, 55, 55, 96)        34944     
                                                                 
 MaxPool1 (MaxPooling2D)     (None, 27, 27, 96)        0         
                                                                 
 Conv2 (Conv2D)              (None, 27, 27, 256)       614656    
                                                                 
 MaxPool2 (MaxPooling2D)     (None, 13, 13, 256)       0         
                                                                 
 Conv3 (Conv2D)              (None, 13, 13, 384)       885120    
                                                                 
 Conv4 (Conv2D)              (None, 13, 13, 384)       1327488   
                                                                 
 Conv5 (Conv2D)              (None, 13, 13, 256)       8849