In [None]:
"""
Implementation of AlexNet, from paper
"ImageNet Classification with Deep Convolutional Neural Networks" by Alex Krizhevsky et al.

See: https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
"""

#Key features:

'''

The first 5 layers are convolutional; the 1st, 2nd and 5th are each followed by a max-pooling layer.
The last 3 layers are fully connected (FC).
Input size should be (b x 3 x 227 x 227).
        # The original paper states that the input width and height are 224 pixels, but
        # with that size the output of the first convolution layer is not 55 x 55.
A softmax is applied to the final layer's output: logits -> predicted probabilities -> predicted labels.
Local response normalization is applied only after the 1st and 2nd conv layers.
A max-pooling layer follows each response-normalization layer and also the 5th conv layer.

'''

In [None]:
import torch
from torch import nn

class AlexNet(nn.Module):
  """
  Convolutional neural network built from the layers proposed in the AlexNet paper.

  The model is split into two ``nn.Sequential`` stacks:

  * ``net``: five convolutional layers with ReLU, local response normalization
    after the 1st and 2nd convolution, and max pooling after the 1st, 2nd and
    5th convolution.
  * ``classifier``: three fully connected layers with dropout before the first
    two; it returns raw logits (no softmax is applied here).

  The shape comments below assume an input of (b x 3 x 227 x 227).
  """
  def __init__(self, num_classes=1000):
    """
    Define and allocate layers for this neural net.

    Args:
        num_classes (int): number of classes predicted by model
    """
    super().__init__()
    self.net = nn.Sequential(
        nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),  # (b x 96 x 55 x 55)
        nn.ReLU(),
        nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),  # section 3.3
        nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 96 x 27 x 27)
        nn.Conv2d(96, 256, 5, padding=2),  # (b x 256 x 27 x 27)
        nn.ReLU(),
        nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
        nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 256 x 13 x 13)
        nn.Conv2d(256, 384, 3, padding=1),  # (b x 384 x 13 x 13)
        nn.ReLU(),
        nn.Conv2d(384, 384, 3, padding=1),  # (b x 384 x 13 x 13)
        nn.ReLU(),
        nn.Conv2d(384, 256, 3, padding=1),  # (b x 256 x 13 x 13)
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),  # (b x 256 x 6 x 6)
    )
    # "classifier" is just a name for the fully connected part:
    # linear layers + ReLU non-linearity.
    # Dropout is deliberately NOT in-place: the second dropout directly follows
    # a ReLU, and overwriting the ReLU output in place invalidates the tensor
    # autograd keeps for the backward pass.
    self.classifier = nn.Sequential(
        nn.Dropout(p=0.5),
        nn.Linear(in_features=(256 * 6 * 6), out_features=4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=4096, out_features=4096),
        nn.ReLU(),
        nn.Linear(in_features=4096, out_features=num_classes),
    )
    self.init_bias()  # initialise conv weights and biases

  def init_bias(self):
    """
    Initialise the convolutional layers of ``self.net``.

    Every ``nn.Conv2d`` gets weights drawn from N(0, 0.01) and a zero bias;
    the 2nd, 4th and 5th convolutions (indices 4, 10 and 12 of ``self.net``)
    then get their bias set to 1. The layers in ``self.classifier`` are not
    touched here and keep PyTorch's default initialisation.
    """
    for layer in self.net:
      if isinstance(layer, nn.Conv2d):
        nn.init.normal_(layer.weight, mean=0, std=0.01)
        nn.init.constant_(layer.bias, 0)
    # Original paper: bias = 1 for the 2nd, 4th and 5th conv layers.
    # Done once, after the loop, so the zero-initialisation above cannot
    # overwrite it and the assignments are not repeated per layer.
    for conv_index in (4, 10, 12):
      nn.init.constant_(self.net[conv_index].bias, 1)

  def forward(self, x):
    """
    Pass the input through the net.

    Args:
        x (Tensor): input tensor, expected shape (b x 3 x 227 x 227)

    Returns:
        output (Tensor): logits of shape (b x num_classes)

    Raises:
        RuntimeError: if the spatial size of ``x`` does not produce
            256 * 6 * 6 features per sample.
    """
    x = self.net(x)
    # Flatten per sample and keep the batch dimension fixed. Reshaping with
    # view(-1, 256 * 6 * 6) could silently change the batch size when a
    # wrongly sized input happens to have a compatible number of elements.
    x = x.flatten(1)
    return self.classifier(x)

# Smoke test: push one random 227 x 227 RGB image through a freshly built
# 1000-class AlexNet (on the GPU when one is available, otherwise on the CPU)
# and print the shape of the result.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
model = AlexNet(num_classes=1000)
model = model.to(device)
x = torch.randn(1, 3, 227, 227)
x = x.to(device)
print(model(x).size())


# Script entry point. The guard must compare against '__main__'; the previous
# comparison against the literal '__name__' was never true, so this block
# never ran.
if __name__ == '__main__':
  # Imported here rather than at module level so that merely importing this
  # file does not require TensorBoard to be installed.
  from torch.utils.tensorboard import SummaryWriter

  # NOTE(review): LOG_DIR, NUM_CLASSES and DEVICE_IDS are used below but were
  # not defined anywhere in this file. The values here are assumed defaults;
  # confirm them against the intended training setup.
  LOG_DIR = 'tblogs'   # directory handed to the summary writer
  NUM_CLASSES = 1000   # same value as the default of AlexNet(num_classes=...)
  DEVICE_IDS = None    # None lets DataParallel use all available devices

  print('AlexNet created')
  # print the seed value
  seed = torch.initial_seed()
  print(f'Used seed : {seed}')

  tbwriter = SummaryWriter(log_dir=LOG_DIR)
  print('TensorboardX summary writer created')

  # create model (`device` is the module-level device string chosen above)
  alexnet = AlexNet(num_classes=NUM_CLASSES).to(device)
  # train on multiple GPUs
  alexnet = torch.nn.parallel.DataParallel(alexnet, device_ids=DEVICE_IDS)
  print(alexnet)
  print('AlexNet created')

torch.Size([1, 1000])
