In [75]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [86]:
class AlexNet(nn.Module):
    
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        
        self.feature_extractor = nn.Sequential(
            # input size = 227x227x3
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4), # 55x55x96
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2), # 27x27x96
            nn.LocalResponseNorm(2),
            nn.Conv2d(96, 256, 5, padding=2), # 27x27x256
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2), # 13x13x256
            nn.LocalResponseNorm(2),
            nn.Conv2d(256, 384, 3, padding=1), # 13x13x384
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, padding=1), # 13x13x384
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, padding=1), # 13x13x256
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2), # 6x6x256
        )
        
        self. classifier = nn.Sequential(
            nn.Linear(in_features=9216, out_features=4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096,num_classes)
        )
        
    def forward(self, x):
        x = self.feature_extractor(x)
        x = torch.flatten(x,1)
        logits = self.classifier(x)
        probs = F.softmax(logits, dim=1)
        return logits, probs

In [87]:
model = AlexNet()
model

AlexNet(
  (feature_extractor): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): LocalResponseNorm(2, alpha=0.0001, beta=0.75, k=1.0)
    (4): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): LocalResponseNorm(2, alpha=0.0001, beta=0.75, k=1.0)
    (8): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=9216, out_features=4096, bias=True)
    (1): ReLU()
    (2): Linear(

## Summary
- 출처: https://pypi.org/project/torch-summary/

In [88]:
from torchsummary import summary

In [89]:
summary(model, (3, 227, 227))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 96, 55, 55]          34,944
|    └─ReLU: 2-2                         [-1, 96, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 96, 27, 27]          --
|    └─LocalResponseNorm: 2-4            [-1, 96, 27, 27]          --
|    └─Conv2d: 2-5                       [-1, 256, 27, 27]         614,656
|    └─ReLU: 2-6                         [-1, 256, 27, 27]         --
|    └─MaxPool2d: 2-7                    [-1, 256, 13, 13]         --
|    └─LocalResponseNorm: 2-8            [-1, 256, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 384, 13, 13]         885,120
|    └─ReLU: 2-10                        [-1, 384, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 384, 13, 13]         1,327,488
|    └─ReLU: 2-12                        [-1, 384, 13, 13]      

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 96, 55, 55]          34,944
|    └─ReLU: 2-2                         [-1, 96, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 96, 27, 27]          --
|    └─LocalResponseNorm: 2-4            [-1, 96, 27, 27]          --
|    └─Conv2d: 2-5                       [-1, 256, 27, 27]         614,656
|    └─ReLU: 2-6                         [-1, 256, 27, 27]         --
|    └─MaxPool2d: 2-7                    [-1, 256, 13, 13]         --
|    └─LocalResponseNorm: 2-8            [-1, 256, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 384, 13, 13]         885,120
|    └─ReLU: 2-10                        [-1, 384, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 384, 13, 13]         1,327,488
|    └─ReLU: 2-12                        [-1, 384, 13, 13]      

**AlexNet 구조 설명** 
- AlexNet의 입력층은 227x227의 크기의 이미지를 RGB 3개의 채널로 받고 있습니다. 

- 5개의 Convolution layer과 3개의 Fully connected layer로 이루어져 있으며, 마지막 레이어의 소프트맥스 함수를 통해 예측을 하게 됩니다. 

- `Activation`: AlexNet은 기존에 사용하던 sigmoid, tanh 함수 대신에 relu함수를 사용하였습니다. 

- `Local Response Normalization(LRN)`: relu 함수를 활성화함수로 사용했을 때, 결과값이 양수 방향으로 무한히 커질 가능성이 있어 정규화과정을 수행하였습니다. 

- `Overlapping Pooling`: 기존의 2x2 커널 크기의 pooling 방식이 아니라, 3x3크기의 커널을 사용하여 pooling이 겹치게 하였습니다. 

- `Dropout`: FC1과 FC2에서 dropout을 사용하였습니다. 