In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

In [None]:
"""
AlexNet contains eight learned layers: five convolutional and three fully-connected.
[227x227x3] INPUT
[55x55x96] CONV1: 96 11x11 filters at stride 4, pad ?
[27x27x96] MAX_POOL1: 3x3 filters at stride 2
[27x27x96] NORM1: Normalization layer
[27x27x256] CONV2: 256 5x5 filters at stride 1, pad ?
[13x13x256] MAX_POOL2: 3x3 filters at stride 2
[13x13x256] NORM2: Normalization layer
[13x13x384] CONV3: 384 3x3 filters at stride 1, pad ?
[13x13x384] CONV4: 384 3x3 filters at stride 1, pad ?
[13x13x256] CONV5: 256 3x3 filters at stride 1, pad ?
[6x6x256] MAX_POOL3: 3x3 filters at stride 2
[4096] 4096 neurons
[4096] 4096 neurons
[1000] 1000 neurons (the dataset used here has 10 classes, so the output layer is built with num_classes=10)

- Response-normalization layers follow the first and second convolutional layers:
Usage: torch.nn.LocalResponseNorm(size: int, alpha: float = 0.0001, beta: float = 0.75, k: float = 1.0)
- The ReLU non-linearity is applied to the output of every convolutional and fully-connected layer
- Max-pooling follows the first, second, and fifth convolutional layers
- Dropout in the first two fully-connected layers of Figure 2 (p=0.5)
"""

class AlexNet(nn.Module):
    """Single-tower AlexNet classifier: five conv layers followed by three linear layers.

    Layer widths follow the architecture listing that precedes this class:
    conv1=96, conv2=256, conv3=384, conv4=384, conv5=256 channels, then
    4096 -> 4096 -> num_classes fully-connected units.

    The first linear layer is sized for a 256 x 6 x 6 feature map, which is what
    a 3 x 224 x 224 input produces after the three max-pool stages. Inputs whose
    feature map is not 6 x 6 fail with a shape error in ``linear1``.

    Args:
        num_classes: Number of output logits produced by the final linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # conv1: 96 kernels of 11x11 over the 3-channel image, stride 4.
        # padding=2 maps a 224x224 input to a 55x55 feature map.
        self.conv1 = nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2)
        # NOTE: despite the "bn" name this is local response normalization, not batch norm.
        # The attribute name is kept so existing references to model.bn1 keep working.
        self.bn1 = nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2)

        # conv2: 256 kernels of 5x5 over all 96 input channels.
        # (The paper's 5x5x48 kernels come from its two-GPU split, which is not used here.)
        self.conv2 = nn.Conv2d(96, 256, kernel_size=5, padding=2)
        self.bn2 = nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2)

        # conv3: 384 kernels of 3x3 over the 256 channels coming out of stage 2.
        self.conv3 = nn.Conv2d(256, 384, kernel_size=3, padding=1)

        # conv4: 384 kernels of 3x3; the width stays at 384, as in the architecture listing.
        self.conv4 = nn.Conv2d(384, 384, kernel_size=3, padding=1)

        # conv5: 256 kernels of 3x3; this is the layer that reduces 384 -> 256 channels.
        self.conv5 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
        self.dropout1 = nn.Dropout(p=0.5)

        # Three fully-connected layers: two hidden layers of 4096 units each,
        # and a final output layer with one unit per class.
        self.linear1 = nn.Linear(256 * 6 * 6, 4096)
        self.dropout2 = nn.Dropout(p=0.5)

        self.linear2 = nn.Linear(4096, 4096)

        self.output = nn.Linear(4096, num_classes)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image batch of shape (N, 3, H, W); H = W = 224 yields the
                256 x 6 x 6 feature map that ``linear1`` expects.

        Returns:
            Tensor of shape (N, num_classes) with unnormalized logits.
        """
        # Stage 1: conv -> ReLU -> 3x3/2 max-pool -> local response norm.
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        x = self.bn1(x)

        # Stage 2: same pattern as stage 1.
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=3, stride=2)
        x = self.bn2(x)

        # Stages 3-5: three 3x3 convolutions, with pooling only after the last one.
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = F.max_pool2d(x, kernel_size=3, stride=2)

        # Classifier head. Dropout precedes each of the two 4096-unit layers.
        x = self.dropout1(x)
        # Flatten per sample (keep dim 0). Unlike view(-1, 256*6*6), this cannot
        # silently merge samples when the feature map is not 6x6; linear1 raises instead.
        x = torch.flatten(x, 1)

        x = F.relu(self.linear1(x))
        x = self.dropout2(x)

        x = F.relu(self.linear2(x))

        return self.output(x)

# x = F.max_pool2d(x, kernel_size=1)
# x = F.relu(x)

In [None]:
# `device` is not defined by any earlier cell, so choose it here:
# use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network with its default 10-class output layer and move it to the chosen device.
model = AlexNet().to(device)
# Print the per-layer summary for a single 3x224x224 input.
# NOTE(review): verbose=0 presumably silences summary()'s own printing so the table
# appears only once via print() — confirm against the installed torchsummary version.
print(summary(model, input_data=(3,224,224), verbose = 0))

In [None]:
------------------------------------------------------------------------------------------
Layer (type:depth-idx)                   Output Shape              Param #
==========================================================================================
├─Conv2d: 1-1                            [-1, 96, 55, 55]          34,944
├─LocalResponseNorm: 1-2                 [-1, 96, 27, 27]          --
├─Conv2d: 1-3                            [-1, 256, 27, 27]         614,656
├─LocalResponseNorm: 1-4                 [-1, 256, 13, 13]         --
├─Conv2d: 1-5                            [-1, 384, 13, 13]         885,120
├─Conv2d: 1-6                            [-1, 256, 13, 13]         884,992
├─Conv2d: 1-7                            [-1, 256, 13, 13]         590,080
├─Dropout: 1-8                           [-1, 256, 6, 6]           --
├─Linear: 1-9                            [-1, 4096]                37,752,832
├─Dropout: 1-10                          [-1, 4096]                --
├─Linear: 1-11                           [-1, 4096]                16,781,312
├─Linear: 1-12                           [-1, 10]                  40,970
==========================================================================================
Total params: 57,584,906
Trainable params: 57,584,906
Non-trainable params: 0
Total mult-adds (G): 1.01
------------------------------------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 4.86
Params size (MB): 219.67
Estimated Total Size (MB): 225.10
------------------------------------------------------------------------------------------