# Recap

- What's `nn.Module`, how to build a model
- Recap: what's a `CNN`
- How to build a simple CNN in pytorch?
    - key concepts: channels (in_channels, out_channels), kernel_size, stride, padding: [link](https://poloclub.github.io/cnn-explainer/#:~:text=Figure%201.,occuring%20with%20each%20unique%20kernel.)
    - How to combine CNN and Feedforward
    - When you have a large network, how to code it?
- What's an optimizer?
- (Optional) An application: how to build a larger model? AlexNet

# Simple CNN

- in_channels = out_channels = 1

In [309]:
import torch
import torch.nn as nn

class SimpleCNN(nn.Module):
    """Single-channel demo network: Conv2d -> ReLU -> MaxPool2d.

    The forward pass prints the tensor shape after every stage so the effect
    of each layer on the input can be followed step by step.
    """

    def __init__(self):
        super().__init__()

        # One channel in, one channel out; the 2x2 kernel moves one pixel at a time.
        self.conv1 = nn.Conv2d(1, 1, kernel_size=(2, 2), stride=(1, 1))
        self.relu = nn.ReLU()
        # Pooling window is 1 row high and 2 columns wide, also moved one pixel at a time.
        self.max1 = nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 1))

    def forward(self, input: torch.Tensor):
        """Push ``input`` through conv, activation and pooling; return the pooled tensor."""
        stages = (
            ('after conv1', self.conv1),
            ('after act', self.relu),
            ('after max pool', self.max1),
        )

        current = input
        for label, layer in stages:
            current = layer(current)
            print(label, current.shape)

        return current

In [310]:
# Conv2d takes input laid out as (N, C, H, W):
#   N = batch size (4), C = channels (1), H = height (3), W = width (4)
input = torch.randn(4, 1, 3, 4)
simple_cnn = SimpleCNN()
out = simple_cnn(input)

after conv1 torch.Size([4, 1, 2, 3])
after act torch.Size([4, 1, 2, 3])
after max pool torch.Size([4, 1, 2, 2])


# MultiChannelCNN

- How an image is represented: [link](https://www.webstyleguide.com/wsg1/graphics/display_primer.html)

In [314]:
class MultiChannelCNN(nn.Module):
    """Conv2d -> ReLU -> MaxPool2d demo that maps 3 input channels to 6 feature maps.

    The shape produced by each stage is printed during the forward pass.
    """

    def __init__(self):
        super().__init__()

        # 3 channels in, 6 channels out.
        self.cnn1 = nn.Conv2d(3, 6, kernel_size=(2, 2), stride=(1, 1))
        self.relu1 = nn.ReLU()
        self.max1 = nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 1))

    def forward(self, input: torch.Tensor):
        """Return the pooled feature maps for ``input``, printing shapes along the way."""
        convolved = self.cnn1(input)
        print('after cnn1', convolved.shape)

        activated = self.relu1(convolved)
        print('after act', activated.shape)

        pooled = self.max1(activated)
        print('after pool', pooled.shape)

        return pooled

In [315]:
# batch of 4 samples, 3 channels each, 3 rows by 4 columns
input = torch.randn(4, 3, 3, 4)
multi_channel_cnn = MultiChannelCNN()
out = multi_channel_cnn(input)
print(out.size())

after cnn1 torch.Size([4, 6, 2, 3])
after act torch.Size([4, 6, 2, 3])
after pool torch.Size([4, 6, 2, 2])
torch.Size([4, 6, 2, 2])


# How to combine CNN and Feedforward NN?

- CNN 
    - input: a matrix [3, 4]
    - output a feature map [2, 2]
- FFN
    - input (for one single sample): a vector [x], e.g., [128]
    - output: a vector [number of classes] e.g., [2]
- Flatten the feature map to a vector
    - [link](https://www.superdatascience.com/blogs/convolutional-neural-networks-cnn-step-3-flattening)

In [326]:
# stand-in for a CNN output: 4 samples, 6 channels, 2 x 2 spatial grid
feature_map = torch.randn(4, 6, 2, 2)
print(feature_map.size())

torch.Size([4, 6, 2, 2])


In [328]:
# 'reshape' with every size spelled out: 6 * 2 * 2 = 24 values per sample
emb = feature_map.reshape(4, 24)

In [330]:
# -1 lets reshape work out the remaining size (24) from the element count
simple_emb = feature_map.reshape(4, -1)
print(simple_emb.size())

torch.Size([4, 24])


In [334]:
class DogCatPredictor(nn.Module):
    """Two-class classifier: a small CNN feature extractor followed by a linear layer.

    The linear layer has 24 input features, which is what the extractor
    produces for 3-channel inputs of height 3 and width 4
    (6 channels * 2 * 2 after pooling). The forward pass prints the shape
    after every stage.
    """

    def __init__(self):
        super().__init__()

        # feature_extractor
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=(2, 2), stride=(1, 1))
        self.relu1 = nn.ReLU()
        self.max1 = nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 1))

        # predictor
        self.pred = nn.Linear(in_features=24, out_features=2)

    def forward(self, input: torch.Tensor):
        """Return one row of 2 logits per sample in ``input``.

        Raises:
            RuntimeError: if the extracted features of a sample do not match
                the 24 input features expected by ``self.pred``.
        """
        # extract features
        cnn1out = self.cnn1(input)
        print('after cnn1', cnn1out.shape)
        relu1 = self.relu1(cnn1out)
        print('after act', relu1.shape)
        max1 = self.max1(relu1)
        print('after pool', max1.shape)

        # Flatten each sample on its own: the row width is taken from the
        # actual (C, H, W) of the feature map instead of a hard-coded 24.
        # A hard-coded width lets reshape silently move values between
        # samples whenever the total element count happens to be divisible
        # by it; with the real width a wrong input size fails in self.pred.
        channels, height, width = max1.shape[-3:]
        emb = max1.reshape((-1, channels * height * width))
        print('after reshape', emb.shape)

        logits = self.pred(emb)
        print('after pred', logits.shape)
        return logits


In [335]:
# build the model, then feed it a random batch of four 3-channel 3x4 inputs
dogcat = DogCatPredictor()
input = torch.randn(4, 3, 3, 4)
logits = dogcat(input)

after cnn1 torch.Size([4, 6, 2, 3])
after act torch.Size([4, 6, 2, 3])
after pool torch.Size([4, 6, 2, 2])
after reshape torch.Size([4, 24])
after pred torch.Size([4, 2])


# Simplify the code using `nn.Sequential`

In [337]:
class SimpleDogCat(nn.Module):
    """Two-class classifier whose CNN stages are grouped in one ``nn.Sequential``.

    The linear layer has 24 input features, which is what the extractor
    produces for 3-channel inputs of height 3 and width 4
    (6 channels * 2 * 2 after pooling).
    """

    def __init__(self):
        super().__init__()

        # feature extractor: the layers run in the order they are listed
        self.feat_extractor = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=(2, 2), stride=(1, 1)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(1, 2), stride=(1, 1)),
        )

        # predictor
        self.pred = nn.Linear(in_features=24, out_features=2)

    def forward(self, input: torch.Tensor):
        """Return one row of 2 logits per sample in ``input``.

        Raises:
            RuntimeError: if the extracted features of a sample do not match
                the 24 input features expected by ``self.pred``.
        """
        feat = self.feat_extractor(input)

        # Flatten each sample on its own. The row width comes from the real
        # (C, H, W) of the feature map, so values can never be moved between
        # samples; an unexpected input size fails in self.pred instead of
        # silently returning the wrong number of rows.
        channels, height, width = feat.shape[-3:]
        emb = feat.reshape((-1, channels * height * width))

        logits = self.pred(emb)
        return logits

In [339]:
# same experiment as before, now with the nn.Sequential-based model
simple_dogcat = SimpleDogCat()
input = torch.randn(4, 3, 3, 4)
logits = simple_dogcat(input)

In [340]:
# one row of 2 logits per sample
print(logits.size())

torch.Size([4, 2])


# Optimizer

How to optimize your models?
1. forward
2. backward
3. update the weights: weight = weight - learning_rate * gradient

Gradient <- backward

Update <- optimizer

In [356]:
import torch
# The optimizer is handed the model's parameters and updates them on step().
# lr=10 is far larger than the 0.00001 used for AlexNet further down; the
# recorded 'after step' output shows the weights moving by about 10 in one step.
adam = torch.optim.Adam(params=simple_dogcat.parameters(), lr=10)

In [348]:
# prepare data: a random batch of four 3-channel 3x4 inputs
inputs = torch.randn(4, 3, 3, 4)
# class labels, one per sample -- dog: 0, cat: 1
target = torch.tensor([0, 0, 1, 1], dtype=torch.long)

# loss: cross-entropy between the model's logits and the class labels
loss_fn = nn.CrossEntropyLoss()

In [349]:
# clear gradients left over from any previous step
adam.zero_grad()

# forward pass: logits for the batch, then the loss against the labels
logits = simple_dogcat(inputs)
loss = loss_fn(input=logits, target=target)

# backward pass: compute the gradient of the loss for every parameter
loss.backward()

# update: let the optimizer change the weights using those gradients
adam.step()

In [357]:
# Print every parameter tensor of the model so the values can be compared
# with the ones printed after the next optimizer step.
for param in simple_dogcat.parameters():
    print('before step', param)

before step Parameter containing:
tensor([[[[-0.1422, -0.0231],
          [-0.0200, -0.1127]],

         [[-0.1441, -0.0858],
          [-0.1067, -0.0200]],

         [[ 0.2862,  0.2100],
          [ 0.2218, -0.1266]]],


        [[[ 0.2268,  0.2049],
          [-0.1332, -0.0595]],

         [[ 0.1635,  0.2513],
          [-0.2851, -0.1297]],

         [[-0.1300,  0.0317],
          [ 0.0375,  0.2017]]],


        [[[-0.2436, -0.0981],
          [-0.2792,  0.0903]],

         [[-0.2628, -0.1466],
          [-0.1280, -0.0221]],

         [[ 0.2268, -0.1667],
          [-0.1990, -0.2223]]],


        [[[-0.2615,  0.0061],
          [ 0.1002, -0.2023]],

         [[-0.2707, -0.0446],
          [-0.1937, -0.0933]],

         [[ 0.2744, -0.1357],
          [-0.0972,  0.1548]]],


        [[[-0.1640,  0.0444],
          [ 0.1545, -0.1426]],

         [[-0.1740, -0.1087],
          [ 0.0618,  0.0483]],

         [[-0.2546,  0.2032],
          [-0.2771, -0.2147]]],


        [[[-0.2595,  0.074

In [358]:
# clear gradients left over from any previous step
adam.zero_grad()
# forward pass on `inputs`, the batch that `target` was defined for
# (`input` is only the leftover tensor from the earlier shape demo)
logits = simple_dogcat(inputs)
loss = loss_fn(logits, target)
# backward pass: compute the gradient of the loss for every parameter
loss.backward()
# update: let the optimizer change the weights using those gradients
adam.step()


In [359]:
# Print every parameter tensor again, now that adam.step() has been called,
# to see how much the optimizer changed them.
for param in simple_dogcat.parameters():
    print('after step', param)

after step Parameter containing:
tensor([[[[  9.8578, -10.0231],
          [-10.0200,   9.8873]],

         [[  9.8559, -10.0858],
          [  9.8933,   9.9800]],

         [[ 10.2862,  10.2100],
          [ -9.7782,   9.8734]]],


        [[[ 10.2268,  10.2049],
          [  9.8668, -10.0595]],

         [[ 10.1632,  10.2513],
          [  9.7149,   9.8703]],

         [[  9.8699,  -9.9683],
          [ 10.0375,  -9.7983]]],


        [[[-10.2436,   9.9019],
          [  9.7208,  -9.9097]],

         [[-10.2628, -10.1466],
          [-10.1280, -10.0221]],

         [[ 10.2268,   9.8333],
          [  9.8010,   9.7777]]],


        [[[-10.2615,  10.0061],
          [ -9.8998, -10.2023]],

         [[  9.7293,   9.9554],
          [  9.8063, -10.0933]],

         [[ -9.7256, -10.1357],
          [-10.0972,  10.1547]]],


        [[[  9.8360,  -9.9556],
          [ -9.8455, -10.1426]],

         [[-10.1740,   9.8913],
          [ -9.9382,  10.0483]],

         [[-10.2545,  10.2032],
   

# AlexNet

In [373]:
class AlexNet(nn.Module):
    """AlexNet-style classifier: five conv layers, then three linear layers.

    The first linear layer has 6400 input features, which is what the feature
    extractor produces for 3-channel 224x224 inputs (256 channels * 5 * 5).
    The output has 1000 logits per sample.
    """

    def __init__(self):
        super().__init__()

        self.feat_extractor = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )

        self.pred = nn.Sequential(
            nn.Linear(in_features=6400, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=1000)
        )

    def forward(self, input: torch.Tensor):
        """Return one row of 1000 logits per sample in ``input``.

        Raises:
            RuntimeError: if the extracted features of a sample do not match
                the 6400 input features expected by ``self.pred``.
        """
        feat = self.feat_extractor(input)

        # Flatten each sample on its own. The row width comes from the real
        # (C, H, W) of the feature map rather than a hard-coded 6400, which
        # would let reshape silently merge or split samples whenever the
        # total element count happens to be divisible by 6400. With the real
        # width, an unsupported image size fails in self.pred instead.
        channels, height, width = feat.shape[-3:]
        emb = feat.reshape((-1, channels * height * width))

        logits = self.pred(emb)
        return logits


In [374]:
# a random batch: 4 samples, 3 channels, 224 x 224
input = torch.randn(4, 3, 224, 224)
alex = AlexNet()
logits = alex(input)
print(logits.size())

torch.Size([4, 1000])


In [370]:
# Shape check for the AlexNet feature extractor: run only the convolutional
# stack on the batch, then flatten each sample's feature maps into one row.
# (`out` is computed here so the cell does not depend on a stale variable.)
out = alex.feat_extractor(input)
reshape_out = out.reshape((4, -1))
print(reshape_out.shape)

torch.Size([4, 6400])


In [375]:
# prepare data: a random batch of four 3-channel 224x224 inputs
inputs = torch.randn(4, 3, 224, 224)
# one class index per sample
target = torch.tensor([312, 1, 245, 42], dtype=torch.long)

# loss: cross-entropy between the logits and the class indices
loss_fn = nn.CrossEntropyLoss()

In [376]:
# optimizer: Adam over all AlexNet parameters with a small learning rate
adam = torch.optim.Adam(params=alex.parameters(), lr=1e-5)


In [377]:
# one training step = clear gradients, then forward, backward, update
adam.zero_grad()

# forward: logits for the batch and their loss against the labels
logits = alex(inputs)
loss = loss_fn(input=logits, target=target)

# backward: compute the gradient of the loss for every parameter
loss.backward()

# update: let the optimizer change the weights
adam.step()

In [None]:
# Dataset class, dataloader, collate_fn
# Model
# input <- dataloader
# model <- input
# for input, target in dataloader:
#   adam.zero_grad()
#   logits = model(input)
#   loss = loss_fn(logits, target)
#   loss.backward()
#   adam.step()

# learning rate schedule