# Part-3 CNN

## Lab-10-0 Convolution Neural Network
- 합성곱 신경만 네트워크(Convolution Neural Network)
- Visdom, Datasets
- MNIST, CIFAR-10
- VGG & ResNet

## Lab-10-1 Convolution
- 합성곱(Convolution) 연산
- 필터(Filter)
- 스트라이드(Stride)
- 패딩(Padding)
- 풀링(Pooling)

### Convolution?
- 이미지 위에서 stride 값 만큼 filter(kernel)을 이동시키면서 겹쳐지는 부분의 각 원소의 값을 곱해 모두 더한 값을 출력하는 연산

### Stride and Padding
- Stride: filter를 한번에 얼마나 이동 할 것인가
- padding: zero-padding

### Convolution의 output 크기
$Output size = {{input size - filter size + (2*padding)} \over stride} + 1$

#### EX


In [64]:
import torch
import torch.nn as nn

In [65]:
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=11, stride=4, padding=0)
inputs = torch.Tensor(1, 1, 227, 227)
conv(inputs).shape

torch.Size([1, 1, 55, 55])

In [66]:
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(7, 7), stride=2, padding=0)
inputs = torch.Tensor(1, 1, 64, 64)
conv(inputs).shape

torch.Size([1, 1, 29, 29])

In [67]:
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 5), stride=1, padding=2)
inputs = torch.Tensor(1, 1, 32, 32)
conv(inputs).shape

torch.Size([1, 1, 32, 32])

In [68]:
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=5, stride=1, padding=0)
inputs = torch.Tensor(1, 1, 32, 64)
conv(inputs).shape

torch.Size([1, 1, 28, 60])

In [69]:
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(3, 3), stride=1, padding=1)
inputs = torch.Tensor(1, 1, 64, 32)
conv(inputs).shape

torch.Size([1, 1, 64, 32])

### Pooling
- Max Pooling
- Average Pooling
- ...

### CNN implementatino

In [70]:
inputs = torch.Tensor(1, 1, 28, 28)
print(inputs.shape)
conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=(5, 5))
temp = conv1(inputs)
print(temp.size())
pool1 = nn.MaxPool2d(2)
out = pool1(temp)
print(out.size())

torch.Size([1, 1, 28, 28])
torch.Size([1, 5, 24, 24])
torch.Size([1, 5, 12, 12])


## Lab-10-2 Mnist CNN
- 딥러닝 학습 단계
- CNN

In [71]:
inputs = torch.Tensor(1, 1, 28, 28)

conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
pool = nn.MaxPool2d(2)

out = conv1(inputs)
out = pool(out)
out = conv2(out)
out = pool(out)
print(out.size())

out = out.view(out.size(0), -1)
print(out.shape)

torch.Size([1, 64, 7, 7])
torch.Size([1, 3136])


In [72]:
fc = nn.Linear(3136, 10)

fc(out)

tensor([[ 0.0743, -0.0372, -0.0891, -0.0524,  0.0265, -0.1187, -0.0263, -0.0442,
         -0.0113,  0.0294]], grad_fn=<AddmmBackward0>)

In [73]:
import torch
import torch.nn as nn
import torchvision.datasets as dset
import torchvision.transforms as transforms

import torch.nn.init

In [74]:
device = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(777)
if device=="cuda":
    torch.cuda.manual_seed(777)

In [75]:
# Parameters
leraning_rate = 0.001
trainig_epochs = 15
batch_size = 100

# MNIST dataset
mnist_train = dset.MNIST(root="MNIST_data/", train=True, transform=transforms.ToTensor(), download=True)
mnist_test = dset.MNIST(root="MNIST_data/", train=False, transform=transforms.ToTensor(), download=True)

data_loader = torch.utils.data.DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True, drop_last=True)

In [76]:
# CNN
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Linear(7*7*64, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [77]:
model = CNN().to(device)

model

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=3136, out_features=10, bias=True)
)

In [78]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=leraning_rate)

In [80]:
# training
total_batch = len(data_loader)
for epoch in range(trainig_epochs):
    avg_cost = 0
    for x, y in data_loader:
        x = x.to(device)
        y = y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(x)
        
        cost = criterion(hypothesis, y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print(f"[Epoch: {epoch+1}] cost = {avg_cost}")

[Epoch: 1] cost = 0.2255527824163437
[Epoch: 2] cost = 0.06297089904546738
[Epoch: 3] cost = 0.04629518464207649
[Epoch: 4] cost = 0.03738237917423248
[Epoch: 5] cost = 0.03142663091421127
[Epoch: 6] cost = 0.02627108059823513
[Epoch: 7] cost = 0.021880438551306725
[Epoch: 8] cost = 0.018407942727208138
[Epoch: 9] cost = 0.01604434661567211
[Epoch: 10] cost = 0.01318534929305315
[Epoch: 11] cost = 0.010261600837111473
[Epoch: 12] cost = 0.010166279971599579
[Epoch: 13] cost = 0.008695926517248154
[Epoch: 14] cost = 0.00602313969284296
[Epoch: 15] cost = 0.0067491657100617886


In [82]:
# test
with torch.no_grad():
    X_test = mnist_test.data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.targets.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1)==Y_test
    accuracy = correct_prediction.float().mean()
    print(f"Accuracy: {accuracy.item()}")

Accuracy: 0.9868999719619751


## Lab-10-3 visdom
- Visdom
- CNN

In [85]:
import visdom
vis = visdom.Visdom()

Setting up a new session...


In [87]:
# Text
vis.text("Hello World", env="main")

'window_3b81227143b7da'

In [88]:
# Image
vis.image(torch.randn(3, 200, 200))

'window_3b812289ac3150'

In [89]:
# Images
vis.images(torch.randn(3, 3, 28, 28))

'window_3b81229dea2500'

### MNIST and CIFAR10

In [92]:
MNIST = dset.MNIST(root="./data/MNIST_data/", train=True, transform=transforms.ToTensor(), download=True)
cifar10 = dset.CIFAR10(root="./data/cifar10/", train=True, transform=transforms.ToTensor(), download=True)

Files already downloaded and verified


In [94]:
data = cifar10.__getitem__(0)
print(data[0].shape)
vis.image(data[0], env="main")

torch.Size([3, 32, 32])


'window_3b81237c4fe020'

In [95]:
data = MNIST.__getitem__(0)
print(data[0].shape)
vis.image(data[0], env="main")

torch.Size([1, 28, 28])


'window_3b8123981c20d2'

### Check dataset

In [96]:
data_loader = torch.utils.data.DataLoader(dataset=MNIST, batch_size=32, shuffle=True)

for num, value in enumerate(data_loader):
    value = value[0]
    print(value.shape)
    vis.images(value)
    break

torch.Size([32, 1, 28, 28])


In [97]:
vis.close(env="main")

''

### Line Plot

In [98]:
Y_data = torch.randn(5)
plt = vis.line(Y=Y_data)

In [99]:
X_data = torch.Tensor([1, 2, 3, 4, 5])
plt = vis.line(Y=Y_data, X=X_data)

### Line update

In [100]:
Y_append = torch.randn(1)
X_append = torch.Tensor([6])

vis.line(Y=Y_append, X=X_append, win=plt, update="append")

'window_3b8123fec69874'

## Lab-10-4-1 ImageFolder1
- Image Folder

`splitfolders`와 유사한듯?

In [None]:
trans = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor()
])

train_data = dset.ImageFolder(root="", transform=trans)

## Lab-10-4-2 ImageFolder2

## Lab-10-5 Advance CNN(VGG)
- VGG

In [101]:
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

In [102]:
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]


model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}

In [107]:
class VGG(nn.Module):
    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512*7*7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )
        if init_weights:
            self._initialize_weights()
            
    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

In [104]:
def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
                     
    return nn.Sequential(*layers)

cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], #8 + 3 =11 == vgg11
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], # 10 + 3 = vgg 13
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], #13 + 3 = vgg 16
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], # 16 +3 =vgg 19
    'custom' : [64, 64, 64,'M', 128, 128, 128, 'M', 256, 256, 256, 'M']
}

In [None]:
conv = make_layers(cfg["custom"], batch_norm=True)

conv

In [109]:
CNN = VGG(make_layers(cfg["custom"]), num_classes=10, init_weights=True)

In [110]:
CNN

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU(inplace=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), paddin

## Lab-10-6-1 Advance CNN(RESNET-1)

## Lab-10-6-2 Advance CNN(RESNET-2)

## Lab-10-7 Next step of CNN