# Convolutional Neural Network 모델 정의

In [1]:
import torch
import torch.nn as nn

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cpu


In [2]:
# Create a Conv2d layer.
layer = nn.Conv2d(
    in_channels=3,   # number of channels in the input; input tensor shape is (batch_size, channel, height, width)
    out_channels=5,  # number of filters (= number of output feature maps)
    kernel_size=3,   # filter size; the int 3 means a 3 x 3 kernel
    # The three arguments above are the required ones.
    stride=1,        # step of the sliding window: 1 cell left->right and 1 cell top->bottom (default: 1)
    padding=0,       # padding size; an int applies the same padding to height and width, 0 (default) adds none
                     # "same": padding is chosen automatically so the output has the same size as the input
)
# # Sketch of a following layer:
# layer2 = nn.Conv2d(
#     in_channels=5, # the previous Conv2d's out_channels becomes the next Conv2d's in_channels
# )

In [3]:
input_data = torch.ones(1, 3, 10, 10)  # (batch size, channels, height, width)
output = layer(input_data)
# 10x10 input, 3x3 kernel, stride 1, no padding -> 5 feature maps of 8x8 (see the output below).
output.shape

torch.Size([1, 5, 8, 8])

In [4]:
# Output size along one spatial axis for the layer above (input 10, kernel 3, padding 0, stride 1).
(10 - 3 + 2 * 0)/1 +1    # (input - kernel + 2 * padding) / stride + 1

8.0

In [6]:
### Shape of the Conv2d weight.
layer.weight.shape
# [5: number of filters (out_channels),
#  3: number of input channels (in_channels),
#  3: kernel height (kernel_size),
#  3: kernel width (kernel_size)]

torch.Size([5, 3, 3, 3])

In [8]:
layer.bias.shape  # one bias per output channel (filter): 5 here

torch.Size([5])

In [10]:
pool_layer = nn.MaxPool2d(
    kernel_size=2,  # size of the window the max is taken over; 2 means (2, 2). Required, it has no default.
    stride=2,       # how far the window moves before the next value is extracted (default: kernel_size)
    padding=0,      # implicit padding added around the input before pooling; 0 adds none.
                    # With no padding, a trailing region smaller than the window is dropped
                    # (the 5x5 example below becomes 2x2).

)

In [12]:
# Pool the 10x10 all-ones tensor: height and width are halved, the channel count is unchanged.
print(input_data.shape)
pool_output = pool_layer(input_data)
print(pool_output.shape)

torch.Size([1, 3, 10, 10])
torch.Size([1, 3, 5, 5])


In [14]:
# Odd-sized (5x5) input; this rebinds input_data.
# No random seed is set in this notebook, so the sampled values differ between runs.
input_data = torch.randn(1, 1, 5, 5)

print(input_data.shape)
pool_output = pool_layer(input_data)
print(pool_output.shape)

torch.Size([1, 1, 5, 5])
torch.Size([1, 1, 2, 2])


In [15]:
input_data[0, 0]

tensor([[ 1.3085,  1.3471,  0.5306,  1.1917,  0.1289],
        [ 1.3115, -1.1110, -0.2047,  2.3603,  0.1377],
        [-0.3777, -1.1396,  1.3689,  0.4415,  1.2804],
        [ 0.5678, -0.3815, -0.8584, -0.7741,  0.9000],
        [ 1.1591, -0.3105, -1.2150,  0.8543,  2.0922]])

In [16]:
pool_output

tensor([[[[1.3471, 2.3603],
          [0.5678, 1.3689]]]])

# MNIST

In [18]:
pip install torchinfo

Collecting torchinfo
  Using cached torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Using cached torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0
Note: you may need to restart the kernel to use updated packages.


In [19]:
import os

import torch
from torch import nn
import torchinfo

import matplotlib.pyplot as plt
import numpy as np

from module.data import load_mnist_dataset, load_fashion_mnist_dataset
from module.train import fit

# Select the device for the MNIST section: GPU when CUDA is available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cpu


In [24]:
# Hyperparameters and paths
EPOCH = 1                   # number of epochs passed to fit
BATCH_SIZE = 256            # batch size passed to the dataset loaders
LEARNING_RATE = 0.001       # learning rate of the Adam optimizer
DATA_ROOT_DIR = "datasets"  # root directory handed to the dataset loaders

## Data 준비

In [25]:
# Build the MNIST loaders.
# NOTE(review): the third positional argument is presumably an is-train flag
# (True -> training split, False -> test split) — confirm against module.data.
train_loader = load_mnist_dataset(DATA_ROOT_DIR, BATCH_SIZE, True)
test_loader = load_mnist_dataset(DATA_ROOT_DIR, BATCH_SIZE, False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to datasets\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:01<00:00, 5.18MB/s]


Extracting datasets\MNIST\raw\train-images-idx3-ubyte.gz to datasets\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to datasets\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 95.7kB/s]


Extracting datasets\MNIST\raw\train-labels-idx1-ubyte.gz to datasets\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to datasets\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:01<00:00, 1.55MB/s]


Extracting datasets\MNIST\raw\t10k-images-idx3-ubyte.gz to datasets\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to datasets\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<?, ?B/s]

Extracting datasets\MNIST\raw\t10k-labels-idx1-ubyte.gz to datasets\MNIST\raw






## CNN 모델 정의

In [36]:
# CNN building blocks
# Convolution layer -> Activation -> Pooling layer
# Convolution layer -> Activation -> Convolution layer -> Activation -> Pooling layer

# Convolution layer -> BatchNormalization -> Activation -> Dropout -> Pooling layer

## Layer design: increase the number of filters while shrinking the feature-map size.
# depth: Conv2d,  size: MaxPool2d

class CNNModel(nn.Module):
    """Three-block CNN that maps 1-channel 28x28 images to 10 class scores.

    Every block is Conv2d(3x3, padding="same") -> BatchNorm2d -> ReLU ->
    Dropout -> MaxPool2d(2, stride 2). Channel widths grow 32 -> 64 -> 128
    while the feature map shrinks 28 -> 14 -> 7 -> 4; the last pooling layer
    pads by 1 so the 7x7 map becomes 4x4, which is what the classifier's
    4*4*128 input size expects.

    Args:
        dropout_rate: Drop probability used by the Dropout layer of each block.
    """

    def __init__(self, dropout_rate=0.2):
        super().__init__()
        # Convolutional feature extractor, one nn.Sequential per block.
        self.b1 = self._conv_block(1, 32, dropout_rate)    # grayscale input -> 1 channel
        self.b2 = self._conv_block(32, 64, dropout_rate)   # in_channels = b1's out_channels
        self.b3 = self._conv_block(64, 128, dropout_rate, pool_padding=1)

        # Classifier head: flatten the feature maps and project to one score per class.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=4 * 4 * 128, out_features=10),
        )

    @staticmethod
    def _conv_block(n_in, n_out, drop_p, pool_padding=0):
        """Build one Conv -> BatchNorm -> ReLU -> Dropout -> MaxPool block."""
        return nn.Sequential(
            nn.Conv2d(n_in, n_out, kernel_size=3, stride=1, padding="same"),
            nn.BatchNorm2d(n_out),  # normalizes over the conv's output channels
            nn.ReLU(),
            nn.Dropout(p=drop_p),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=pool_padding),
        )

    def forward(self, X):
        """Run the input image batch through the three blocks and the classifier."""
        features = X
        for block in (self.b1, self.b2, self.b3):
            features = block(features)
        # The output of the last conv block is flattened and scored by the classifier.
        return self.classifier(features)

In [37]:
# Instantiate with a dropout probability of 0.5 (instead of the 0.2 default) and display the layer structure.
model = CNNModel(dropout_rate=0.5)
model

CNNModel(
  (b1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (b2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (b3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (classifier)

In [38]:
# Per-layer output shapes and parameter counts for an input of size (batch=1, channel=1, 28, 28).
torchinfo.summary(model, (1, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
CNNModel                                 [1, 10]                   --
├─Sequential: 1-1                        [1, 32, 14, 14]           --
│    └─Conv2d: 2-1                       [1, 32, 28, 28]           320
│    └─BatchNorm2d: 2-2                  [1, 32, 28, 28]           64
│    └─ReLU: 2-3                         [1, 32, 28, 28]           --
│    └─Dropout: 2-4                      [1, 32, 28, 28]           --
│    └─MaxPool2d: 2-5                    [1, 32, 14, 14]           --
├─Sequential: 1-2                        [1, 64, 7, 7]             --
│    └─Conv2d: 2-6                       [1, 64, 14, 14]           18,496
│    └─BatchNorm2d: 2-7                  [1, 64, 14, 14]           128
│    └─ReLU: 2-8                         [1, 64, 14, 14]           --
│    └─Dropout: 2-9                      [1, 64, 14, 14]           --
│    └─MaxPool2d: 2-10                   [1, 64, 7, 7]             --
├─Sequent

## Train

In [39]:
# Model: move it to the selected device.
model = model.to(device)

# Loss function
loss_fn = nn.CrossEntropyLoss() # handles the one-hot treatment of the labels and the softmax of the predictions itself, which is why the model ends in a plain Linear layer

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)


In [41]:
# Create the checkpoint directory before training; exist_ok=True makes the cell safe to re-run.
os.makedirs("saved_models", exist_ok=True)

In [40]:
# Train for EPOCH epochs on train_loader and validate on test_loader.
# NOTE(review): fit presumably writes the model to save_model_path — confirm in module.train.
# The directory part of save_path must already exist.
save_path = "saved_models/mnist_cnn_model.pt"
result = fit(
    train_loader, test_loader, model, loss_fn, optimizer, EPOCH, save_model_path=save_path, device=device, mode="multi"
)

Epoch[1/1] - Train loss: 0.48161 Train Accucracy: 0.97232 || Validation Loss: 0.47219 Validation Accuracy: 0.97530


RuntimeError: Parent directory save_model does not exist.

In [None]:
# Reload the checkpoint stored at save_path.
# map_location=device lets a checkpoint saved on a GPU load on a CPU-only machine.
# weights_only=False is required to unpickle a whole nn.Module on PyTorch >= 2.6, where the
# default became True. It executes pickled code, so only load files you created yourself.
# NOTE(review): assumes the file holds the full model object, not a state_dict — confirm in module.train.
model = torch.load(save_path, map_location=device, weights_only=False)

In [42]:
# Final evaluation: loss and accuracy of the model on the test loader.
from module.train import test_multi_classification
loss, acc = test_multi_classification(
    test_loader, model, loss_fn, device
)
loss, acc

(0.472193618118763, 0.9753)

In [43]:
pip install pillow

Note: you may need to restart the kernel to use updated packages.


In [45]:
## Qualitative evaluation - check the model against real image files

from PIL import Image
img = Image.open("test_img/num/eight.png")
type(img)


PIL.PngImagePlugin.PngImageFile

In [63]:
from torchvision import transforms
from PIL import Image

def predict(path, model):
    """Classify one image file with ``model`` and return the top class and its probability.

    The image is converted to grayscale, resized to 28x28, turned into a tensor
    scaled to [0, 1] and given a leading batch axis before being fed to the model.
    Inference runs in eval mode (so Dropout is disabled and BatchNorm uses its
    running statistics) on whatever device the model's parameters live on; the
    model's previous train/eval mode is restored afterwards.

    Args:
        path: Path of the image file to classify.
        model: Trained ``nn.Module`` that returns one score per class for a
            (1, 1, 28, 28) input.

    Returns:
        dict with two 0-dim tensors: ``"class"`` (index of the most probable
        class) and ``"확률"`` (its softmax probability).
    """
    # Open inside a context manager so the file handle is closed;
    # convert() returns a new, fully loaded image.
    with Image.open(path) as img:
        # color(3) -> grayscale(1); 'L' : grayscale, "RGB": color
        img = img.convert('L')

    # resize to the input size the model was trained on
    input_tensor = transforms.Resize((28, 28))(img)

    # PIL.Image -> torch.Tensor, scaled to 0 ~ 1
    input_tensor = transforms.ToTensor()(input_tensor)

    # add the batch (dummy) axis
    input_tensor = input_tensor.unsqueeze(dim=0)

    # Run on the same device as the model; a CPU tensor fed to a CUDA model raises.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        input_tensor = input_tensor.to(first_param.device)

    # Inference must not use training-time Dropout / batch statistics.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            result = model(input_tensor)
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    sm = nn.Softmax(dim=-1)
    result_proba = sm(result)
    final_result = result_proba.max(dim=-1)
    return {"class":final_result.indices[0], "확률":final_result.values[0]}


In [65]:
from glob import glob

# Run predict on every PNG under test_img/num and print the predicted class with its probability.
img_path_list = glob("test_img/num/*.png")
for img_path in img_path_list:
    result = predict(img_path, model)
    print(f"{img_path}, 추론class:{result['class']}, 확률: {result['확률']}")

test_img/num\eight.png, 추론class:8, 확률: 0.17167770862579346
test_img/num\eight2.png, 추론class:8, 확률: 0.32744091749191284
test_img/num\five.png, 추론class:5, 확률: 0.3569532632827759
test_img/num\four.png, 추론class:4, 확률: 0.5325490236282349
test_img/num\one.png, 추론class:1, 확률: 0.4972973167896271
test_img/num\seven.png, 추론class:1, 확률: 0.25710904598236084
test_img/num\seven2.png, 추론class:1, 확률: 0.4450855851173401
test_img/num\three.png, 추론class:3, 확률: 0.3694651424884796
test_img/num\three2.png, 추론class:3, 확률: 0.48430269956588745
test_img/num\two.png, 추론class:2, 확률: 0.5145750641822815
