# STEP 57 conv2d 함수와 pooling 함수
* im2col 함수를 통한 구현
    * image to column
    * 이미지에서 하나의 열로 변환
## 57.1 im2col에 의한 전개
* batch + 3차원 입력 데이터에서 커널에 적용할 입력 데이터 추출
    * ![](../../images/그림%2057-1.png)
    * `(C, H, W) --filter(kernel)--> (OH*OW, (C, KH, KW)) -> (OH*OW, C*KH*KW)`
* 원소 수가 원래보다 많아지므로 메모리를 많이 소비
* 행렬 라이브러리가 제공하는 최적화된 함수의 혜택을 누릴 수 있음
* 입력 데이터와 커널의 행렬 곱 계산
    * ![](../../images/그림%2057-2.png)

## 57.2 conv2d 함수 구현
* CNN에서 사용하는 함수는 코드양이 많아 dezero/functions_conv.py에 구현함
* im2col 함수 인수
```
im2col(x, kernel_size, stride=1, pad=0, to_matrix=True)
```

In [6]:
# Example usage of im2col: unfold input images into a 2-D patch matrix.
import numpy as np
import dezero.functions as F

x1 = np.random.rand(1,3,7,7) # batch size = 1
col1 = F.im2col(x1, kernel_size=5, stride=1, pad=0, to_matrix=True)
print(col1.shape) # (9,75) = (((7-5)/1 + 1) * ((7-5)/1 + 1),  3*5*5)

x2 = np.random.rand(10,3,7,7) # batch size = 10
# Scalar and pair forms of kernel_size/stride/pad are both passed here;
# only the batch dimension differs from the first call.
col2 = F.im2col(x2, kernel_size=(5,5), stride=(1,1), pad=(0,0), to_matrix=True)
print(col2.shape) # (90,75)

(9, 75)
(90, 75)


In [None]:
# pair 함수 구현
def pair(x):
    """Normalize a size-like argument to a 2-tuple.

    Args:
        x: Either an ``int`` (used for both entries) or a ``tuple`` of
            exactly two elements (returned unchanged).

    Returns:
        A 2-tuple ``(a, b)``.

    Raises:
        ValueError: If ``x`` is a tuple whose length is not 2, or if ``x`` is
            neither an ``int`` nor a ``tuple``.
    """
    if isinstance(x, int):
        return (x, x)
    if isinstance(x, tuple):
        # Explicit check instead of `assert`: assertions are removed under
        # `python -O`, which would let a malformed tuple slip through.
        if len(x) != 2:
            raise ValueError(
                f"expected a tuple of length 2, got length {len(x)}"
            )
        return x
    raise ValueError(
        f"expected an int or a 2-tuple, got {type(x).__name__}"
    )

In [3]:
# conv2d_simple 함수 구현
from dezero import as_variable
from dezero.functions import linear
from dezero.functions_conv import im2col
from dezero.utils import pair, get_conv_outsize

def conv2d_simple(x, W, b=None, stride=1, pad=0):
    """Compute a 2-D convolution via im2col followed by one linear transform.

    Args:
        x: Input with a 4-element shape, unpacked as (N, C, H, W).
        W: Kernel with a 4-element shape, unpacked as (OC, C, KH, KW).
        b: Optional bias forwarded to ``linear``.
        stride: Int or 2-tuple stride, forwarded to ``im2col``.
        pad: Int or 2-tuple padding, forwarded to ``im2col``.

    Returns:
        The result reshaped to (N, OH, OW, OC) and transposed to
        (N, OC, OH, OW).
    """
    x = as_variable(x)
    kernel = as_variable(W)

    batch, _, in_h, in_w = x.shape
    out_ch, _, k_h, k_w = kernel.shape
    stride_h, stride_w = pair(stride)
    pad_h, pad_w = pair(pad)
    out_h = get_conv_outsize(in_h, k_h, stride_h, pad_h)
    out_w = get_conv_outsize(in_w, k_w, stride_w, pad_w)

    # One row per output position: expected (N*OH*OW, C*KH*KW).
    patches = im2col(x, (k_h, k_w), stride, pad, to_matrix=True)
    # Flatten each filter and transpose so columns are output channels:
    # (C*KH*KW, OC).
    flat_kernel = kernel.reshape(out_ch, -1).transpose()
    # Convolution as a single matrix product: (N*OH*OW, OC).
    flat_out = linear(patches, flat_kernel, b)
    # Restore the spatial layout, then move channels to axis 1.
    return flat_out.reshape(batch, out_h, out_w, out_ch).transpose(0, 3, 1, 2)

In [4]:
from dezero import Variable

# Example: run conv2d_simple forward and backward and inspect the shapes.
N, C, H, W = 1, 5, 15, 15
OC, (KH, KW) = 8, (3, 3)

x = Variable(np.random.randn(N, C, H, W))
# NOTE: from here on `W` holds the kernel weights, replacing the input width
# unpacked above (the width is already baked into `x`).
W = np.random.randn(OC, C, KH, KW)
y = conv2d_simple(x, W, b=None, stride=1, pad=1)
y.backward()

print(y.shape)  # (1, 8, (15-3+2)/1 + 1, (15-3+2)/1 + 1)
print(x.grad.shape)  # (1, 5, 15, 15)

(1, 8, 15, 15)
(1, 5, 15, 15)


## 57.3 Conv2d 계층 구현

In [None]:
from dezero import Layer, Parameter, cuda


class Conv2d(Layer):
    """2-D convolution layer holding a kernel ``W`` and an optional bias ``b``.

    When ``in_channels`` is not given, the kernel is left uninitialized
    (``W.data is None``) and is created on the first ``forward`` call from the
    channel dimension of the input.

    Args:
        out_channels: Number of output channels (filters).
        kernel_size: Int or 2-tuple kernel size.
        stride: Int or 2-tuple stride passed to the convolution.
        pad: Int or 2-tuple padding passed to the convolution.
        nobias: If true, no bias parameter is created (``b`` is ``None``).
        dtype: dtype used for the kernel and the bias.
        in_channels: Number of input channels, or ``None`` to infer it from
            the first input.
    """

    def __init__(self, out_channels, kernel_size, stride=1, pad=0, nobias=False, dtype=np.float32, in_channels=None):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad = pad
        self.dtype = dtype

        # The kernel is registered up front; its data is filled in now only
        # when the input channel count is already known.
        self.W = Parameter(None, name='W')
        if in_channels is not None:
            self._init_W()
        if nobias:
            self.b = None
        else:
            self.b = Parameter(np.zeros(out_channels, dtype=dtype))

    def _init_W(self, xp=np):
        """Fill ``W.data`` with scaled random normal values.

        Args:
            xp: Array module providing ``random.randn`` (``numpy`` by default).
        """
        C, OC = self.in_channels, self.out_channels
        KH, KW = pair(self.kernel_size)
        # Scale by 1/sqrt(fan_in), where fan_in = C*KH*KW.
        scale = np.sqrt(1/(C*KH*KW))
        W_data = xp.random.randn(OC, C, KH, KW).astype(self.dtype) * scale
        self.W.data = W_data

    def forward(self, x):
        """Apply the convolution to ``x``, initializing ``W`` if needed.

        Args:
            x: Input whose ``shape[1]`` is the channel count.

        Returns:
            The output of ``conv2d_simple`` for this layer's parameters.
        """
        if self.W.data is None:
            # Lazy initialization: take the channel count from the input and
            # create the kernel with the same array module as the input.
            self.in_channels = x.shape[1]
            # Fixed: the DeZero helper is `get_array_module`; the previous
            # `get_array_model` spelling raised AttributeError on this path.
            xp = cuda.get_array_module(x)
            self._init_W(xp)
        y = conv2d_simple(x, self.W, self.b, self.stride, self.pad)
        return y

## 57.4 pooling 함수 구현
* pooling 함수의 구현 흐름
    * ![](../../images/그림%2057-5.png)
* 전개 matrix 뽑는 과정
    * ![](../../images/그림%2057-4.png)

In [11]:
# pooling 함수 구현
def pooling_simple(x, kernel_size, stride=1, pad=0):
    """Max-pool a 4-D input by unfolding it with im2col.

    Args:
        x: Input with a 4-element shape, unpacked as (N, C, H, W).
        kernel_size: Int or 2-tuple pooling window size.
        stride: Int or 2-tuple stride, forwarded to ``im2col``.
        pad: Int or 2-tuple padding, forwarded to ``im2col``.

    Returns:
        The pooled result reshaped to (N, OH, OW, C) and transposed to
        (N, C, OH, OW).
    """
    x = as_variable(x)
    batch, channels, in_h, in_w = x.shape
    k_h, k_w = pair(kernel_size)
    pad_h, pad_w = pair(pad)
    stride_h, stride_w = pair(stride)
    # e.g. get_conv_outsize(4, 2, 2, 0) => (4-2)/2 + 1
    out_h = get_conv_outsize(in_h, k_h, stride_h, pad_h)
    out_w = get_conv_outsize(in_w, k_w, stride_w, pad_w)

    # Unfolded patches, expected (N*OH*OW, C*KH*KW).
    unfolded = im2col(x, kernel_size, stride, pad, to_matrix=True)
    # One row per (position, channel) window: (N*OH*OW*C, KH*KW).
    windows = unfolded.reshape(-1, k_h * k_w)
    # Reduce each window to its maximum: one value per row.
    pooled = windows.max(axis=1)
    # Restore the spatial layout, then move channels to axis 1.
    return pooled.reshape(batch, out_h, out_w, channels).transpose(0, 3, 1, 2)



In [14]:
from dezero import Variable

# Example usage of pooling_simple: forward and backward on a random batch.
N, C, H, W = 10, 3, 4, 4
(KH, KW) = (2, 2)

x = Variable(np.random.randn(N, C, H, W))
y = pooling_simple(x, (KH, KW), stride=2, pad=0)
y.backward()

print(y.shape) # (10, 3, (4-2+0)/2 + 1, (4-2+0)/2 + 1)
print(x.grad.shape) # (10, 3, 4, 4)

(10, 3, 2, 2)
(10, 3, 4, 4)
