In [1]:
import numpy as np

# 4차원 배열

In [16]:
# Random 4-D batch laid out as (number of samples, channels, height, width);
# the trailing expression displays its shape.
x = np.random.rand(10, 1, 28, 28)
x.shape

(10, 1, 28, 28)

#### 첫 번째, 두 번째 데이터에 접근할 때

In [18]:
# Indexing the first axis selects a single sample; each one has shape (1, 28, 28).
print(x[0].shape)
print(x[1].shape)

(1, 28, 28)
(1, 28, 28)


#### 첫 번째 데이터의 첫 채널의 공간 데이터에 접근할 때

In [19]:
# First sample, first channel: the 2-D (28 x 28) spatial data.
x[0, 0]

array([[2.72150636e-01, 5.22952652e-01, 1.63484498e-01, 4.22530238e-01,
        3.06112677e-01, 2.31093931e-01, 6.92474184e-01, 9.53966828e-01,
        9.01420279e-01, 7.64559204e-02, 7.63273788e-01, 9.78247817e-01,
        6.42221545e-01, 8.76812338e-01, 5.88336247e-01, 2.59457139e-01,
        5.24996681e-01, 4.09267548e-01, 7.17430783e-01, 7.07240391e-01,
        3.55372455e-01, 9.62411920e-01, 6.14120128e-01, 8.28699384e-01,
        2.10574479e-02, 7.91097288e-02, 6.47567027e-01, 7.08226698e-01],
       [6.55118858e-01, 2.32268951e-01, 5.55618683e-01, 4.09171746e-01,
        9.95058800e-01, 6.65084359e-01, 9.67789687e-01, 9.36377693e-02,
        9.57625461e-02, 6.39190012e-01, 9.38084213e-01, 1.14370781e-01,
        5.40100815e-01, 8.75973836e-01, 8.71832956e-02, 2.80188924e-01,
        9.68673263e-01, 9.62299635e-01, 3.74401930e-01, 9.70725976e-01,
        8.21920599e-01, 5.11376552e-01, 7.84306115e-01, 1.62600389e-01,
        8.92257022e-02, 5.16250184e-02, 7.69917011e-01, 3.68113

이처럼 CNN은 4차원 데이터를 다룹니다. 그래서 합성곱 연산의 구현은 복잡해질 것 같지만, im2col이라는 '트릭'이 문제를 단순하게 만들어줍니다.

# 7.4.2 im2col로 데이터 전개하기
- im2col : 입력 데이터를 필터링(가중치 계산)하기 좋게 전개하는(펼치는) 함수

# 7.4.3 합성곱 계층 구현하기

In [74]:
import sys, os
sys.path.append(os.pardir)
from deep_learning_github.common.util import im2col

# Unroll a single 3-channel 7x7 input with a 5x5 window (stride 1, no padding).
# Printed shape (9, 75): 9 = 3*3 window positions, 75 = 3*5*5 values per window.
x1 = np.random.rand(1,3,7,7) # (number of samples, channels, height, width)
col1 = im2col(x1, 5, 5, stride=1, pad=0)
print(col1.shape)

# Same unrolling for a batch of 10 inputs: the row count grows to 10 * 9 = 90.
x2 = np.random.rand(10,3,7,7) # 10 samples
col2 = im2col(x2, 5, 5, stride=1, pad=0)
print(col2.shape)

(9, 75)
(90, 75)


In [80]:
import sys, os
sys.path.append(os.pardir)
from deep_learning_github.common.util import im2col, col2im

class Convolution:
    """Convolution layer implemented as a matrix product via im2col.

    Attributes:
        W: filter weights, unpacked as (FN, C, FH, FW).
        b: bias added to the (rows, FN) matrix product in ``forward``.
        stride: window stride passed to im2col / col2im.
        pad: padding passed to im2col / col2im.
        dW, db: parameter gradients, populated by ``backward``.
    """

    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

        # Intermediate data cached by forward() for use in backward().
        self.x = None
        self.col = None
        self.col_W = None

        # Gradients of the weight and bias parameters.
        self.dW = None
        self.db = None

    def forward(self, x):
        """Apply the filters to ``x`` and cache what ``backward`` needs.

        Args:
            x: input array unpacked as (N, C, H, W).

        Returns:
            Array of shape (N, FN, out_h, out_w).
        """
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = int(1 + (H + 2*self.pad - FH) / self.stride)
        # The output width must be derived from the input width W (not H),
        # otherwise the reshape below fails for non-square inputs.
        out_w = int(1 + (W + 2*self.pad - FW) / self.stride)

        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T  # unroll each filter into one column
        out = np.dot(col, col_W) + self.b

        # (N*out_h*out_w, FN) -> (N, out_h, out_w, FN) -> (N, FN, out_h, out_w)
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        # backward() reads these; without caching them it would operate on None.
        self.x = x
        self.col = col
        self.col_W = col_W

        return out

    def backward(self, dout):
        """Back-propagate ``dout`` and store ``self.dW`` / ``self.db``.

        Must be called after ``forward``, which caches the unrolled input.

        Args:
            dout: upstream gradient with the same layout as forward's output,
                (N, FN, out_h, out_w).

        Returns:
            Gradient with respect to the input, rebuilt by col2im to the
            cached input's shape.
        """
        FN, C, FH, FW = self.W.shape
        # Undo forward's final transpose and flatten to (rows, FN).
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.col.T, dout)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)

        return dx

# 7.4.4 풀링 계층 구현하기

In [81]:
import sys, os
sys.path.append(os.pardir)
from deep_learning_github.common.util import im2col, col2im

class Pooling:
    """Max-pooling layer implemented via im2col.

    Attributes:
        pool_h, pool_w: height and width of the pooling window.
        stride: window stride passed to im2col / col2im.
        pad: padding passed to im2col / col2im.
        x: input cached by ``forward`` (its shape is needed in ``backward``).
        arg_max: per-window index of the maximum, cached by ``forward``.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        self.x = None
        self.arg_max = None

    def forward(self, x):
        """Take the maximum over each pooling window.

        Args:
            x: input array unpacked as (N, C, H, W).

        Returns:
            Array of shape (N, C, out_h, out_w).
        """
        N, C, H, W = x.shape
        # im2col below is called with self.pad, so the output size has to
        # account for it as well (a no-op for the default pad=0).
        out_h = int(1 + (H + 2*self.pad - self.pool_h) / self.stride)
        # The output width must be derived from the input width W (not H),
        # otherwise the reshape below fails for non-square inputs.
        out_w = int(1 + (W + 2*self.pad - self.pool_w) / self.stride)

        # (1) Unroll: after the reshape, each row holds one pooling window.
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h * self.pool_w)

        arg_max = np.argmax(col, axis=1)

        # (2) Maximum of every window.
        out = np.max(col, axis=1)

        # (3) Reshape back to (N, C, out_h, out_w).
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max

        return out

    def backward(self, dout):
        """Route each upstream gradient to the position that held the maximum.

        Must be called after ``forward``, which caches ``x`` and ``arg_max``.

        Args:
            dout: upstream gradient with the same layout as forward's output,
                (N, C, out_h, out_w).

        Returns:
            Gradient with respect to the input, rebuilt by col2im to the
            cached input's shape.
        """
        dout = dout.transpose(0, 2, 3, 1)

        pool_size = self.pool_h * self.pool_w
        # One row per pooled output value; only the arg-max slot of each
        # window receives the gradient, every other slot stays zero.
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        # Collapse (N, out_h, out_w) into rows to match the layout col2im expects.
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)

        return dx
        