In [1]:
import numpy as np
# Randomly generated batch: 10 samples, 1 channel, 28x28 spatial grid.
x = np.random.random_sample((10, 1, 28, 28))
x.shape

(10, 1, 28, 28)

In [2]:
# Indexing the leading axis selects one sample out of the 10:
# show the shape of the first and of the second sample, one per line.
print(x[0].shape, x[1].shape, sep="\n")

(1, 28, 28)
(1, 28, 28)


In [3]:
x[0, 0]     # Spatial data of the first channel of the first sample (single multi-axis index)

array([[6.30071295e-01, 4.41031642e-01, 7.99320069e-01, 3.12503764e-01,
        4.90887527e-01, 7.59462008e-01, 1.15083905e-01, 5.36128054e-01,
        2.11956244e-01, 1.68859286e-01, 4.07521639e-01, 1.62956843e-01,
        9.75521684e-01, 2.47183917e-01, 7.74246438e-01, 4.60834762e-01,
        3.93443577e-01, 8.29347877e-01, 4.71950470e-01, 4.12290390e-01,
        4.05145505e-01, 1.76210654e-02, 8.91234143e-01, 9.55750926e-01,
        1.69762770e-01, 5.84408421e-01, 4.12453838e-01, 4.16667679e-01],
       [4.28014140e-02, 4.94235718e-01, 7.73143260e-01, 5.69855646e-01,
        9.28859008e-01, 6.85152348e-01, 2.84340557e-01, 8.34641229e-01,
        2.40599734e-02, 4.77297151e-01, 6.09497745e-01, 7.04078820e-01,
        6.20035742e-02, 8.27097456e-01, 1.12923283e-01, 2.32785089e-01,
        3.85960397e-01, 5.08533150e-01, 4.89793402e-01, 9.85003852e-01,
        2.89879053e-01, 6.48516337e-01, 3.89313566e-01, 5.29289666e-01,
        9.86905949e-01, 3.46779923e-01, 2.02670230e-01, 1.15660

In [4]:
x[0][0]     # Chained indexing; the recorded output matches that of x[0, 0]

array([[6.30071295e-01, 4.41031642e-01, 7.99320069e-01, 3.12503764e-01,
        4.90887527e-01, 7.59462008e-01, 1.15083905e-01, 5.36128054e-01,
        2.11956244e-01, 1.68859286e-01, 4.07521639e-01, 1.62956843e-01,
        9.75521684e-01, 2.47183917e-01, 7.74246438e-01, 4.60834762e-01,
        3.93443577e-01, 8.29347877e-01, 4.71950470e-01, 4.12290390e-01,
        4.05145505e-01, 1.76210654e-02, 8.91234143e-01, 9.55750926e-01,
        1.69762770e-01, 5.84408421e-01, 4.12453838e-01, 4.16667679e-01],
       [4.28014140e-02, 4.94235718e-01, 7.73143260e-01, 5.69855646e-01,
        9.28859008e-01, 6.85152348e-01, 2.84340557e-01, 8.34641229e-01,
        2.40599734e-02, 4.77297151e-01, 6.09497745e-01, 7.04078820e-01,
        6.20035742e-02, 8.27097456e-01, 1.12923283e-01, 2.32785089e-01,
        3.85960397e-01, 5.08533150e-01, 4.89793402e-01, 9.85003852e-01,
        2.89879053e-01, 6.48516337e-01, 3.89313566e-01, 5.29289666e-01,
        9.86905949e-01, 3.46779923e-01, 2.02670230e-01, 1.15660

In [5]:
import numpy as np
from common.util import im2col

# Usage: im2col(input_data, filter_h, filter_w, stride=n, pad=0)
# The calls below unfold 4-D inputs with a 5x5 window; the recorded output shows a 2-D result.

x1 = np.random.rand(1, 3, 7, 7)     # number of samples, channels, height, width
col1 = im2col(x1, 5, 5, stride=1, pad=0)
print(col1.shape)   # recorded output: (9, 75) -- presumably 3*3 window positions x (3*5*5) values

x2 = np.random.rand(10, 3, 7, 7)    # same layout with a batch of 10 samples
col2 = im2col(x2, 5, 5, stride=1, pad=0)
print(col2.shape)   # recorded output: (90, 75) -- ten times as many rows as the single-sample case

(9, 75)
(90, 75)


In [6]:
class Convolution:
    """Convolution layer whose forward pass is an im2col unfold followed by one matrix product.

    Attributes:
        W: filter weights; forward() unpacks W.shape as (FN, C, FH, FW).
        b: bias added to the result of the matrix product.
        stride: step between window positions, forwarded to im2col.
        pad: padding amount, forwarded to im2col.
    """

    def __init__(self, W, b, stride=1, pad=0):
        self.W, self.b = W, b
        self.stride, self.pad = stride, pad

    def forward(self, x):
        """Apply the filters to a 4-D input.

        Args:
            x: array whose shape unpacks as (N, C, H, W).

        Returns:
            The result rearranged to (N, channels, out_h, out_w); the channel
            count equals the number of filters, assuming im2col yields one row
            per output position.
        """
        num_filters, _, filt_h, filt_w = self.W.shape
        batch, _, in_h, in_w = x.shape

        # Output size of a strided, padded window sweep along each spatial axis.
        out_h = int((in_h + 2 * self.pad - filt_h) / self.stride + 1)
        out_w = int((in_w + 2 * self.pad - filt_w) / self.stride + 1)

        # Unfold the input into rows and flatten each filter into one column.
        patches = im2col(x, filt_h, filt_w, self.stride, self.pad)
        flat_filters = self.W.reshape(num_filters, -1).T
        scores = np.dot(patches, flat_filters) + self.b

        # transpose() reorders the axes: move the channel axis from last to second.
        return scores.reshape(batch, out_h, out_w, -1).transpose(0, 3, 1, 2)

In [7]:
class Pooling:
    """Max-pooling layer implemented with an im2col unfold.

    Attributes:
        pool_h: height of the pooling window.
        pool_w: width of the pooling window.
        stride: step between window positions, forwarded to im2col.
        pad: padding amount, forwarded to im2col and included in the
            output-size computation.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Take the maximum over every pooling window of a 4-D input.

        Args:
            x: array whose shape unpacks as (N, C, H, W).

        Returns:
            Array of shape (N, C, out_h, out_w) holding the per-window maxima.
        """
        N, C, H, W = x.shape
        # Include the padding so the output size matches the number of rows
        # im2col produces (same formula as the Convolution layer); with
        # pad=0 this is the original computation.
        out_h = int(1 + (H + 2 * self.pad - self.pool_h) / self.stride)
        out_w = int(1 + (W + 2 * self.pad - self.pool_w) / self.stride)

        # Unfold (1): one row per (window position, channel), with the whole
        # window flattened into pool_h * pool_w columns. Keeping pool_h and
        # pool_w as separate axes would make the max below reduce only over
        # the window's rows and leave pool_w values per window.
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h * self.pool_w)

        # Maximum (2): reduce each flattened window to a single value.
        out = np.max(col, axis=1)

        # Reshape (3): restore (N, out_h, out_w, C), then move channels to axis 1.
        # NOTE(review): assumes im2col orders each row channel-major, as the
        # Convolution layer's filter flattening also expects -- confirm.
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        return out