In [5]:
import os
import sys
sys.path.append(os.pardir)
import numpy as np
from common.util import im2col, col2im

In [20]:
class Relu:
    """Rectified linear unit layer with a manual backward pass.

    ``forward`` remembers which input entries were non-positive so that
    ``backward`` can zero the gradient at exactly those positions.
    """

    def __init__(self):
        # Boolean array marking entries where the forward input was <= 0;
        # set by forward(), read by backward().
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every entry ``<= 0`` replaced by 0.

        Args:
            x: NumPy array of activations. It is not modified.

        Returns:
            A new array with the same shape and dtype as ``x``.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Propagate the upstream gradient through the ReLU.

        Args:
            dout: NumPy array holding the upstream gradient, shaped like the
                input given to the last ``forward`` call. It is not modified.

        Returns:
            A new array equal to ``dout`` with zeros wherever the forward
            input was ``<= 0``.
        """
        # Work on a copy: masking ``dout`` in place would silently corrupt
        # the caller's gradient array if it is reused elsewhere.
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

In [28]:
# Exercise Relu on a small 2x2 example: non-positive inputs are clamped to 0
# in the forward pass and the gradient is zeroed at those same positions.
relu_input = np.array([[-2, -1], [0, 1]])
relu_upstream_grad = np.array([[2, 3], [-1, 1]])

test_relu = Relu()
print("forward:\n", test_relu.forward(relu_input))
print("backward:\n", test_relu.backward(relu_upstream_grad))

forward:
 [[0 0]
 [0 1]]
backward:
 [[0 0]
 [0 1]]


In [43]:
class Affine:
    """Affine (fully connected) layer: ``x . W + b`` with a manual backward pass.

    Inputs with more than two dimensions are flattened to
    ``(x.shape[0], -1)`` before the matrix product; ``backward`` restores the
    original input shape on the returned gradient.
    """

    def __init__(self, W, b):
        # Layer parameters, stored as given.
        self.W, self.b = W, b

        # Cached by forward() for use in backward().
        self.x = None
        self.original_x_shape = None

        # Parameter gradients, filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Flatten ``x`` to two dimensions and return ``x . W + b``.

        The incoming shape is recorded so the input gradient can be
        reshaped back to it later.
        """
        self.original_x_shape = x.shape
        batch_size = x.shape[0]
        self.x = x.reshape(batch_size, -1)
        return np.dot(self.x, self.W) + self.b

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient w.r.t. the input.

        ``dW`` is ``x.T . dout`` using the flattened input cached by
        ``forward``; ``db`` sums ``dout`` over axis 0. The returned gradient
        has the shape of the original (un-flattened) forward input.
        """
        grad_input = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        return grad_input.reshape(*self.original_x_shape)

In [45]:
# Exercise Affine with a (2, 3) weight matrix and a scalar bias of 0.
affine_weights = np.array([[1, 2, 3], [1, 2, 3]])  # W.shape == (2, 3)
test_affine = Affine(affine_weights, 0)

affine_out = test_affine.forward(np.array([[1, 1,], [1, 1]]))  # shape (2, 3)
print(affine_out)

affine_dx = test_affine.backward(np.array([[0, 1, 1], [2, 0, 0]]))  # shape (2, 2)
print(affine_dx)

print("test_affine.db:", test_affine.db)

[[2 4 6]
 [2 4 6]]
[[5 5]
 [2 2]]
test_affine.db: [2 1 1]


In [14]:
class Convolution:
    """Convolution layer expressed as a matrix product via ``im2col``/``col2im``.

    ``W`` is unpacked as a 4-D array ``(FN, C, FH, FW)`` and the input ``x``
    as ``(N, C, H, W)``. ``im2col`` and ``col2im`` come from ``common.util``;
    presumably ``im2col`` unfolds each receptive field into one row and
    ``col2im`` is its inverse -- confirm against that module.
    """

    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

        # Cached by forward() for use in backward()
        self.x = None
        self.col = None
        self.col_W = None

        # Gradients, filled in by backward()
        self.dW = None
        self.db = None

    def forward(self, x):
        """Apply the filters to ``x`` and cache intermediates for ``backward``.

        Args:
            x: 4-D input array, unpacked as ``(N, C, H, W)``.

        Returns:
            Array of shape ``(N, FN, out_h, out_w)``.
        """
        FN, C, FH, FW = self.W.shape
        # The input's channel count is not needed here; the filter's C is
        # what gets used when reshaping dW in backward().
        N, _, H, W = x.shape

        # Integer floor division avoids the float round trip of int(a / b).
        out_h = 1 + (H + 2 * self.pad - FH) // self.stride
        out_w = 1 + (W + 2 * self.pad - FW) // self.stride

        col = im2col(x, FH, FW, stride=self.stride, pad=self.pad)
        # One column per filter, so a single dot product applies all filters.
        col_W = self.W.reshape(FN, -1).T

        out = np.dot(col, col_W) + self.b
        # (N, out_h, out_w, FN) -> (N, FN, out_h, out_w)
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        self.x = x
        self.col = col
        self.col_W = col_W

        return out

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient w.r.t. the input.

        Args:
            dout: Upstream gradient; it is transposed with ``(0, 2, 3, 1)``
                and flattened to ``(-1, FN)``, so it is expected to be laid
                out like the output of ``forward``.

        Returns:
            The result of ``col2im`` for the cached input shape, i.e. the
            gradient with respect to the forward input.
        """
        FN, C, FH, FW = self.W.shape
        # Move the filter axis last so each row lines up with a row of
        # self.col.
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)

        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.col.T, dout)
        # Put the filter axis first again and restore the 4-D filter shape.
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)

        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)

        return dx

In [13]:
# Run one forward pass with random filters (1, 3, 3, 3) and a random input
# (1, 3, 4, 4); the bare final expression displays the result.
conv_filters = np.random.rand(1, 3, 3, 3)
test_conv = Convolution(conv_filters, 0, stride=1, pad=0)
conv_input = np.random.rand(1, 3, 4, 4)
test_conv.forward(conv_input)

(1, 3, 3, 3) (1, 3, 4, 4)


array([[[[7.04689522, 5.59129646],
         [7.43447836, 7.12265213]]]])