In [1]:
%matplotlib inline
import matplotlib.pylab as plt
import numpy as np

# Convolution

### Filter

$
(OH, OW) = 
\begin{cases}
OH = \left\lfloor \frac{H + 2P - FH}{S} \right\rfloor + 1 \\ 
OW = \left\lfloor \frac{W + 2P - FW}{S} \right\rfloor + 1 \\
H: \text{Height} \\
W: \text{Width} \\
P: \text{Padding} \\
S: \text{Stride} \\
FH: \text{Filter Height} \\
FW: \text{Filter Width}
\end{cases} 
$

### im2col

In [2]:
def im2col(input_data, filter_h, filter_w, stride = 1, pad = 0):
    """Unfold sliding filter windows of a 4-D array into the rows of a 2-D matrix.

    Parameters
    ----------
    input_data : array whose shape unpacks as (N, C, H, W)
    filter_h, filter_w : window height and width
    stride : step between consecutive windows
    pad : number of zero rows/columns added on each side of the last two axes

    Returns
    -------
    2-D array with N * out_h * out_w rows; each row holds one window laid out
    in (C, filter_h, filter_w) order.
    """
    batch, channels, height, width = input_data.shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # Only the two trailing (spatial) axes are padded.
    pad_widths = [(0, 0), (0, 0), (pad, pad), (pad, pad)]
    padded = np.pad(input_data, pad_widths, 'constant')

    patches = np.zeros((batch, channels, filter_h, filter_w, out_h, out_w))
    for dy in range(filter_h):
        rows = slice(dy, dy + stride * out_h, stride)
        for dx in range(filter_w):
            cols = slice(dx, dx + stride * out_w, stride)
            # For filter offset (dy, dx), grab the value seen by every window at once.
            patches[:, :, dy, dx, :, :] = padded[:, :, rows, cols]

    # (N, C, FH, FW, OH, OW) -> (N, OH, OW, C, FH, FW), then one row per window.
    windows_first = patches.transpose(0, 4, 5, 1, 2, 3)
    return windows_first.reshape(batch * out_h * out_w, -1)

# Shape check for im2col: a single image and a batch of ten, both 3x7x7,
# unfolded with a 5x5 window.
x1 = np.random.rand(1, 3, 7, 7)
x2 = np.random.rand(10, 3, 7, 7)

col1 = im2col(x1, 5, 5, stride=1, pad=0)
col2 = im2col(x2, 5, 5, stride=1, pad=0)

print(x1.shape, col1.shape)
print(x2.shape, col2.shape)

(1, 3, 7, 7) (9, 75)
(10, 3, 7, 7) (90, 75)


### col2im

In [3]:
def col2im(col, input_shape, filter_h, filter_w, stride = 1, pad = 0):
    """Fold a 2-D window matrix back into a 4-D array, summing overlapping entries.

    Parameters
    ----------
    col : array reshapeable to (N, out_h, out_w, C, filter_h, filter_w)
    input_shape : (N, C, H, W) of the array to rebuild
    filter_h, filter_w : window height and width
    stride : step between consecutive windows
    pad : padding that was applied on each side of the spatial axes

    Returns
    -------
    Array of shape (N, C, H, W). Positions covered by several windows hold
    the sum of the contributions from each of them.
    """
    batch, channels, height, width = input_shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # (N*OH*OW, C*FH*FW) -> (N, OH, OW, C, FH, FW) -> (N, C, FH, FW, OH, OW)
    patches = col.reshape(batch, out_h, out_w, channels, filter_h, filter_w)
    patches = patches.transpose(0, 3, 4, 5, 1, 2)

    # Work buffer: padded size plus stride - 1 extra rows/cols; cropped on return.
    canvas = np.zeros((batch,
                       channels,
                       height + 2 * pad + stride - 1,
                       width + 2 * pad + stride - 1))
    for dy in range(filter_h):
        rows = slice(dy, dy + stride * out_h, stride)
        for dx in range(filter_w):
            cols = slice(dx, dx + stride * out_w, stride)
            canvas[:, :, rows, cols] += patches[:, :, dy, dx, :, :]

    # Drop the padding border (and any slack) to recover the H x W extent.
    return canvas[:, :, pad:height + pad, pad:width + pad]

# Shape check for col2im: fold the matrices produced by im2col back to the
# shapes of the arrays they were built from.
img1 = col2im(col1, x1.shape, 5, 5, stride=1, pad=0)
img2 = col2im(col2, x2.shape, 5, 5, stride=1, pad=0)

print(col1.shape, img1.shape)
print(col2.shape, img2.shape)

(9, 75) (1, 3, 7, 7)
(90, 75) (10, 3, 7, 7)


In [4]:
class Convolution:
    """Convolution layer (forward pass only) computed as im2col + matrix product.

    Parameters
    ----------
    W : filter array whose shape unpacks as (FN, C, FH, FW)
    b : bias with one value per filter (or a scalar), or None for no bias
    stride : step between consecutive filter positions
    pad : zero padding added on each side of the spatial axes
    """

    def __init__(self, W, b, stride = 1, pad = 0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Apply the filters to ``x``.

        Parameters
        ----------
        x : array whose shape unpacks as (N, C, H, W)

        Returns
        -------
        Array of shape (N, FN, out_h, out_w).

        Raises
        ------
        ValueError
            If the channel count of ``x`` differs from that of the filters.
        """
        FN, C, FH, FW = self.W.shape
        N, in_channels, H, W = x.shape
        if in_channels != C:
            raise ValueError(
                "input has %d channels but filters expect %d" % (in_channels, C)
            )

        # Floor division, matching how im2col sizes its output.
        out_h = (H + 2 * self.pad - FH) // self.stride + 1
        out_w = (W + 2 * self.pad - FW) // self.stride + 1

        col = im2col(x, FH, FW, self.stride, self.pad)   # (N*out_h*out_w, C*FH*FW)
        col_W = self.W.reshape(FN, -1).T                 # (C*FH*FW, FN)

        out = np.dot(col, col_W)                         # (N*out_h*out_w, FN)
        if self.b is not None:
            # Flatten so a bias stored as (FN,), (FN, 1, 1) or a scalar
            # broadcasts along the filter axis.
            out = out + np.reshape(self.b, -1)

        # One row per window -> (N, out_h, out_w, FN) -> channels-first layout.
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
        return out

# Pooling

$
\text{Pooling}
\begin{cases}
\text{Max Pooling} \\
\text{Average Pooling}
\end{cases} 
$

In [5]:
class Pooling:
    """Max-pooling layer (forward pass only) built on im2col.

    Parameters
    ----------
    pool_h, pool_w : height and width of the pooling window
    stride : step between consecutive windows
    pad : padding forwarded to im2col for the spatial axes
    """

    def __init__(self, pool_h, pool_w, stride = 1, pad = 0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Take the maximum over each pooling window of ``x``.

        Parameters
        ----------
        x : array whose shape unpacks as (N, C, H, W)

        Returns
        -------
        Array of shape (N, C, out_h, out_w).
        """
        N, C, H, W = x.shape
        # The padding must be part of the output size: im2col is called with
        # self.pad, so leaving it out makes the final reshape fail for pad > 0.
        out_h = (H + 2 * self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2 * self.pad - self.pool_w) // self.stride + 1

        # NOTE: im2col pads with zeros, so with pad > 0 a border window whose
        # real values are all negative yields 0 rather than its true maximum.
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        # Each im2col row is (C, pool_h, pool_w); split it so every row holds
        # the window of a single channel.
        col = col.reshape(-1, self.pool_h * self.pool_w)

        out = np.max(col, axis = 1)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
        return out