In [3]:
import numpy as np
import tensorflow as tf

# Notation

Max pooling reduces the width and height of given volumes.


| Symbols                   | Meaning               | Size                                              |
|:--------------------------|:----------------------|:--------------------------------------------------|
| $\boldsymbol{X}$          | 2D Input Matrix       | $(H_{in}$, $W_{in})$                              |
| $\boldsymbol{\mathsf{X}}$ | 3D Input Tensor       | $(H_{in}$, $W_{in}$, $C_{in})$                    |
| $\boldsymbol{\mathsf{X}}$ | 4D Input Tensor       | $(N_{batch}$, $H_{in}$, $W_{in}$, $C_{in})$       |
| $\boldsymbol{Y}$          | 2D Output Matrix      | $(H_{out}$, $W_{out})$                            |
| $\boldsymbol{\mathsf{Y}}$ | 3D Output Tensor      | $(H_{out}$, $W_{out}$, $C_{out})$                 |
| $\boldsymbol{\mathsf{Y}}$ | 4D Output Tensor      | $(N_{batch}$, $H_{out}$, $W_{out}$, $C_{out})$    |

As opposed to convolution, max pooling doesn't have any learnable parameters,
so there's no $\boldsymbol{\mathsf{W}}$ or $\boldsymbol{b}$ in the list of notation above.

# 08-1: Max Pooling Forward Pass

Try calculation with NumPy and TensorFlow for the followings

#### 1. Max Pooling with stride
$(4 \times 4) \rightarrow (2 \times 2)$ where $S=2$

#### 2. Multiple Channels
$(4 \times 4 \times 3) \rightarrow (2 \times 2 \times 3)$ where $S=2$

#### 3. Mini-batch + bias
$(4 \times 4 \times 4 \times 3) \rightarrow (4 \times 2 \times 2 \times 3)$ where $S=2$


In [4]:
def float_sequence(size):
    return np.arange(size, dtype=np.float32)

## 1. Max Pooling with stride
$(4 \times 4) \rightarrow (2 \times 2)$ where $S=2$

TensorFlow: [tf.nn.max_pool](https://www.tensorflow.org/api_docs/python/tf/nn/max_pool)

For the size of output width and height, the same fomula as convolution operation hold.

$$
H_{out} = \frac{H_{in} + 2P - H_{filter}}{S} + 1
$$

$$
W_{out} = \frac{W_{in} + 2P - F_{filter}}{S} + 1
$$

However, padding $P$ is not used in max pooling, and often filter size $H_{filter}$ and $F_{filter}$ is the same as stride $S$.
As a result, the size can be calculated as follwing.

$$
H_{out} = \frac{H_{in}}{S}
$$

$$
W_{out} = \frac{W_{in}}{S}
$$

In [13]:
X = float_sequence(4*4).reshape(4,4)
S = 2

H_out = 4 // S
W_out = 4 // S

print("=== X ===")
print(X)

Y = np.zeros((H_out, W_out))
for h in range(W_out):
    for w in range(W_out):
        h_start = h * S
        h_end   = h_start + S
        w_start = w * S
        w_end   = w_start + S
        
        X_slice = X[h_start:h_end, w_start:w_end]
        #print(X_slice)
        Y[h,w] = np.max(X_slice)

print("=== Y ===")     
print(Y)

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.max_pool(
        X.reshape(1, 4, 4, 1),
        ksize=[1, S, S, 1],
        strides=[1, S, S, 1],
        padding='VALID'
    ))
    print("=== Y (TF) ===")     
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")    
print(np.all(Y == Y_tf[0, :, :, 0]))

=== X ===
[[  0.   1.   2.   3.]
 [  4.   5.   6.   7.]
 [  8.   9.  10.  11.]
 [ 12.  13.  14.  15.]]
=== Y ===
[[  5.   7.]
 [ 13.  15.]]
=== Y (TF) ===
[[  5.   7.]
 [ 13.  15.]]
=== Matched? ===
True


### 5. Channel

$(4 \times 4 \times 3) * (3 \times 3 \times 3) = (2 \times 2)$

In [27]:
X = float_sequence(4*4*3).reshape(4,4,3)

S = 2

H_out = 4 // S
W_out = 4 // S

print("=== X ===")
print(X.transpose(2, 0, 1))

Y = np.zeros((H_out, W_out, 3))
for h in range(W_out):
    for w in range(W_out):
        h_start = h * S
        h_end   = h_start + S
        w_start = w * S
        w_end   = w_start + S
        
        X_slice = X[h_start:h_end, w_start:w_end, :]
        print("slice: ", X_slice.shape)
        print(X_slice)
#         Y[h,w] = np.max(X_slice)

print("=== Y ===")     
print(Y.transpose(2,0,1))

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.max_pool(
        X.reshape(1, 4, 4, 3),
        ksize=[1, S, S, 1],
        strides=[1, S, S, 1],
        padding='VALID'
    ))
    print("=== Y (TF) ===")     
    print(Y_tf[0, :, :, :].transpose(2,0,1))

print("=== Matched? ===")    
print(np.all(Y == Y_tf[0, :, :, :]))

=== X ===
[[[  0.   3.   6.   9.]
  [ 12.  15.  18.  21.]
  [ 24.  27.  30.  33.]
  [ 36.  39.  42.  45.]]

 [[  1.   4.   7.  10.]
  [ 13.  16.  19.  22.]
  [ 25.  28.  31.  34.]
  [ 37.  40.  43.  46.]]

 [[  2.   5.   8.  11.]
  [ 14.  17.  20.  23.]
  [ 26.  29.  32.  35.]
  [ 38.  41.  44.  47.]]]
slice:  (2, 2, 3)
[[[  0.   1.   2.]
  [  3.   4.   5.]]

 [[ 12.  13.  14.]
  [ 15.  16.  17.]]]
slice:  (2, 2, 3)
[[[  6.   7.   8.]
  [  9.  10.  11.]]

 [[ 18.  19.  20.]
  [ 21.  22.  23.]]]
slice:  (2, 2, 3)
[[[ 24.  25.  26.]
  [ 27.  28.  29.]]

 [[ 36.  37.  38.]
  [ 39.  40.  41.]]]
slice:  (2, 2, 3)
[[[ 30.  31.  32.]
  [ 33.  34.  35.]]

 [[ 42.  43.  44.]
  [ 45.  46.  47.]]]
=== Y ===
[[[ 0.  0.]
  [ 0.  0.]]

 [[ 0.  0.]
  [ 0.  0.]]

 [[ 0.  0.]
  [ 0.  0.]]]
=== Y (TF) ===
[[[ 15.  21.]
  [ 39.  45.]]

 [[ 16.  22.]
  [ 40.  46.]]

 [[ 17.  23.]
  [ 41.  47.]]]
=== Matched? ===
False


### 9. Mini-batch + bias

$(3 \times 4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) + (4)= (3 \times 2 \times 2 \times 4)$

In [11]:
X = float_sequence(3*4*4*3).reshape(3,4,4,3)
W = 120 - float_sequence(3*3*3*4).reshape(3,3,3,4)
b = np.array([10, 100, 1000, 10000])
# print("=== X (1st) ===")
# print(X[0, :, :, :].transpose(2, 0, 1))
# print("=== X (2nd) ===")
# print(X[1, :, :, :].transpose(2, 0, 1))
# print("=== X (3rd) ===")
# print(X[2, :, :, :].transpose(2, 0, 1))



Y_wo_bias = np.zeros((3,2,2,4))
for w in range(4-3+1):
    for h in range(4-3+1):
        h_start = h
        h_end   = h_start + 3
        w_start = w
        w_end   = w_start + 3
        for n_batch in range(3):
            for c_out in range(4):
                Y_wo_bias[n_batch, h,w,c_out] = np.sum(X[n_batch, h_start:h_end, w_start:w_end, :] * W[:, :, :, c_out])
Y = Y_wo_bias + b

print("=== Y_wo_bias (1st) ===")     
print(Y_wo_bias[0, :, :, :].transpose(2, 0, 1))

print("=== Y (1st) ===")     
print(Y[0, :, :, :].transpose(2, 0, 1))

# print("=== Y_wo_bias (2nd) ===")     
# print(Y_wo_bias[1, :, :, :].transpose(2, 0, 1))

# print("=== Y (2nd) ===")     
# print(Y[1, :, :, :].transpose(2, 0, 1))

# print("=== Y_wo_bias (3rd) ===")     
# print(Y_wo_bias[2, :, :, :].transpose(2, 0, 1))

# print("=== Y (3rd) ===")     
# print(Y[2, :, :, :].transpose(2, 0, 1))

# Error, because batch size is 3
# print("=== Y (4th) ===")     
# print(Y[3, :, :, :].transpose(2, 0, 1))


with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X.reshape(3, 4, 4, 3),
        W.reshape(3, 3, 3, 4),
        strides=[1, 1, 1, 1],
        padding='VALID'
    ) + b)
    print("=== Y (TF, 1st) ===")     
    print(Y_tf[0, :, :, :].transpose(2, 0, 1))

print("=== Matched? ===")    
print(np.all(Y == Y_tf))

=== Y_wo_bias (1st) ===
[[[ 20880.  26388.]
  [ 42912.  48420.]]

 [[ 20448.  25875.]
  [ 42156.  47583.]]

 [[ 20016.  25362.]
  [ 41400.  46746.]]

 [[ 19584.  24849.]
  [ 40644.  45909.]]]
=== Y (1st) ===
[[[ 20890.  26398.]
  [ 42922.  48430.]]

 [[ 20548.  25975.]
  [ 42256.  47683.]]

 [[ 21016.  26362.]
  [ 42400.  47746.]]

 [[ 29584.  34849.]
  [ 50644.  55909.]]]
=== Y (TF, 1st) ===
[[[ 20890.  26398.]
  [ 42922.  48430.]]

 [[ 20548.  25975.]
  [ 42256.  47683.]]

 [[ 21016.  26362.]
  [ 42400.  47746.]]

 [[ 29584.  34849.]
  [ 50644.  55909.]]]
=== Matched? ===
True


# Generalize naive convolution foward

In [37]:
def conv_naive_foward(X, W, b, P=0, S=1):
    N_batch, H_in, W_in, C_in = X.shape
    H_filter, W_filter, C_in, C_out = W.shape
    
    H_out = (H_in + 2*P - H_filter) // S + 1
    W_out = (W_in + 2*P - W_filter) // S + 1
    
    if P > 0:
        X = np.pad(X, ((0, 0), (P, P), (P, P), (0, 0)), 'constant')
  
    Y = np.zeros((N_batch, H_out, W_out, C_out))
    for w in range(W_out):
        for h in range(H_out):
            h_start = h * S
            h_end   = h_start + H_filter
            w_start = w * S
            w_end   = w_start + W_filter
            for n_batch in range(N_batch):
                for c_out in range(C_out):
                    Y[n_batch, h, w, c_out] = np.sum(X[n_batch, h_start:h_end, w_start:w_end, :] * W[:, :, :, c_out])
    
    return Y + b

In [14]:
P = 1
S = 2
X = np.random.randn(128, 7, 7, 3).astype(np.float32)
W = np.random.randn(3, 3, 3, 4).astype(np.float32)
b = np.random.randn(4).astype(np.float32)

Y = conv_naive_foward(X, W, b, P, S)

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X,
        W,
        strides=[1, S, S, 1],
        padding='SAME'
    ) + b)

print("=== Matched? ===")    
check = np.linalg.norm(Y - Y_tf) / ((np.linalg.norm(Y) + np.linalg.norm(Y_tf)))
print(check < 1e-7, check)

=== Matched? ===
True 4.8661015951e-08


# Benchmark

In [18]:
P = 1
S = 1
X = np.random.randn(128, 28, 28, 3).astype(np.float32)
W = np.random.randn(3, 3, 3, 16).astype(np.float32)
b = np.random.randn(16).astype(np.float32)

Y = conv_naive_foward(X, W, b, P, S)

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X,
        W,
        strides=[1, S, S, 1],
        padding='SAME'
    ) + b)

print("=== Matched? ===")    
check = np.linalg.norm(Y - Y_tf) / ((np.linalg.norm(Y) + np.linalg.norm(Y_tf)))
print(check < 1e-7, check)

=== Matched? ===
True 5.39970939486e-08


In [19]:
%%timeit -n3 -r3

conv_naive_foward(X, W, b, P, S)

8.58 s ± 23.9 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


# Shape Test

In [38]:
P = 3
S = 1
X = np.random.randn(64, 11, 9, 16).astype(np.float32)
W = np.random.randn(7, 7, 16, 32).astype(np.float32)
b = np.random.randn(32).astype(np.float32)

Y = conv_naive_foward(X, W, b, P, S)
print(Y.shape)

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X,
        W,
        strides=[1, S, S, 1],
        padding='SAME'
    ) + b)

print("=== Matched? ===")    
check = np.linalg.norm(Y - Y_tf) / ((np.linalg.norm(Y) + np.linalg.norm(Y_tf)))
print(check < 1e-5, check)

(64, 11, 9, 32)
=== Matched? ===
True 1.4678668145e-07


In [40]:
P = 0
S = 5
X = np.random.randn(4, 28, 46, 128).astype(np.float32)
W = np.random.randn(8, 16, 128, 2).astype(np.float32)
b = np.random.randn(2).astype(np.float32)

Y = conv_naive_foward(X, W, b, P, S)
print(Y.shape)

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X,
        W,
        strides=[1, S, S, 1],
        padding='VALID'
    ) + b)

print("=== Matched? ===")    
check = np.linalg.norm(Y - Y_tf) / ((np.linalg.norm(Y) + np.linalg.norm(Y_tf)))
print(check < 1e-5, check)

(4, 5, 7, 2)
=== Matched? ===
True 1.32524826708e-07
