In [3]:
import numpy as np
import tensorflow as tf

# Notation

Max pooling reduces the width and height of given volumes.


| Symbols                   | Meaning               | Size                                              |
|:--------------------------|:----------------------|:--------------------------------------------------|
| $\boldsymbol{X}$          | 2D Input Matrix       | $(H_{in}$, $W_{in})$                              |
| $\boldsymbol{\mathsf{X}}$ | 3D Input Tensor       | $(H_{in}$, $W_{in}$, $C_{in})$                    |
| $\boldsymbol{\mathsf{X}}$ | 4D Input Tensor       | $(N_{batch}$, $H_{in}$, $W_{in}$, $C_{in})$       |
| $\boldsymbol{Y}$          | 2D Output Matrix      | $(H_{out}$, $W_{out})$                            |
| $\boldsymbol{\mathsf{Y}}$ | 3D Output Tensor      | $(H_{out}$, $W_{out}$, $C_{out})$                 |
| $\boldsymbol{\mathsf{Y}}$ | 4D Output Tensor      | $(N_{batch}$, $H_{out}$, $W_{out}$, $C_{out})$    |

As opposed to convolution, max pooling doesn't have any learnable parameters,
so there's no $\boldsymbol{\mathsf{W}}$ or $\boldsymbol{b}$ in the list of notation above.

# 08-1: Max Pooling Forward Pass

Try the calculation with NumPy and TensorFlow for the following cases:

#### 1. Max Pooling with stride
$(4 \times 4) \rightarrow (2 \times 2)$ where $S=2$

#### 2. Multiple Channels
$(4 \times 4 \times 3) \rightarrow (2 \times 2 \times 3)$ where $S=2$

#### 3. Mini-batch
$(4 \times 4 \times 4 \times 3) \rightarrow (4 \times 2 \times 2 \times 3)$ where $S=2$

In [4]:
def float_sequence(size):
    """Return the 1-D float32 array [0, 1, ..., size - 1]."""
    sequence = np.arange(size, dtype=np.float32)
    return sequence

## 1. Max Pooling with stride
$(4 \times 4) \rightarrow (2 \times 2)$ where $S=2$

TensorFlow: [tf.nn.max_pool](https://www.tensorflow.org/api_docs/python/tf/nn/max_pool)

The output width and height follow the same formula as for the convolution operation.

$$
H_{out} = \frac{H_{in} + 2P - H_{filter}}{S} + 1
$$

$$
W_{out} = \frac{W_{in} + 2P - W_{filter}}{S} + 1
$$

However, padding is usually not used in max pooling ($P = 0$), and the filter sizes $H_{filter}$ and $W_{filter}$ are often the same as the stride $S$.
As a result, the size can be calculated as follows.

$$
H_{out} = \frac{H_{in}}{S}
$$

$$
W_{out} = \frac{W_{in}}{S}
$$

In [13]:
# 2D max pooling of a (4, 4) matrix with an S x S window and stride S,
# computed with NumPy and checked against tf.nn.max_pool.
X = float_sequence(4*4).reshape(4,4)
S = 2

# Output size is derived from the input shape rather than hard-coded.
H_out = X.shape[0] // S
W_out = X.shape[1] // S

print("=== X ===")
print(X)

Y = np.zeros((H_out, W_out))
# Rows iterate over the output height (previously range(W_out), which was
# only correct because the input is square).
for h in range(H_out):
    h_start = h * S
    h_end   = h_start + S
    for w in range(W_out):
        w_start = w * S
        w_end   = w_start + S

        # Maximum over one S x S window.
        X_slice = X[h_start:h_end, w_start:w_end]
        Y[h, w] = np.max(X_slice)

print("=== Y ===")
print(Y)

with tf.Session() as sess:
    # tf.nn.max_pool is fed a 4D tensor here, so batch and channel axes of
    # size 1 are added and stripped again when printing.
    Y_tf = sess.run(tf.nn.max_pool(
        X.reshape(1, X.shape[0], X.shape[1], 1),
        ksize=[1, S, S, 1],
        strides=[1, S, S, 1],
        padding='VALID'
    ))
    print("=== Y (TF) ===")
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")
print(np.all(Y == Y_tf[0, :, :, 0]))

=== X ===
[[  0.   1.   2.   3.]
 [  4.   5.   6.   7.]
 [  8.   9.  10.  11.]
 [ 12.  13.  14.  15.]]
=== Y ===
[[  5.   7.]
 [ 13.  15.]]
=== Y (TF) ===
[[  5.   7.]
 [ 13.  15.]]
=== Matched? ===
True


## 2. Multiple Channels
$(4 \times 4 \times 3) \rightarrow (2 \times 2 \times 3)$ where $S=2$

In [31]:
# Max pooling of a (4, 4, 3) tensor: each channel is pooled independently
# with an S x S window and stride S, then checked against tf.nn.max_pool.
X = float_sequence(4*4*3).reshape(4,4,3)

S = 2

# Output size is derived from the input shape rather than hard-coded.
H_out = X.shape[0] // S
W_out = X.shape[1] // S

Y = np.zeros((H_out, W_out, X.shape[2]))
# Rows iterate over the output height (previously range(W_out), which was
# only correct because the input is square).
for h in range(H_out):
    h_start = h * S
    h_end   = h_start + S
    for w in range(W_out):
        w_start = w * S
        w_end   = w_start + S

        # Reduce over the two spatial axes; the channel axis is kept.
        X_slice = X[h_start:h_end, w_start:w_end, :]
        Y[h, w, :] = np.max(X_slice, axis=(0, 1))

print("=== Y ===")
# Channel-first view so each printed 2D block is one channel.
print(Y.transpose(2,0,1))

with tf.Session() as sess:
    # A batch axis of size 1 is added for TensorFlow.
    Y_tf = sess.run(tf.nn.max_pool(
        X.reshape(1, X.shape[0], X.shape[1], X.shape[2]),
        ksize=[1, S, S, 1],
        strides=[1, S, S, 1],
        padding='VALID'
    ))
    print("=== Y (TF) ===")
    print(Y_tf[0, :, :, :].transpose(2,0,1))

print("=== Matched? ===")
print(np.all(Y == Y_tf[0, :, :, :]))

=== Y ===
[[[ 15.  21.]
  [ 39.  45.]]

 [[ 16.  22.]
  [ 40.  46.]]

 [[ 17.  23.]
  [ 41.  47.]]]
=== Y (TF) ===
[[[ 15.  21.]
  [ 39.  45.]]

 [[ 16.  22.]
  [ 40.  46.]]

 [[ 17.  23.]
  [ 41.  47.]]]
=== Matched? ===
True


## 3. Mini-batch
$(4 \times 4 \times 4 \times 3) \rightarrow (4 \times 2 \times 2 \times 3)$ where $S=2$

In [42]:
# Max pooling of a mini-batch shaped (4, 4, 4, 3): every sample and channel
# is pooled independently with an S x S window and stride S.
X = float_sequence(4*4*4*3).reshape(4,4,4,3)

S = 2

# Output size is derived from the input shape rather than hard-coded.
H_out = X.shape[1] // S
W_out = X.shape[2] // S

print("=== X (first) ===")
print(X[0, :, :, :].transpose(2, 0, 1))

Y = np.zeros((X.shape[0], H_out, W_out, X.shape[3]))
# Rows iterate over the output height (previously range(W_out), which was
# only correct because the input is square).
for h in range(H_out):
    h_start = h * S
    h_end   = h_start + S
    for w in range(W_out):
        w_start = w * S
        w_end   = w_start + S

        # Reduce over the two spatial axes; batch and channel axes are kept.
        X_slice = X[:, h_start:h_end, w_start:w_end, :]
        Y[:, h, w, :] = np.max(X_slice, axis=(1, 2))

print("=== Y (first) ===")
print(Y[0, :, :, :].transpose(2,0,1))

# The TensorFlow reference was commented out, which left the `with` block
# without a body (a syntax error) and made the comparison below use a Y_tf
# left over from an earlier cell. X is already 4D, so it is passed directly.
with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.max_pool(
        X,
        ksize=[1, S, S, 1],
        strides=[1, S, S, 1],
        padding='VALID'
    ))
    print("=== Y (TF, 1st) ===")
    print(Y_tf[0, :, :, :].transpose(2, 0, 1))

print("=== Matched? ===")
print(np.all(Y == Y_tf))

=== X (first) ===
[[[  0.   3.   6.   9.]
  [ 12.  15.  18.  21.]
  [ 24.  27.  30.  33.]
  [ 36.  39.  42.  45.]]

 [[  1.   4.   7.  10.]
  [ 13.  16.  19.  22.]
  [ 25.  28.  31.  34.]
  [ 37.  40.  43.  46.]]

 [[  2.   5.   8.  11.]
  [ 14.  17.  20.  23.]
  [ 26.  29.  32.  35.]
  [ 38.  41.  44.  47.]]]
=== Y (first) ===
[[[ 15.  21.]
  [ 39.  45.]]

 [[ 16.  22.]
  [ 40.  46.]]

 [[ 17.  23.]
  [ 41.  47.]]]
=== Y (TF, 1st) ===
[[[ 15.  21.]
  [ 39.  45.]]

 [[ 16.  22.]
  [ 40.  46.]]

 [[ 17.  23.]
  [ 41.  47.]]]
=== Matched? ===
True


# Generalize naive max pooling forward

In [48]:
def max_pool_naive_foward(X, S):
    """Naive max pooling forward pass with an S x S window and stride S.

    Parameters
    ----------
    X : ndarray, shape (N_batch, H_in, W_in, C_in)
        Input mini-batch.
    S : int
        Window size and stride, shared by height and width.

    Returns
    -------
    Y : ndarray of float64, shape (N_batch, H_in // S, W_in // S, C_in)
        Maximum of each non-overlapping S x S window, per sample and channel.
        Trailing rows/columns that do not fill a whole window are dropped.
    """
    N_batch, H_in, W_in, C_in = X.shape

    H_out = H_in // S
    W_out = W_in // S

    Y = np.zeros((N_batch, H_out, W_out, C_in))
    # The row loop must span the output height. It used to be range(W_out),
    # which skipped rows (tall inputs) or sliced past the input (wide inputs).
    for h in range(H_out):
        h_start = h * S
        h_end   = h_start + S
        for w in range(W_out):
            w_start = w * S
            w_end   = w_start + S
            # Reduce over the two spatial axes; batch and channel axes are kept.
            X_slice = X[:, h_start:h_end, w_start:w_end, :]
            Y[:, h, w, :] = np.max(X_slice, axis=(1, 2))

    return Y

In [49]:
# Compare the naive NumPy max pooling with tf.nn.max_pool on random data.
S = 2
X = np.random.randn(10, 8, 8, 3).astype(np.float32)

# NumPy result.
Y = max_pool_naive_foward(X, S)

# Build the TensorFlow op first, then evaluate it inside a session.
Y_tf = tf.nn.max_pool(
    X,
    ksize=[1, S, S, 1],
    strides=[1, S, S, 1],
    padding='VALID'
)
with tf.Session() as sess:
    Y_tf = sess.run(Y_tf)
    # First sample, printed channel-first.
    print(Y_tf[0, :, :, :].transpose(2, 0, 1))

print("=== Matched? ===")
# Relative error: ||Y - Y_tf|| / (||Y|| + ||Y_tf||).
check = np.linalg.norm(Y - Y_tf)
check /= np.linalg.norm(Y) + np.linalg.norm(Y_tf)
print(check < 1e-7, check)

=== Matched? ===
True 0.0


# Benchmark

In [18]:
# Compare conv_naive_foward with tf.nn.conv2d (SAME padding) on random data.
# NOTE(review): conv_naive_foward is not defined in the visible part of this
# notebook - confirm it is defined or imported before this cell runs.
P, S = 1, 1
X = np.random.randn(128, 28, 28, 3).astype(np.float32)
W = np.random.randn(3, 3, 3, 16).astype(np.float32)
b = np.random.randn(16).astype(np.float32)

Y = conv_naive_foward(X, W, b, P, S)

# Build the TensorFlow op first, then evaluate it inside a session.
Y_tf = tf.nn.conv2d(
    X,
    W,
    strides=[1, S, S, 1],
    padding='SAME'
) + b
with tf.Session() as sess:
    Y_tf = sess.run(Y_tf)

print("=== Matched? ===")
# Relative error: ||Y - Y_tf|| / (||Y|| + ||Y_tf||).
check = np.linalg.norm(Y - Y_tf)
check /= np.linalg.norm(Y) + np.linalg.norm(Y_tf)
print(check < 1e-7, check)

=== Matched? ===
True 5.39970939486e-08


In [19]:
%%timeit -n3 -r3

# Times one call of the naive convolution forward pass using the notebook-level
# X, W, b, P and S (3 loops per run, 3 runs).
# NOTE(review): conv_naive_foward is not defined in the visible part of this
# notebook - confirm it is defined or imported before this cell runs.
conv_naive_foward(X, W, b, P, S)

8.58 s ± 23.9 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)


# Shape Test

In [38]:
# Shape test with a non-square input and a 7x7 filter, checked against
# tf.nn.conv2d with SAME padding.
# NOTE(review): conv_naive_foward is not defined in the visible part of this
# notebook - confirm it is defined or imported before this cell runs.
P, S = 3, 1
X = np.random.randn(64, 11, 9, 16).astype(np.float32)
W = np.random.randn(7, 7, 16, 32).astype(np.float32)
b = np.random.randn(32).astype(np.float32)

Y = conv_naive_foward(X, W, b, P, S)
print(Y.shape)

# Build the TensorFlow op first, then evaluate it inside a session.
Y_tf = tf.nn.conv2d(
    X,
    W,
    strides=[1, S, S, 1],
    padding='SAME'
) + b
with tf.Session() as sess:
    Y_tf = sess.run(Y_tf)

print("=== Matched? ===")
# Relative error: ||Y - Y_tf|| / (||Y|| + ||Y_tf||).
check = np.linalg.norm(Y - Y_tf)
check /= np.linalg.norm(Y) + np.linalg.norm(Y_tf)
print(check < 1e-5, check)

(64, 11, 9, 32)
=== Matched? ===
True 1.4678668145e-07


In [40]:
# Shape test with stride 5, no padding and a non-square 8x16 filter, checked
# against tf.nn.conv2d with VALID padding.
# NOTE(review): conv_naive_foward is not defined in the visible part of this
# notebook - confirm it is defined or imported before this cell runs.
P, S = 0, 5
X = np.random.randn(4, 28, 46, 128).astype(np.float32)
W = np.random.randn(8, 16, 128, 2).astype(np.float32)
b = np.random.randn(2).astype(np.float32)

Y = conv_naive_foward(X, W, b, P, S)
print(Y.shape)

# Build the TensorFlow op first, then evaluate it inside a session.
Y_tf = tf.nn.conv2d(
    X,
    W,
    strides=[1, S, S, 1],
    padding='VALID'
) + b
with tf.Session() as sess:
    Y_tf = sess.run(Y_tf)

print("=== Matched? ===")
# Relative error: ||Y - Y_tf|| / (||Y|| + ||Y_tf||).
check = np.linalg.norm(Y - Y_tf)
check /= np.linalg.norm(Y) + np.linalg.norm(Y_tf)
print(check < 1e-5, check)

(4, 5, 7, 2)
=== Matched? ===
True 1.32524826708e-07
