In [1]:
import numpy as np
import tensorflow as tf

# Notation

Convolution is an operation between matrices or tensors denoted by $*$.


| Symbols                   | Meaning               | Size                                              |
|:--------------------------|:----------------------|:--------------------------------------------------|
| $\boldsymbol{X}$          | 2D Input Matrix       | $(H_{in}$, $W_{in})$                              |
| $\boldsymbol{\mathsf{X}}$ | 3D Input Tensor       | $(H_{in}$, $W_{in}$, $C_{in})$                    |
| $\boldsymbol{\mathsf{X}}$ | 4D Input Tensor       | $(N_{in}$, $H_{in}$, $W_{in}$, $C_{in})$          |
| $\boldsymbol{W}$          | 2D Filter Matrix      | $(H_{filter}$, $W_{filter})$                      |
| $\boldsymbol{\mathsf{W}}$ | 3D Filter Tensor      | $(H_{filter}$, $W_{filter}$, $C_{in})$            |
| $\boldsymbol{\mathsf{W}}$ | 4D Filter Tensor      | $(H_{filter}$, $W_{filter}$, $C_{in}$, $C_{out})$ |
| $\boldsymbol{Y}$          | 2D Output Matrix      | $(H_{out}$, $W_{out})$                            |
| $\boldsymbol{\mathsf{Y}}$ | 3D Output Tensor      | $(H_{out}$, $W_{out}$, $C_{out})$                 |
| $\boldsymbol{\mathsf{Y}}$ | 4D Output Tensor      | $(N_{in}$, $H_{out}$, $W_{out}$, $C_{out})$       |
| $\boldsymbol{b}$          | 1D Bias Vector        | $(C_{out})$                                      |

Please note that the letter `W` stands for both `weight` (as in $\boldsymbol{W}$ or $\boldsymbol{\mathsf{W}}$) and `width` (as in $W_{in}$).

Also, a convolution can take padding $P$ and stride $S$.

# Convolution Operation Forward Pass

Try the calculation with NumPy and TensorFlow for each of the following cases:

#### 1. Basic
$(4 \times 4) * (3 \times 3) = (2 \times 2)$

#### 2. Padding
$(4 \times 4) * (3 \times 3) = (4 \times 4)$ where $P=1$

#### 3. Stride
$(7 \times 7) * (3 \times 3) = (3 \times 3)$ where $S=2$

#### 4. Padding and Stride
$(7 \times 7) * (3 \times 3) = (4 \times 4)$ where $P=1, S=2$

#### 5. Channel
$(4 \times 4 \times 3) * (3 \times 3 \times 3) = (2 \times 2)$

#### 6. Channel and bias 
$(4 \times 4 \times 3) * (3 \times 3 \times 3) + (1) = (2 \times 2)$

#### 7. Multiple Filters
$(4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) = (2 \times 2 \times 4)$

#### 8. Multiple Filters + bias
$(4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) + (1 \times 4)= (2 \times 2 \times 4)$

#### 9. Mini-batch
$(3 \times 4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) + (1 \times 4)= (3 \times 2 \times 2 \times 4)$

#### 10. RGB Mini-batch $*$ Multiple Filters with stride and padding
$(3 \times 7 \times 7 \times 3) * (3 \times 3 \times 3 \times 4) + (1 \times 4)= (3 \times 4 \times 4 \times 4)$ where $P=1, S=2$


In [27]:
def float_sequence(size):
    """Return the 1-D float32 array [0, 1, ..., size - 1].

    The cells below reshape this sequence into inputs and filters so that
    every element is distinct and the convolution results are easy to
    check by hand.
    """
    sequence = np.arange(0, size, dtype=np.float32)
    return sequence

### 1. Basic Convolution

$(4 \times 4) * (3 \times 3) = (2 \times 2)$

In [56]:
# Input and filter hold consecutive values so each output can be checked by hand.
X = float_sequence(4 * 4).reshape(4, 4)
W = 12 - float_sequence(3 * 3).reshape(3, 3)
print("=== X ===")
print(X)
print("=== W ===")
print(W)

# Without padding and with stride 1 the output has
# (H_in - H_filter + 1, W_in - W_filter + 1) elements.
H_filter, W_filter = W.shape
H_out = X.shape[0] - H_filter + 1
W_out = X.shape[1] - W_filter + 1

# Fully explicit version: accumulate every product of the window one by one.
# The filter is applied as-is (no flipping), which is what tf.nn.conv2d is
# compared against below.
Y = np.zeros((H_out, W_out))
for row in range(H_out):
    for col in range(W_out):
        for i in range(H_filter):
            for j in range(W_filter):
                Y[row, col] += X[row + i, col + j] * W[i, j]

print("=== Y ===")
print(Y)

# Cross-check with TensorFlow: input is (N, H, W, C_in) and the filter is
# (H_filter, W_filter, C_in, C_out), so singleton axes are added around the 2-D arrays.
with tf.Session() as sess:
    conv = tf.nn.conv2d(
        X[np.newaxis, :, :, np.newaxis],
        W[:, :, np.newaxis, np.newaxis],
        strides=[1, 1, 1, 1],
        padding='VALID'
    )
    Y_tf = sess.run(conv)
    print("=== Y (TF) ===")
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")
print(np.all(Y == Y_tf[0, :, :, 0]))

=== X ===
[[  0.   1.   2.   3.]
 [  4.   5.   6.   7.]
 [  8.   9.  10.  11.]
 [ 12.  13.  14.  15.]]
=== W ===
[[ 12.  11.  10.]
 [  9.   8.   7.]
 [  6.   5.   4.]]
=== Y ===
[[ 282.  354.]
 [ 570.  642.]]
=== Y (TF) ===
[[ 282.  354.]
 [ 570.  642.]]
=== Matched? ===
True


### 2. Convolution with padding

$(4 \times 4) * (3 \times 3) = (4 \times 4)$ where $P=1$

[numpy.pad()](https://numpy.org/doc/stable/reference/generated/numpy.pad.html)

In [70]:
# Zero-pad the original input by P on every side before sliding the filter.
X_org = float_sequence(4 * 4).reshape(4, 4)
P = 1
X = np.pad(X_org, pad_width=P, mode='constant')
W = 12 - float_sequence(3 * 3).reshape(3, 3)
print("=== X ===")
print(X)
print("=== W ===")
print(W)

# X is already padded, so the stride-1 output size is simply
# (padded size - filter size + 1) along each axis.
H_filter, W_filter = W.shape
H_out = X.shape[0] - H_filter + 1
W_out = X.shape[1] - W_filter + 1

Y = np.zeros((H_out, W_out))
for h, w in np.ndindex(H_out, W_out):
    # Element-wise product of the current window with the filter, then sum.
    window = X[h:h + H_filter, w:w + W_filter]
    Y[h, w] = np.sum(window * W)

print("=== Y ===")
print(Y)

# TensorFlow receives the *unpadded* input; padding='SAME' does the padding.
# The recorded output shows it agrees with the manual P=1 zero padding here.
with tf.Session() as sess:
    conv = tf.nn.conv2d(
        X_org[np.newaxis, :, :, np.newaxis],
        W[:, :, np.newaxis, np.newaxis],
        strides=[1, 1, 1, 1],
        padding='SAME'
    )
    Y_tf = sess.run(conv)
    print("=== Y (TF) ===")
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")
print(np.all(Y == Y_tf[0, :, :, 0]))

=== X ===
[[  0.   0.   0.   0.   0.   0.]
 [  0.   0.   1.   2.   3.   0.]
 [  0.   4.   5.   6.   7.   0.]
 [  0.   8.   9.  10.  11.   0.]
 [  0.  12.  13.  14.  15.   0.]
 [  0.   0.   0.   0.   0.   0.]]
=== W ===
[[ 12.  11.  10.]
 [  9.   8.   7.]
 [  6.   5.   4.]]
=== Y ===
[[  47.   95.  134.  113.]
 [ 153.  282.  354.  282.]
 [ 333.  570.  642.  486.]
 [ 365.  605.  662.  487.]]
=== Y (TF) ===
[[  47.   95.  134.  113.]
 [ 153.  282.  354.  282.]
 [ 333.  570.  642.  486.]
 [ 365.  605.  662.  487.]]
=== Matched? ===
True


### 3. Convolution with Stride

$(7 \times 7) * (3 \times 3) = (3 \times 3)$ where $S=2$

In [67]:
X = float_sequence(7 * 7).reshape(7, 7)
W = 12 - float_sequence(3 * 3).reshape(3, 3)
S = 2
print("=== X ===")
print(X)
print("=== W ===")
print(W)

# With stride S and no padding: out = (in - filter) // S + 1 along each axis.
H_filter, W_filter = W.shape
H_out = (X.shape[0] - H_filter) // S + 1
W_out = (X.shape[1] - W_filter) // S + 1

Y = np.zeros((H_out, W_out))
for h, w in np.ndindex(H_out, W_out):
    # Output index (h, w) maps to a window whose top-left corner is (h*S, w*S).
    top, left = h * S, w * S
    window = X[top:top + H_filter, left:left + W_filter]
    Y[h, w] = np.sum(window * W)

print("=== Y ===")
print(Y)

# Cross-check with TensorFlow; the stride applies to the two spatial axes only.
with tf.Session() as sess:
    conv = tf.nn.conv2d(
        X[np.newaxis, :, :, np.newaxis],
        W[:, :, np.newaxis, np.newaxis],
        strides=[1, 2, 2, 1],
        padding='VALID'
    )
    Y_tf = sess.run(conv)
    print("=== Y (TF) ===")
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")
print(np.all(Y == Y_tf[0, :, :, 0]))

=== X ===
[[  0.   1.   2.   3.   4.   5.   6.]
 [  7.   8.   9.  10.  11.  12.  13.]
 [ 14.  15.  16.  17.  18.  19.  20.]
 [ 21.  22.  23.  24.  25.  26.  27.]
 [ 28.  29.  30.  31.  32.  33.  34.]
 [ 35.  36.  37.  38.  39.  40.  41.]
 [ 42.  43.  44.  45.  46.  47.  48.]]
=== W ===
[[ 12.  11.  10.]
 [  9.   8.   7.]
 [  6.   5.   4.]]
=== Y ===
[[  444.   588.   732.]
 [ 1452.  1596.  1740.]
 [ 2460.  2604.  2748.]]
=== Y (TF) ===
[[  444.   588.   732.]
 [ 1452.  1596.  1740.]
 [ 2460.  2604.  2748.]]
=== Matched? ===
True


### 4. Padding and Stride

$(7 \times 7) * (3 \times 3) = (4 \times 4)$ where $P=1, S=2$

The size of output can be calculated by

$$
H_{out} = \frac{H_{in} + 2P - H_{filter}}{S} + 1
$$

$$
W_{out} = \frac{W_{in} + 2P - W_{filter}}{S} + 1
$$

This formula gives the dimension for current case

$$
H_{out} = \frac{7 + 2 \times 1 - 3}{2} + 1 = 4
$$

$$
W_{out} = \frac{7 + 2 \times 1 - 3}{2} + 1 = 4
$$

In [72]:
# Zero-pad the original input by P on every side, then slide the filter with stride S.
X_org = float_sequence(7 * 7).reshape(7, 7)
P = 1
X = np.pad(X_org, pad_width=P, mode='constant')
W = 12 - float_sequence(3 * 3).reshape(3, 3)
print("=== X ===")
print(X)
print("=== W ===")
print(W)

S = 2

# X already includes the 2P padding, so out = (padded - filter) // S + 1.
H_filter, W_filter = W.shape
H_out = (X.shape[0] - H_filter) // S + 1
W_out = (X.shape[1] - W_filter) // S + 1

Y = np.zeros((H_out, W_out))
for h, w in np.ndindex(H_out, W_out):
    # Output index (h, w) maps to a window whose top-left corner is (h*S, w*S).
    top, left = h * S, w * S
    window = X[top:top + H_filter, left:left + W_filter]
    Y[h, w] = np.sum(window * W)

print("=== Y ===")
print(Y)

# TensorFlow receives the *unpadded* input; padding='SAME' does the padding.
# The recorded output shows it agrees with the manual P=1 zero padding for this shape.
with tf.Session() as sess:
    conv = tf.nn.conv2d(
        X_org[np.newaxis, :, :, np.newaxis],
        W[:, :, np.newaxis, np.newaxis],
        strides=[1, 2, 2, 1],
        padding='SAME'
    )
    Y_tf = sess.run(conv)
    print("=== Y (TF) ===")
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")
print(np.all(Y == Y_tf[0, :, :, 0]))

=== X ===
[[  0.   0.   0.   0.   0.   0.   0.   0.   0.]
 [  0.   0.   1.   2.   3.   4.   5.   6.   0.]
 [  0.   7.   8.   9.  10.  11.  12.  13.   0.]
 [  0.  14.  15.  16.  17.  18.  19.  20.   0.]
 [  0.  21.  22.  23.  24.  25.  26.  27.   0.]
 [  0.  28.  29.  30.  31.  32.  33.  34.   0.]
 [  0.  35.  36.  37.  38.  39.  40.  41.   0.]
 [  0.  42.  43.  44.  45.  46.  47.  48.   0.]
 [  0.   0.   0.   0.   0.   0.   0.   0.   0.]]
=== W ===
[[ 12.  11.  10.]
 [  9.   8.   7.]
 [  6.   5.   4.]]
=== Y ===
[[   74.   179.   257.   230.]
 [  567.  1020.  1164.   909.]
 [ 1197.  2028.  2172.  1623.]
 [ 1382.  2273.  2387.  1738.]]
=== Y (TF) ===
[[   74.   179.   257.   230.]
 [  567.  1020.  1164.   909.]
 [ 1197.  2028.  2172.  1623.]
 [ 1382.  2273.  2387.  1738.]]
=== Matched? ===
True


### 5. Channel

$(4 \times 4 \times 3) * (3 \times 3 \times 3) = (2 \times 2)$

In [83]:
# Channels are stored last: X is (H_in, W_in, C_in) and W is (H_filter, W_filter, C_in).
X = float_sequence(4 * 4 * 3).reshape(4, 4, 3)
W = 30 - float_sequence(3 * 3 * 3).reshape(3, 3, 3)
# Move the channel axis to the front so each channel prints as one 2-D slice.
print("=== X ===")
print(X.transpose(2, 0, 1))
print("=== W ===")
print(W.transpose(2, 0, 1))

H_filter, W_filter, _ = W.shape
H_out = X.shape[0] - H_filter + 1
W_out = X.shape[1] - W_filter + 1

Y = np.zeros((H_out, W_out))
for h, w in np.ndindex(H_out, W_out):
    # The window spans every input channel; summing over all three axes
    # collapses it to a single output value.
    window = X[h:h + H_filter, w:w + W_filter, :]
    Y[h, w] = np.sum(window * W)

print("=== Y ===")
print(Y)

# Cross-check with TensorFlow: add the batch axis to X and a single
# output-channel axis to W.
with tf.Session() as sess:
    conv = tf.nn.conv2d(
        X[np.newaxis, ...],
        W[..., np.newaxis],
        strides=[1, 1, 1, 1],
        padding='VALID'
    )
    Y_tf = sess.run(conv)
    print("=== Y (TF) ===")
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")
print(np.all(Y == Y_tf[0, :, :, 0]))

=== X ===
[[[  0.   3.   6.   9.]
  [ 12.  15.  18.  21.]
  [ 24.  27.  30.  33.]
  [ 36.  39.  42.  45.]]

 [[  1.   4.   7.  10.]
  [ 13.  16.  19.  22.]
  [ 25.  28.  31.  34.]
  [ 37.  40.  43.  46.]]

 [[  2.   5.   8.  11.]
  [ 14.  17.  20.  23.]
  [ 26.  29.  32.  35.]
  [ 38.  41.  44.  47.]]]
=== W ===
[[[ 30.  27.  24.]
  [ 21.  18.  15.]
  [ 12.   9.   6.]]

 [[ 29.  26.  23.]
  [ 20.  17.  14.]
  [ 11.   8.   5.]]

 [[ 28.  25.  22.]
  [ 19.  16.  13.]
  [ 10.   7.   4.]]]
=== Y ===
[[  5220.   6597.]
 [ 10728.  12105.]]
=== Y (TF) ===
[[  5220.   6597.]
 [ 10728.  12105.]]
=== Matched? ===
True


### 6. Channel and bias 

$(4 \times 4 \times 3) * (3 \times 3 \times 3) + (1) = (2 \times 2)$

In [87]:
# Same multi-channel convolution as the previous section, plus a scalar bias.
X = float_sequence(4 * 4 * 3).reshape(4, 4, 3)
W = 30 - float_sequence(3 * 3 * 3).reshape(3, 3, 3)
b = np.array([10])
# Move the channel axis to the front so each channel prints as one 2-D slice.
print("=== X ===")
print(X.transpose(2, 0, 1))
print("=== W ===")
print(W.transpose(2, 0, 1))
print("=== b ===")
print(b)


H_filter, W_filter, _ = W.shape
H_out = X.shape[0] - H_filter + 1
W_out = X.shape[1] - W_filter + 1

Y_wo_bias = np.zeros((H_out, W_out))
for h, w in np.ndindex(H_out, W_out):
    window = X[h:h + H_filter, w:w + W_filter, :]
    Y_wo_bias[h, w] = np.sum(window * W)

print("=== Y_wo_bias ===")
print(Y_wo_bias)

# The one-element bias broadcasts over every output position.
Y = Y_wo_bias + b
print("=== Y ===")
print(Y)

# Cross-check with TensorFlow, adding the same bias to the convolution result.
with tf.Session() as sess:
    conv = tf.nn.conv2d(
        X[np.newaxis, ...],
        W[..., np.newaxis],
        strides=[1, 1, 1, 1],
        padding='VALID'
    )
    Y_tf = sess.run(conv + b)
    print("=== Y (TF) ===")
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")
print(np.all(Y == Y_tf[0, :, :, 0]))

=== X ===
[[[  0.   3.   6.   9.]
  [ 12.  15.  18.  21.]
  [ 24.  27.  30.  33.]
  [ 36.  39.  42.  45.]]

 [[  1.   4.   7.  10.]
  [ 13.  16.  19.  22.]
  [ 25.  28.  31.  34.]
  [ 37.  40.  43.  46.]]

 [[  2.   5.   8.  11.]
  [ 14.  17.  20.  23.]
  [ 26.  29.  32.  35.]
  [ 38.  41.  44.  47.]]]
=== W ===
[[[ 30.  27.  24.]
  [ 21.  18.  15.]
  [ 12.   9.   6.]]

 [[ 29.  26.  23.]
  [ 20.  17.  14.]
  [ 11.   8.   5.]]

 [[ 28.  25.  22.]
  [ 19.  16.  13.]
  [ 10.   7.   4.]]]
=== b ===
[10]
=== Y_wo_bias ===
[[  5220.   6597.]
 [ 10728.  12105.]]
=== Y ===
[[  5230.   6607.]
 [ 10738.  12115.]]
=== Y (TF) ===
[[  5230.   6607.]
 [ 10738.  12115.]]
=== Matched? ===
True


### 7. Multiple Filters

$(4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) = (2 \times 2 \times 4)$

In [98]:
# W stacks C_out filters along its last axis: (H_filter, W_filter, C_in, C_out).
X = float_sequence(4 * 4 * 3).reshape(4, 4, 3)
W = 120 - float_sequence(3 * 3 * 3 * 4).reshape(3, 3, 3, 4)
print("=== X ===")
print(X.transpose(2, 0, 1))
# To inspect the k-th filter channel by channel, print
# W[:, :, :, k].transpose(2, 0, 1) for k in 0..3.

H_filter, W_filter, _, C_out = W.shape
H_out = X.shape[0] - H_filter + 1
W_out = X.shape[1] - W_filter + 1

Y = np.zeros((H_out, W_out, C_out))
for h, w in np.ndindex(H_out, W_out):
    window = X[h:h + H_filter, w:w + W_filter, :]
    # Each filter turns the same window into one output channel.
    for c_out in range(C_out):
        Y[h, w, c_out] = np.sum(window * W[:, :, :, c_out])

# Move the output-channel axis to the front so each channel prints as a 2-D slice.
print("=== Y ===")
print(Y.transpose(2, 0, 1))

# Cross-check with TensorFlow; W already has the (H, W, C_in, C_out) layout.
with tf.Session() as sess:
    conv = tf.nn.conv2d(
        X[np.newaxis, ...],
        W,
        strides=[1, 1, 1, 1],
        padding='VALID'
    )
    Y_tf = sess.run(conv)
    print("=== Y (TF) ===")
    print(Y_tf[0, :, :, :].transpose(2, 0, 1))

print("=== Matched? ===")
print(np.all(Y == Y_tf[0, :, :, :]))

=== X ===
[[[  0.   3.   6.   9.]
  [ 12.  15.  18.  21.]
  [ 24.  27.  30.  33.]
  [ 36.  39.  42.  45.]]

 [[  1.   4.   7.  10.]
  [ 13.  16.  19.  22.]
  [ 25.  28.  31.  34.]
  [ 37.  40.  43.  46.]]

 [[  2.   5.   8.  11.]
  [ 14.  17.  20.  23.]
  [ 26.  29.  32.  35.]
  [ 38.  41.  44.  47.]]]
=== Y ===
[[[ 20880.  26388.]
  [ 42912.  48420.]]

 [[ 20448.  25875.]
  [ 42156.  47583.]]

 [[ 20016.  25362.]
  [ 41400.  46746.]]

 [[ 19584.  24849.]
  [ 40644.  45909.]]]
=== Y (TF) ===
[[[ 20880.  26388.]
  [ 42912.  48420.]]

 [[ 20448.  25875.]
  [ 42156.  47583.]]

 [[ 20016.  25362.]
  [ 41400.  46746.]]

 [[ 19584.  24849.]
  [ 40644.  45909.]]]
=== Matched? ===
True


### 8. Multiple Filters + bias

$(4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) + (1 \times 4)= (2 \times 2 \times 4)$

In [100]:
# Multiple filters plus one bias value per output channel.
# X is (H_in, W_in, C_in), W is (H_filter, W_filter, C_in, C_out), b is (C_out,).
X = float_sequence(4 * 4 * 3).reshape(4, 4, 3)
W = 120 - float_sequence(3 * 3 * 3 * 4).reshape(3, 3, 3, 4)
b = np.array([10, 100, 1000, 10000])
print("=== X ===")
print(X.transpose(2, 0, 1))


H_filter, W_filter, _, C_out = W.shape
H_out = X.shape[0] - H_filter + 1
W_out = X.shape[1] - W_filter + 1

Y_wo_bias = np.zeros((H_out, W_out, C_out))
for h, w in np.ndindex(H_out, W_out):
    window = X[h:h + H_filter, w:w + W_filter, :]
    # Each filter turns the same window into one output channel.
    for c_out in range(C_out):
        Y_wo_bias[h, w, c_out] = np.sum(window * W[:, :, :, c_out])

print("=== Y_wo_bias ===")
print(Y_wo_bias.transpose(2, 0, 1))

# b has one entry per output channel and broadcasts along the last axis.
Y = Y_wo_bias + b
print("=== Y ===")
print(Y.transpose(2, 0, 1))

# Cross-check with TensorFlow. The bias must be added to the convolution
# result as well, otherwise the comparison is against the bias-free output.
# It is cast to float32 to match the dtype of the convolution result.
with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X.reshape(1, 4, 4, 3),
        W.reshape(3, 3, 3, 4),
        strides=[1, 1, 1, 1],
        padding='VALID'
    ) + b.astype(np.float32))
    print("=== Y (TF) ===")
    print(Y_tf[0, :, :, :].transpose(2, 0, 1))

print("=== Matched? ===")
print(np.all(Y == Y_tf[0, :, :, :]))

=== X ===
[[[  0.   3.   6.   9.]
  [ 12.  15.  18.  21.]
  [ 24.  27.  30.  33.]
  [ 36.  39.  42.  45.]]

 [[  1.   4.   7.  10.]
  [ 13.  16.  19.  22.]
  [ 25.  28.  31.  34.]
  [ 37.  40.  43.  46.]]

 [[  2.   5.   8.  11.]
  [ 14.  17.  20.  23.]
  [ 26.  29.  32.  35.]
  [ 38.  41.  44.  47.]]]
=== Y_wo_bias ===
[[[ 20880.  26388.]
  [ 42912.  48420.]]

 [[ 20448.  25875.]
  [ 42156.  47583.]]

 [[ 20016.  25362.]
  [ 41400.  46746.]]

 [[ 19584.  24849.]
  [ 40644.  45909.]]]
=== Y ===
[[[ 20890.  26398.]
  [ 42922.  48430.]]

 [[ 20548.  25975.]
  [ 42256.  47683.]]

 [[ 21016.  26362.]
  [ 42400.  47746.]]

 [[ 29584.  34849.]
  [ 50644.  55909.]]]


# Convolution Operation Naive Backward Pass

Try calculating with NumPy, then check against TensorFlow and the numerical gradient

# Fast Forward Pass with im2col

Try calculating with NumPy, then check against the results above

# Convolution Operation Fast Backward

Try calculating with NumPy, then check against TensorFlow

# Fast Forward Pass with im2col

Try calculating with NumPy, then check against the results above