In [1]:
import numpy as np
import tensorflow as tf

# Notation

Convolution is an operation between matrices or tensors denoted by $*$.


| Symbols                   | Meaning               | Size                                              |
|:--------------------------|:----------------------|:--------------------------------------------------|
| $\boldsymbol{X}$          | 2D Input Matrix       | $(H_{in}$, $W_{in})$                              |
| $\boldsymbol{\mathsf{X}}$ | 3D Input Tensor       | $(H_{in}$, $W_{in}$, $C_{in})$                    |
| $\boldsymbol{\mathsf{X}}$ | 4D Input Tensor       | $(N_{batch}$, $H_{in}$, $W_{in}$, $C_{in})$       |
| $\boldsymbol{W}$          | 2D Filter Matrix      | $(H_{filter}$, $W_{filter})$                      |
| $\boldsymbol{\mathsf{W}}$ | 3D Filter Tensor      | $(H_{filter}$, $W_{filter}$, $C_{in})$            |
| $\boldsymbol{\mathsf{W}}$ | 4D Filter Tensor      | $(H_{filter}$, $W_{filter}$, $C_{in}$, $C_{out})$ |
| $\boldsymbol{Y}$          | 2D Output Matrix      | $(H_{out}$, $W_{out})$                            |
| $\boldsymbol{\mathsf{Y}}$ | 3D Output Tensor      | $(H_{out}$, $W_{out}$, $C_{out})$                 |
| $\boldsymbol{\mathsf{Y}}$ | 4D Output Tensor      | $(N_{batch}$, $H_{out}$, $W_{out}$, $C_{out})$    |
| $\boldsymbol{b}$          | 1D Bias Vector        | $(C_{out})$                                       |

Please note that letter `w` stands for `weight` as in $\boldsymbol{W}$ or $\boldsymbol{\mathsf{W}}$, and `width` as in $W_{in}$, too.

Also, a convolution can takes padding $P$ and stride $S$.

# 07-03: Efficient Convolution Forward Pass using im2col

Try calculation with NumPy and TensorFlow for the followings

#### 1. Basic 2D Convolution
$(4 \times 4) * (3 \times 3) = (2 \times 2)$

#### 2. Padding
$(4 \times 4) * (3 \times 3) = (4 \times 4)$ where $P=1$

#### 3. Stride
$(7 \times 7) * (3 \times 3) = (3 \times 3)$ where $S=2$

#### 4. Padding and Stride
$(7 \times 7) * (3 \times 3) = (4 \times 4)$ where $P=1, S=2$

#### 5. Channel
$(4 \times 4 \times 3) * (3 \times 3 \times 3) = (2 \times 2)$

#### 6. Channel and bias 
$(4 \times 4 \times 3) * (3 \times 3 \times 3) + (1) = (2 \times 2)$

#### 7. Multiple Filters
$(4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) = (2 \times 2 \times 4)$

#### 8.Multiple Filters + bias 
$(4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) + (4)= (2 \times 2 \times 4)$

#### 9. Mini-batch + bias
$(3 \times 4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) + (4)= (3 \times 2 \times 2 \times 4)$

#### 10. RGB Mini-batch $*$ Multiple Filters with stride and padding
$(3 \times 7 \times 7 \times 3) * (3 \times 3 \times 3 \times 4) + (4)= (3 \times 4 \times 4 \times 4)$ where $P=1, S=2$


In [2]:
def float_sequence(size):
    return np.arange(size, dtype=np.float32)

### 1. Basic Convolution using im2col

Convolution: $(4 \times 4) * (3 \times 3) = (2 \times 2)$

im2col: $(4 \times 9) \times (9 \times 1) = (4 \times 1)$, then reshape to $(2 \times 2)$

In [39]:
X = float_sequence(4*4).reshape(4,4)
W = 12 - float_sequence(3*3).reshape(3,3)

# Expand input X into dottable form
X_col = np.zeros((4,9))
for h in range(2):
    for w in range(2):
        h_start = h
        h_end   = h_start + 3
        w_start = w
        w_end   = w_start + 3
        
        X_slice = X[h_start:h_end, w_start:w_end]
        X_col_row_index = h * 2 + w
        X_col[X_col_row_index, :] = X_slice.reshape(1, -1)

print("=== X ===")
print(X)
print("=== X_col ===")     
print(X_col)

# Reshape filters W into into dottable form
W_col = W.reshape(-1, 1)

print("=== W ===")
print(W)
print("=== W_col ===")
print(W_col)

# Matrix-multiply X_col and W_col
Y_col = np.dot(X_col, W_col)

# Reshape Y_col into output form
Y = Y_col.reshape(2, 2)


print("=== Y_col ===")
print(Y_col)
print("=== Y ===")
print(Y)


with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X.reshape(1, 4, 4, 1),
        W.reshape(3, 3, 1, 1),
        strides=[1, 1, 1, 1],
        padding='VALID'
    ))
    print("=== Y (TF) ===")     
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")    
print(np.all(Y == Y_tf[0, :, :, 0]))

=== X ===
[[  0.   1.   2.   3.]
 [  4.   5.   6.   7.]
 [  8.   9.  10.  11.]
 [ 12.  13.  14.  15.]]
=== X_col ===
[[  0.   1.   2.   4.   5.   6.   8.   9.  10.]
 [  1.   2.   3.   5.   6.   7.   9.  10.  11.]
 [  4.   5.   6.   8.   9.  10.  12.  13.  14.]
 [  5.   6.   7.   9.  10.  11.  13.  14.  15.]]
=== W ===
[[ 12.  11.  10.]
 [  9.   8.   7.]
 [  6.   5.   4.]]
=== W_col ===
[[ 12.]
 [ 11.]
 [ 10.]
 [  9.]
 [  8.]
 [  7.]
 [  6.]
 [  5.]
 [  4.]]
=== Y_col ===
[[ 282.]
 [ 354.]
 [ 570.]
 [ 642.]]
=== Y ===
[[ 282.  354.]
 [ 570.  642.]]
=== Y (TF) ===
[[ 282.  354.]
 [ 570.  642.]]
=== Matched? ===
True


### 2. Convolution with padding using im2col

Convolution: $(4 \times 4) * (3 \times 3) = (4 \times 4)$ where $P=1$

im2col: $(16 \times 9) \times (9 \times 1) = (16 \times 1)$, then reshape to $(4 \times 4)$

[numpy.pad()](https://docs.scipy.org/doc/numpy/reference/generated/numpy.pad.html)

In [29]:
X_org = float_sequence(4*4).reshape(4,4)
P = 1
X = np.pad(X_org, ((P, P), (P, P)), 'constant')
W = 12 - float_sequence(3*3).reshape(3,3)

H_out = (4 + 2*P - 3) + 1
W_out = (4 + 2*P - 3) + 1

X_col = np.zeros((16,9))
for h in range(H_out):
    for w in range(W_out):
        h_start = h
        h_end   = h_start + 3
        w_start = w
        w_end   = w_start + 3
        
        X_slice = X[h_start:h_end, w_start:w_end]
        X_col_row_index = h * H_out + w
        X_col[X_col_row_index, :] = X_slice.reshape(1, -1)

# print("=== X ===")
# print(X)
# print("=== X_col ===")     
# print(X_col)


# Reshape filters W into into dottable form
W_col = W.reshape(-1, 1)

# print("=== W ===")
# print(W)
# print("=== W_col ===")
# print(W_col)

# Matrix-multiply X_col and W_col
Y_col = np.dot(X_col, W_col)

# Reshape Y_col into output form
Y = Y_col.reshape(H_out, W_out)

print("=== Y_col ===")
print(Y_col)
print("=== Y ===")
print(Y)

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X_org.reshape(1, 4, 4, 1),
        W.reshape(3, 3, 1, 1),
        strides=[1, 1, 1, 1],
        padding='SAME'
    ))
    print("=== Y (TF) ===")     
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")    
print(np.all(Y == Y_tf[0, :, :, 0]))

=== Y_col ===
[[  47.]
 [  95.]
 [ 134.]
 [ 113.]
 [ 153.]
 [ 282.]
 [ 354.]
 [ 282.]
 [ 333.]
 [ 570.]
 [ 642.]
 [ 486.]
 [ 365.]
 [ 605.]
 [ 662.]
 [ 487.]]
=== Y ===
[[  47.   95.  134.  113.]
 [ 153.  282.  354.  282.]
 [ 333.  570.  642.  486.]
 [ 365.  605.  662.  487.]]
=== Y (TF) ===
[[  47.   95.  134.  113.]
 [ 153.  282.  354.  282.]
 [ 333.  570.  642.  486.]
 [ 365.  605.  662.  487.]]
=== Matched? ===
True


### 3. Convolution with Stride using im2col

Convolution: $(7 \times 7) * (3 \times 3) = (3 \times 3)$ where $S=2$

im2col: $(9 \times 9) \times (9 \times 1) = (9 \times 1)$, then reshape to $(3 \times 3)$

In [35]:
P = 0
S = 2
X = float_sequence(7*7).reshape(7,7)
W = 12 - float_sequence(3*3).reshape(3,3)

H_out = (7 + 2*P - 3) // S + 1
W_out = (7 + 2*P - 3) // S + 1

X_col = np.zeros((H_out * W_out, 9))
for h in range(H_out):
    for w in range(W_out):
        h_start = h * S
        h_end   = h_start + 3
        w_start = w * S
        w_end   = w_start + 3
        
        X_slice = X[h_start:h_end, w_start:w_end]
        X_col_row_index = h * H_out + w
        X_col[X_col_row_index, :] = X_slice.reshape(1, -1)


# print("=== X ===")
# print(X)
# print("=== X_col ===")     
# print(X_col)



# Reshape filters W into into dottable form
W_col = W.reshape(-1, 1)

# print("=== W ===")
# print(W)
# print("=== W_col ===")
# print(W_col)


# Matrix-multiply X_col and W_col
Y_col = np.dot(X_col, W_col)

# Reshape Y_col into output form
Y = Y_col.reshape(H_out, W_out)

print("=== Y_col ===")
print(Y_col)
print("=== Y ===")
print(Y)


with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X.reshape(1, 7, 7, 1),
        W.reshape(3, 3, 1, 1),
        strides=[1, S, S, 1],
        padding='VALID'
    ))
    print("=== Y (TF) ===")     
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")    
print(np.all(Y == Y_tf[0, :, :, 0]))

=== Y_col ===
[[  444.]
 [  588.]
 [  732.]
 [ 1452.]
 [ 1596.]
 [ 1740.]
 [ 2460.]
 [ 2604.]
 [ 2748.]]
=== Y ===
[[  444.   588.   732.]
 [ 1452.  1596.  1740.]
 [ 2460.  2604.  2748.]]
=== Y (TF) ===
[[  444.   588.   732.]
 [ 1452.  1596.  1740.]
 [ 2460.  2604.  2748.]]
=== Matched? ===
True


### 4. Convolution with Padding and Stride using im2col

Convolution: $(7 \times 7) * (3 \times 3) = (4 \times 4)$ where $P=1, S=2$

im2col: $(16 \times 9) \times (9 \times 1) = (16 \times 1)$, then reshape to $(4 \times 4)$

In [41]:
P = 1
S = 2
X_org = float_sequence(7*7).reshape(7,7)
X = np.pad(X_org, ((P, P), (P, P)), 'constant')
W = 12 - float_sequence(3*3).reshape(3,3)

H_out = (7 + 2*P - 3) // S + 1
W_out = (7 + 2*P - 3) // S + 1

X_col = np.zeros((H_out * W_out, 9))
for h in range(H_out):
    for w in range(W_out):
        h_start = h * S
        h_end   = h_start + 3
        w_start = w * S
        w_end   = w_start + 3
        
        X_slice = X[h_start:h_end, w_start:w_end]
        X_col_row_index = h * H_out + w
        X_col[X_col_row_index, :] = X_slice.reshape(1, -1)


# print("=== X ===")
# print(X)
# print("=== X_col ===")     
# print(X_col)



# Reshape filters W into into dottable form
W_col = W.reshape(-1, 1)

# print("=== W ===")
# print(W)
# print("=== W_col ===")
# print(W_col)


# Matrix-multiply X_col and W_col
Y_col = np.dot(X_col, W_col)

# Reshape Y_col into output form
Y = Y_col.reshape(H_out, W_out)

# print("=== Y_col ===")
# print(Y_col)
print("=== Y ===")
print(Y)


with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X_org.reshape(1, 7, 7, 1),
        W.reshape(3, 3, 1, 1),
        strides=[1, 2, 2, 1],
        padding='SAME'
    ))
    print("=== Y (TF) ===")     
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")    
print(np.all(Y == Y_tf[0, :, :, 0]))

=== Y ===
[[   74.   179.   257.   230.]
 [  567.  1020.  1164.   909.]
 [ 1197.  2028.  2172.  1623.]
 [ 1382.  2273.  2387.  1738.]]
=== Y (TF) ===
[[   74.   179.   257.   230.]
 [  567.  1020.  1164.   909.]
 [ 1197.  2028.  2172.  1623.]
 [ 1382.  2273.  2387.  1738.]]
=== Matched? ===
True


### 5. Convolution with Channels using im2col

Convolution: $(4 \times 4 \times 3) * (3 \times 3 \times 3) = (2 \times 2)$

im2col: $(4 \times 27) \times (27 \times 1) = (4 \times 1)$, then reshape to $(2 \times 2)$

In [56]:
P = 0
S = 1
X = float_sequence(4*4*3).reshape(4,4,3)
W = 30 - float_sequence(3*3*3).reshape(3,3,3)

H_out = (4 + 2*P - 3) // S + 1
W_out = (4 + 2*P - 3) // S + 1

#================== X ==================
X_col = np.zeros((H_out * W_out, 3*3*3))
for h in range(H_out):
    for w in range(W_out):
        h_start = h * S
        h_end   = h_start + 3
        w_start = w * S
        w_end   = w_start + 3
        
        X_slice = X[h_start:h_end, w_start:w_end, :].transpose(2, 0, 1)
        X_col_row_index = h * H_out + w
        X_col[X_col_row_index, :] = X_slice.reshape(1, -1)


# print("=== X (red)===")
# print(X[:, :, 0])
# print("=== X (green)===")
# print(X[:, :, 1])
# print("=== X (blue)===")
# print(X[:, :, 2])
# print("=== X_col ===")     
# print(X_col.shape)
# print(X_col)


#================== W ==================

W_col = W.transpose(2, 0, 1).reshape(-1, 1)

# print("=== W (red) ===")
# print(W[:, :, 0])
# print("=== W (green) ===")
# print(W[:, :, 1])
# print("=== W (blue) ===")
# print(W[:, :, 2])
# print("=== W_col ===")
# print(W_col)


#================== Y ==================

# Matrix-multiply X_col and W_col
Y_col = np.dot(X_col, W_col)

# Reshape Y_col into output form
Y = Y_col.reshape(H_out, W_out)

print("=== Y_col ===")
print(Y_col)
print("=== Y ===")
print(Y)


#================== tf ==================

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X.reshape(1, 4, 4, 3),
        W.reshape(3, 3, 3, 1),
        strides=[1, 1, 1, 1],
        padding='VALID'
    ))
    print("=== Y (TF) ===")     
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")    
print(np.all(Y == Y_tf[0, :, :, 0]))

=== Y_col ===
[[  5220.]
 [  6597.]
 [ 10728.]
 [ 12105.]]
=== Y ===
[[  5220.   6597.]
 [ 10728.  12105.]]
=== Y (TF) ===
[[  5220.   6597.]
 [ 10728.  12105.]]
=== Matched? ===
True


### 6. Convolution with Channels and bias using im2col

Convolution: $(4 \times 4 \times 3) * (3 \times 3 \times 3) + (1) = (2 \times 2)$

im2col: $(4 \times 27) \times (27 \times 1) = (4 \times 1)$, then reshape to $(2 \times 2)$

In [59]:
P = 0
S = 1
X = float_sequence(4*4*3).reshape(4,4,3)
W = 30 - float_sequence(3*3*3).reshape(3,3,3)
b = np.array([10])

H_out = (4 + 2*P - 3) // S + 1
W_out = (4 + 2*P - 3) // S + 1

#================== X ==================
X_col = np.zeros((H_out * W_out, 3*3*3))
for h in range(H_out):
    for w in range(W_out):
        h_start = h * S
        h_end   = h_start + 3
        w_start = w * S
        w_end   = w_start + 3
        
        X_slice = X[h_start:h_end, w_start:w_end, :].transpose(2, 0, 1)
        X_col_row_index = h * H_out + w
        X_col[X_col_row_index, :] = X_slice.reshape(1, -1)


#================== W ==================
W_col = W.transpose(2, 0, 1).reshape(-1, 1)


#================== Y ==================
Y_col = np.dot(X_col, W_col)
Y = Y_col.reshape(H_out, W_out) + b


with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X.reshape(1, 4, 4, 3),
        W.reshape(3, 3, 3, 1),
        strides=[1, 1, 1, 1],
        padding='VALID'
    ) + b)
    print("=== Y (TF) ===")     
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")    
print(np.all(Y == Y_tf[0, :, :, 0]))

=== Y (TF) ===
[[  5230.   6607.]
 [ 10738.  12115.]]
=== Matched? ===
True


### 7. Convolution with Multiple Filters using im2col

Convolution: $(4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) = (2 \times 2 \times 4)$

im2col: $(4 \times 27) \times (27 \times 4) = (4 \times 4)$, then reshape to $(2 \times 2 \times 4)$

In [71]:
P = 0
S = 1
X = float_sequence(4*4*3).reshape(4,4,3)
W = 120 - float_sequence(3*3*3*4).reshape(3,3,3,4)

H_out = (4 + 2*P - 3) // S + 1
W_out = (4 + 2*P - 3) // S + 1

#================== X ==================
X_col = np.zeros((H_out * W_out, 3*3*3))
for h in range(H_out):
    for w in range(W_out):
        h_start = h * S
        h_end   = h_start + 3
        w_start = w * S
        w_end   = w_start + 3
        
        X_slice = X[h_start:h_end, w_start:w_end, :].transpose(2, 0, 1)
        X_col_row_index = h * H_out + w
        X_col[X_col_row_index, :] = X_slice.reshape(1, -1)


# print("=== X (red)===")
# print(X[:, :, 0])
# print("=== X (green)===")
# print(X[:, :, 1])
# print("=== X (blue)===")
# print(X[:, :, 2])
# print("=== X_col ===")     
# print(X_col.shape)
# print(X_col)


#================== W ==================

W_col = W.transpose(2, 0, 1, 3).reshape(-1, 4)

# print("=== W (red, first filter) ===")
# print(W[:, :, 0, 0])
# print("=== W (green, first filter) ===")
# print(W[:, :, 1, 0])
# print("=== W (blue, first filter) ===")
# print(W[:, :, 2, 0])
# print("=== W (red, last filter) ===")
# print(W[:, :, 0, 3])
# print("=== W (green, last filter) ===")
# print(W[:, :, 1, 3])
# print("=== W (blue, last filter) ===")
# print(W[:, :, 2, 3])
# print("=== W_col ===")
# print(W_col)


#================== Y ==================

Y_col = np.dot(X_col, W_col)
Y = Y_col.reshape(H_out, W_out, 4)

print("=== Y_col ===")
print(Y_col)
print("=== Y (first channel) ===")
print(Y[:, :, 0])


with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X.reshape(1, 4, 4, 3),
        W.reshape(3, 3, 3, 4),
        strides=[1, 1, 1, 1],
        padding='VALID'
    ))
    print("=== Y (TF, first channel) ===")     
    print(Y_tf[0, :, :, 0])

print("=== Matched? ===")    
print(np.all(Y == Y_tf[0, :, :, :]))

=== Y_col ===
[[ 20880.  20448.  20016.  19584.]
 [ 26388.  25875.  25362.  24849.]
 [ 42912.  42156.  41400.  40644.]
 [ 48420.  47583.  46746.  45909.]]
=== Y (first channel) ===
[[ 20880.  26388.]
 [ 42912.  48420.]]
=== Y (TF, first channel) ===
[[ 20880.  26388.]
 [ 42912.  48420.]]
=== Matched? ===
True


### 8. Convolution with Multiple Filters + bias using im2col

Convolution: $(4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) + (4)= (2 \times 2 \times 4)$

im2col: $(4 \times 27) \times (27 \times 4) = (4 \times 4)$, then reshape to $(2 \times 2 \times 4)$

In [74]:
P = 0
S = 1
X = float_sequence(4*4*3).reshape(4,4,3)
W = 120 - float_sequence(3*3*3*4).reshape(3,3,3,4)
b = np.array([10, 100, 1000, 10000])

H_out = (4 + 2*P - 3) // S + 1
W_out = (4 + 2*P - 3) // S + 1

#================== X ==================
X_col = np.zeros((H_out * W_out, 3*3*3))
for h in range(H_out):
    for w in range(W_out):
        h_start = h * S
        h_end   = h_start + 3
        w_start = w * S
        w_end   = w_start + 3
        
        X_slice = X[h_start:h_end, w_start:w_end, :].transpose(2, 0, 1)
        X_col_row_index = h * H_out + w
        X_col[X_col_row_index, :] = X_slice.reshape(1, -1)


#================== W ==================
W_col = W.transpose(2, 0, 1, 3).reshape(-1, 4)

#================== Y ==================
Y_col = np.dot(X_col, W_col)
Y = Y_col.reshape(H_out, W_out, 4) + b


print("=== Y ===")     
print(Y)


with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X.reshape(1, 4, 4, 3),
        W.reshape(3, 3, 3, 4),
        strides=[1, 1, 1, 1],
        padding='VALID'
    ) + b)
    print("=== Y (TF) ===")     
    print(Y_tf[0, :, :, :])

print("=== Matched? ===")    
print(np.all(Y == Y_tf[0, :, :, :]))

=== Y ===
[[[ 20890.  20548.  21016.  29584.]
  [ 26398.  25975.  26362.  34849.]]

 [[ 42922.  42256.  42400.  50644.]
  [ 48430.  47683.  47746.  55909.]]]
=== Y (TF) ===
[[[ 20890.  20548.  21016.  29584.]
  [ 26398.  25975.  26362.  34849.]]

 [[ 42922.  42256.  42400.  50644.]
  [ 48430.  47683.  47746.  55909.]]]
=== Matched? ===
True


### 9. Convolution with Mini-batch + bias using im2col

Convolution: $(3 \times 4 \times 4 \times 3) * (3 \times 3 \times 3 \times 4) + (4)= (3 \times 2 \times 2 \times 4)$

im2col: $(12 \times 27) \times (27 \times 4) = (12 \times 4)$, then reshape to $(3 \times 2 \times 2 \times 4)$

In [90]:
P = 0
S = 1
X = float_sequence(3*4*4*3).reshape(3,4,4,3)
W = 120 - float_sequence(3*3*3*4).reshape(3,3,3,4)
b = np.array([10, 100, 1000, 10000])

H_out = (4 + 2*P - 3) // S + 1
W_out = (4 + 2*P - 3) // S + 1

#================== X ==================
X_col = np.zeros((3 * H_out * W_out, 3*3*3))
for n_batch in range(3):
    for h in range(H_out):
        for w in range(W_out):
            h_start = h * S
            h_end   = h_start + 3
            w_start = w * S
            w_end   = w_start + 3

            X_slice = X[n_batch, h_start:h_end, w_start:w_end, :].transpose(2, 0, 1)
            X_col_row_index = n_batch * (4) + h * H_out + w
#             print("index:", X_col_row_index)
            X_col[X_col_row_index, :] = X_slice.reshape(1, -1)


# print("=== X (red, first example)===")
# print(X[0, :, :, 0])
# print("=== X (green, first example)===")
# print(X[0, :, :, 1])
# print("=== X (blue, first example)===")
# print(X[0, :, :, 2])

# print("=== X (red, last example)===")
# print(X[2, :, :, 0])
# print("=== X (green, last example)===")
# print(X[2, :, :, 1])
# print("=== X (blue, last example)===")
# print(X[2, :, :, 2])

# print("=== X_col ===")     
# print(X_col.shape)
# print(X_col)



#================== W ==================
W_col = W.transpose(2, 0, 1, 3).reshape(-1, 4)

#================== Y ==================
Y_col = np.dot(X_col, W_col)
Y = Y_col.reshape(3, H_out, W_out, 4) + b

# print("=== Y_col ===")
# print(Y_col)
# print(Y_col.shape)
# print("=== Y ===")
# print(Y)

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X.reshape(3, 4, 4, 3),
        W.reshape(3, 3, 3, 4),
        strides=[1, 1, 1, 1],
        padding='VALID'
    ) + b)
#     print("=== Y (TF) ===")     
#     print(Y_tf[:, :, :, :])

print("=== Matched? ===")    
print(np.all(Y == Y_tf))

=== Matched? ===
True


#### 10. RGB Mini-batch $*$ Multiple Filters with stride and padding using im2col

$(3 \times 7 \times 7 \times 3) * (3 \times 3 \times 3 \times 4) + (4)= (3 \times 4 \times 4 \times 4)$ where $P=1, S=2$

im2col: $(48 \times 27) \times (27 \times 4) = (48 \times 4)$, then reshape to $(3 \times 4 \times 4 \times 4)$

In [95]:
P = 1
S = 2
X = np.pad(X_org, ((0, 0), (P, P), (P, P), (0, 0)), 'constant')
X_org = float_sequence(3*7*7*3).reshape(3,7,7,3)
W = 120 - float_sequence(3*3*3*4).reshape(3,3,3,4)
b = np.array([10, 100, 1000, 10000])

H_out = (7 + 2*P - 3) // S + 1
W_out = (7 + 2*P - 3) // S + 1

#================== X ==================
X_col = np.zeros((3 * H_out * W_out, 3*3*3))
for n_batch in range(3):
    for h in range(H_out):
        for w in range(W_out):
            h_start = h * S
            h_end   = h_start + 3
            w_start = w * S
            w_end   = w_start + 3

            X_slice = X[n_batch, h_start:h_end, w_start:w_end, :].transpose(2, 0, 1)
            X_col_row_index = n_batch * (H_out * W_out) + h * H_out + w
            X_col[X_col_row_index, :] = X_slice.reshape(1, -1)


#================== W ==================
W_col = W.transpose(2, 0, 1, 3).reshape(-1, 4)

#================== Y ==================
Y_col = np.dot(X_col, W_col)
Y = Y_col.reshape(3, H_out, W_out, 4) + b


print("=== Y (1st) ===")     
print(Y[0, :, :, :].transpose(2, 0, 1))

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X_org.reshape(3, 7, 7, 3),
        W.reshape(3, 3, 3, 4),
        strides=[1, 2, 2, 1],
        padding='SAME'
    ) + b)
    print("=== Y (TF, 1st) ===")     
    print(Y_tf[0, :, :, :].transpose(2, 0, 1))

print("=== Matched? ===")    
print(np.all(Y == Y_tf))

=== Y (1st) ===
[[[   4466.   11878.   17278.   16418.]
  [  40390.   75646.   86662.   69982.]
  [  87262.  152758.  163774.  125926.]
  [ 110882.  185758.  195046.  144434.]]

 [[   4400.   11653.   16945.   16172.]
  [  39679.   74413.   85267.   69001.]
  [  85795.  150391.  161245.  124189.]
  [ 109556.  183643.  192823.  142928.]]

 [[   5144.   12238.   17422.   16736.]
  [  39778.   73990.   84682.   68830.]
  [  85138.  148834.  159526.  123262.]
  [ 109040.  182338.  191410.  142232.]]

 [[  13988.   20923.   25999.   25400.]
  [  47977.   81667.   92197.   76759.]
  [  92581.  155377.  165907.  130435.]
  [ 116624.  189133.  198097.  149636.]]]
=== Y (TF, 1st) ===
[[[   4466.   11878.   17278.   16418.]
  [  40390.   75646.   86662.   69982.]
  [  87262.  152758.  163774.  125926.]
  [ 110882.  185758.  195046.  144434.]]

 [[   4400.   11653.   16945.   16172.]
  [  39679.   74413.   85267.   69001.]
  [  85795.  150391.  161245.  124189.]
  [ 109556.  183643.  192823.  142

# Generalized efficient convolution foward using im2col

In [115]:
def conv_foward(X, W, b, P=0, S=1):
    N_batch, H_in, W_in, C_in = X.shape
    H_filter, W_filter, C_in, C_out = W.shape
    
    H_out = (H_in + 2*P - H_filter) // S + 1
    W_out = (W_in + 2*P - W_filter) // S + 1
    
    if P > 0:
        X = np.pad(X, ((0, 0), (P, P), (P, P), (0, 0)), 'constant')

    X_col = np.zeros((N_batch * H_out * W_out, H_filter*W_filter*C_in))
    for n_batch in range(N_batch): # TODO: Maybe I can remove this loop over N_batch?
        for h in range(H_out):
            for w in range(W_out):
                h_start = h * S
                h_end   = h_start + H_filter
                w_start = w * S
                w_end   = w_start + W_filter

                X_slice = X[n_batch, h_start:h_end, w_start:w_end, :].transpose(2, 0, 1)
                X_col_row_index = n_batch * (H_out * W_out) + h * H_out + w
                X_col[X_col_row_index, :] = X_slice.reshape(1, -1)

    W_col = W.transpose(2, 0, 1, 3).reshape(-1, C_out)

    Y_col = np.dot(X_col, W_col)
    Y = Y_col.reshape(N_batch, H_out, W_out, C_out) + b

    return Y

In [116]:
P = 1
S = 2
X = np.random.randn(128, 7, 7, 3).astype(np.float32)
W = np.random.randn(3, 3, 3, 4).astype(np.float32)
b = np.random.randn(4).astype(np.float32)

Y = conv_foward(X, W, b, P, S)

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X,
        W,
        strides=[1, S, S, 1],
        padding='SAME'
    ) + b)

print("=== Matched? ===")    
check = np.linalg.norm(Y - Y_tf) / ((np.linalg.norm(Y) + np.linalg.norm(Y_tf)))
print(check < 1e-7, check)

=== Matched? ===
True 4.62691817187e-08


# Benchmark

In [117]:
P = 1
S = 1
X = np.random.randn(128, 28, 28, 3).astype(np.float32)
W = np.random.randn(3, 3, 3, 16).astype(np.float32)
b = np.random.randn(16).astype(np.float32)

Y = conv_foward(X, W, b, P, S)

with tf.Session() as sess:
    Y_tf = sess.run(tf.nn.conv2d(
        X,
        W,
        strides=[1, S, S, 1],
        padding='SAME'
    ) + b)

print("=== Matched? ===")    
check = np.linalg.norm(Y - Y_tf) / ((np.linalg.norm(Y) + np.linalg.norm(Y_tf)))
print(check < 1e-7, check)

=== Matched? ===
True 5.01429490937e-08


In [121]:
%%timeit -n3 -r3

conv_foward(X, W, b, P, S)

259 ms ± 2.29 ms per loop (mean ± std. dev. of 3 runs, 3 loops each)
