In [3]:
import numpy as np

In [44]:
def conv2d_single_channel(input, w):
    """Two-dimensional convolution of a single channel.

    Uses SAME padding with 0s, a stride of 1 and no dilation. As is usual in
    deep-learning code, the filter is applied without flipping (i.e. this is
    technically a cross-correlation).

    input: input array (or array-like) with shape (height, width)
    w: filter array with shape (fd, fd) with odd fd.

    Returns a result with the same shape as input. The result dtype is the
    NumPy promotion of the input and filter dtypes, so an integer input
    combined with a float filter yields a float result instead of having
    every partial sum truncated to an integer.

    Raises AssertionError if w is not square with an odd side length.
    """
    assert w.shape[0] == w.shape[1] and w.shape[0] % 2 == 1

    input = np.asarray(input)

    # SAME padding with zeros: creating a new padded array to simplify index
    # calculations and to avoid checking boundary conditions in the inner loop.
    # padded_input is like input, but padded on all sides with
    # half-the-filter-width of zeros.
    padded_input = np.pad(input,
                          pad_width=w.shape[0] // 2,
                          mode='constant',
                          constant_values=0)

    # Allocate the accumulator with the promoted dtype. Using the input's
    # dtype alone (np.zeros_like) would silently truncate float products when
    # the input is an integer array.
    output = np.zeros(input.shape, dtype=np.result_type(input.dtype, w.dtype))

    for i in range(output.shape[0]):  # slides window vertically
        for j in range(output.shape[1]):  # slides window horizontally
            # This inner double loop computes one output element by
            # multiplying the window of padded_input whose top-left corner is
            # (i, j) element-wise with the filter and summing the products.
            for fi in range(w.shape[0]):  # (i + fi): row in padded_input
                for fj in range(w.shape[1]):  # (j + fj): column in padded_input
                    output[i, j] += padded_input[i + fi, j + fj] * w[fi, fj]
    return output

In [49]:
# Smoke run: apply the single-channel convolution to a random 5x5 input with
# an all-ones 3x3 filter. The return value is assigned to `_` rather than
# left as the cell's last expression.
M = np.random.randn(5,5)
W = np.ones((3,3))
_=conv2d_single_channel(M,W)

(7, 7)


In [50]:
def conv2d_multi_channel(input, w):
    """Multi-channel 2D convolution (SAME zero padding, stride 1, no dilation).

    input: array of shape (height, width, in_depth).
    w: filter array of shape (fd, fd, in_depth, out_depth), fd odd. Its
       in_depth must equal the input's in_depth; out_depth is the number of
       output channels.

    Returns a float array of shape (height, width, out_depth). Every output
    channel is the sum, over all input channels, of that channel's 2D
    convolution with the matching filter slice.
    """
    assert w.shape[0] == w.shape[1] and w.shape[0] % 2 == 1

    # Zero-pad only the two spatial axes by half the filter size; the channel
    # axis is left untouched.
    border = w.shape[0] // 2
    spatial_pad = (border, border)
    padded = np.pad(input,
                    pad_width=(spatial_pad, spatial_pad, (0, 0)),
                    mode='constant',
                    constant_values=0)

    rows, cols, n_in = input.shape
    assert n_in == w.shape[2]
    n_out = w.shape[3]
    result = np.zeros((rows, cols, n_out))

    k_rows, k_cols = w.shape[0], w.shape[1]
    for oc in range(n_out):
        # Visit every output pixel of this output channel in row-major order.
        for r, q in np.ndindex(rows, cols):
            # Accumulate over input channels, then over the filter window
            # whose top-left corner in the padded array is (r, q).
            for ic in range(n_in):
                for dr, dq in np.ndindex(k_rows, k_cols):
                    weight = w[dr, dq, ic, oc]
                    result[r, q, oc] += padded[r + dr, q + dq, ic] * weight
    return result

In [51]:
# Smoke run: random 5x5 input with 3 channels, convolved with an all-ones
# 3x3 filter bank mapping 3 input channels to 5 output channels. The return
# value is assigned to `_` rather than left as the cell's last expression.
M = np.random.randn(5,5,3)
W = np.ones((3,3,3,5))
_=conv2d_multi_channel(M,W)

In [61]:
def depthwise_conv2d(input, w):
    """Depthwise 2D convolution (SAME zero padding, stride 1, no dilation).

    Each input channel is convolved with its own filter and produces exactly
    one output channel (channel_multiplier=1); channels are never mixed.

    input: array of shape (height, width, in_depth).
    w: filter array of shape (fd, fd, in_depth), fd odd.

    Returns a float array of shape (height, width, in_depth).
    """
    assert w.shape[0] == w.shape[1] and w.shape[0] % 2 == 1

    # Zero-pad only the two spatial axes by half the filter size.
    border = w.shape[0] // 2
    spatial_pad = (border, border)
    padded = np.pad(input,
                    pad_width=(spatial_pad, spatial_pad, (0, 0)),
                    mode='constant',
                    constant_values=0)

    rows, cols, n_channels = input.shape
    assert n_channels == w.shape[2]
    result = np.zeros((rows, cols, n_channels))

    k_rows, k_cols = w.shape[0], w.shape[1]
    for ch in range(n_channels):
        # Apply this channel's corresponding filter to this channel only.
        for r, q in np.ndindex(rows, cols):
            # Sum the products over the filter window whose top-left corner
            # in the padded array is (r, q).
            for dr, dq in np.ndindex(k_rows, k_cols):
                weight = w[dr, dq, ch]
                result[r, q, ch] += padded[r + dr, q + dq, ch] * weight
    return result

In [62]:
# Smoke run: random 5x5 input with 3 channels and one all-ones 3x3 filter per
# channel. The bare call on the last line makes the full result array the
# cell's output.
M = np.random.randn(5,5,3)
W = np.ones((3,3,3))
depthwise_conv2d(M,W)

array([[[ 2.26771577, -2.8708533 ,  1.26122229],
        [ 3.03376112, -2.51311519,  1.06724878],
        [ 3.15332291, -0.81657014,  0.3598436 ],
        [-0.73272626,  2.441963  , -2.16166845],
        [-1.49877162,  2.0842249 , -1.96769495]],

       [[ 1.1021611 , -3.77516518,  2.75867464],
        [ 3.11405786, -2.05582411,  3.16095565],
        [ 4.64540164, -0.14494812,  2.75225421],
        [ 0.96857959,  2.18401949, -0.46529562],
        [-1.04331718,  0.46467841, -0.86757664]],

       [[-0.64382228, -2.96815819,  0.63235524],
        [-0.45752467, -3.41401197,  2.77035305],
        [ 2.46908283,  0.57913718,  1.94712499],
        [ 0.29093532,  0.12756132,  1.5167949 ],
        [ 0.10463772,  0.5734151 , -0.62120292]],

       [[ 1.28657767, -0.19109151,  1.92556675],
        [ 1.64610463, -1.93247096,  3.92925751],
        [ 3.09673066, -0.27726924,  3.47203056],
        [ 1.46361492, -1.27583213,  3.44128748],
        [ 1.10408796,  0.46554731,  1.43759673]],

       [[ 2.

In [63]:
def separable_conv2d(input, w_depth, w_pointwise):
    """Depthwise separable convolution.

    Two stages: a depthwise 2D convolution of input with w_depth, followed by
    a pointwise (1x1) convolution of that result with w_pointwise, which
    mixes the channels at each pixel.

    The depthwise stage uses SAME padding with 0s, a stride of 1, no dilation
    and one output channel per input channel (channel_multiplier=1).

    input: array of shape (height, width, in_depth).
    w_depth: depthwise filter array of shape (fd, fd, in_depth).
    w_pointwise: pointwise filter array of shape (in_depth, out_depth).

    Returns a float array of shape (height, width, out_depth).
    """
    # Stage 1: per-channel spatial filtering; the shape matches the input's.
    per_channel = depthwise_conv2d(input, w_depth)

    rows, cols, n_in = per_channel.shape
    assert n_in == w_pointwise.shape[0]
    n_out = w_pointwise.shape[1]
    result = np.zeros((rows, cols, n_out))

    # Stage 2: each output channel is a weighted sum of the depthwise
    # channels at the same pixel.
    for oc in range(n_out):
        for r, q in np.ndindex(rows, cols):
            for ic in range(n_in):
                result[r, q, oc] += per_channel[r, q, ic] * w_pointwise[ic, oc]
    return result

In [69]:
# Smoke run: random 5x5 input with 3 channels, all-ones 3x3 depthwise filters
# and an all-ones pointwise filter mapping 3 channels to 1. Only the result's
# shape is displayed.
M = np.random.randn(5,5,3)
W = np.ones((3,3,3))
WP = np.ones((3,1))
separable_conv2d(M,W,WP).shape

(5, 5, 1)