In [1]:
import numpy as np
from numpy import ndarray

In [2]:
def assert_same_shape(output: ndarray, output_grad: ndarray) -> None:
    """Assert that ``output`` and ``output_grad`` have identical shapes.

    Args:
        output: first array to compare.
        output_grad: second array to compare.

    Raises:
        AssertionError: if the shapes differ. The message reports the shape
            of ``output`` as the "first" array and the shape of
            ``output_grad`` as the "second" one.
    """
    # The format arguments follow the parameter order so that "first" and
    # "second" in the message refer to the first and second argument.
    assert output.shape == output_grad.shape, \
    '''
    Two ndarray should have the same shape; instead, first ndarray's shape is {0}
    and second ndarray's shape is {1}.
    '''.format(tuple(output.shape), tuple(output_grad.shape))

def assert_dim(t: ndarray,
               dim: int) -> None:
    """Assert that ``t`` has exactly ``dim`` dimensions.

    Args:
        t: array whose number of dimensions is checked.
        dim: expected number of dimensions (an integer, compared against
            ``len(t.shape)``).

    Raises:
        AssertionError: if ``len(t.shape)`` differs from ``dim``.
    """
    assert len(t.shape) == dim, \
    '''
    Tensor expected to have dimension {0}, instead has dimension {1}
    '''.format(dim, len(t.shape))


In [3]:
# 1D Convolution
# Toy data used by the 1D examples: a length-5 input and a 3-wide kernel of ones.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d = np.array([1, 1, 1])

def _pad_1d(inp: ndarray, num: int) -> ndarray:
    z = np.array([0])
    z = np.repeat(z, num)
    return np.concatenate([z, inp, z])

# Show the input with one zero added on each side.
_pad_1d(input_1d, 1)

array([0, 1, 2, 3, 4, 5, 0])

In [4]:
# Forward pass
def conv_1d(inp: ndarray, param: ndarray) -> ndarray:
    """Slide the 1D kernel ``param`` over the zero-padded 1D input ``inp``.

    The input is padded with ``len(param) // 2`` zeros per side and the
    result has the same shape as ``inp`` (always a float array).

    Raises:
        AssertionError: if ``inp`` or ``param`` is not one-dimensional.
    """
    assert_dim(inp, 1)
    assert_dim(param, 1)

    kernel_size = param.shape[0]
    padded = _pad_1d(inp, kernel_size // 2)

    out = np.zeros(inp.shape)
    for pos in range(out.shape[0]):
        # Output `pos` is the weighted sum of the window starting at `pos`.
        for offset, weight in enumerate(param):
            out[pos] += weight * padded[pos + offset]

    assert_same_shape(inp, out)
    return out

# With a kernel of ones, each output is the sum of a 3-wide window of the padded input.
conv_1d(input_1d, param_1d)

array([ 3.,  6.,  9., 12.,  9.])

In [5]:
# Same input with a 5-wide kernel of ones; padding by 2 zeros per side
# (5 // 2) is what conv_1d applies for this kernel.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d = np.array([1, 1, 1, 1, 1])
_pad_1d(input_1d, 2)

array([0, 0, 1, 2, 3, 4, 5, 0, 0])

In [6]:
# Forward pass with the 5-wide kernel defined in the previous cell.
conv_1d(input_1d, param_1d)

array([ 6., 10., 15., 14., 12.])

In [7]:
# Backward pass
def conv_1d_sum(inp: ndarray, param: ndarray) -> ndarray:
    """Return the sum of every element of ``conv_1d(inp, param)``.

    Used in the cells below as a scalar quantity whose change can be
    compared against the hand-written gradients.
    """
    return np.sum(conv_1d(inp, param))

# Baseline value of the summed output for the 3-wide kernel of ones.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d = np.array([1, 1, 1])
conv_1d_sum(input_1d, param_1d)

39.0

In [8]:
# Finite-difference check: the last input element is bumped by 1 (5 -> 6).
# The change in the sum relative to the baseline is the gradient with
# respect to that input element.
input_1d_2 = np.array([1, 2, 3, 4, 6])
param_1d = np.array([1, 1, 1])
conv_1d_sum(input_1d_2, param_1d)

41.0

In [9]:
def _input_grad_1d(inp: ndarray, param: ndarray, output_grad: ndarray = None) -> ndarray:
    param_len = param.shape[0]
    param_mid = param_len // 2
    inp_pad = _pad_1d(inp, param_mid)
    if output_grad is None:
        output_grad = np.ones_like(inp)
    else:
        assert_same_shape(inp, output_grad)

    output_grad = _pad_1d(output_grad, param_mid)
    param_grad = np.zeros_like(param)
    input_grad = np.zeros_like(inp)
    for o in range(inp.shape[0]):
        for p in range(param_len):
            input_grad[o] += output_grad[o + param_len - p - 1] * param[p]
    assert_same_shape(input_grad, inp)
    return input_grad

# No output_grad is passed, so a gradient of ones is used (gradient of the summed output).
_input_grad_1d(input_1d, param_1d)

array([2, 3, 3, 3, 2])

In [10]:
# Baseline sum again, this time as the reference for the kernel-gradient check.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d = np.array([1, 1, 1])
conv_1d_sum(input_1d, param_1d)

39.0

In [11]:
# Finite-difference check: the first kernel weight is bumped by 1 (1 -> 2).
# The change in the sum relative to the baseline is the gradient with
# respect to that weight.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d_2 = np.array([2, 1, 1])
conv_1d_sum(input_1d, param_1d_2)

49.0

In [12]:
def _param_grad_1d(inp: ndarray, param: ndarray, output_grad: ndarray = None) -> ndarray:
    param_len = param.shape[0]
    param_mid = param_len // 2
    input_pad = _pad_1d(inp, param_mid)
    if output_grad is None:
        output_grad = np.ones_like(inp)
    else:
        assert_same_shape(inp, output_grad)

    param_grad = np.zeros_like(param)
    input_grad = np.zeros_like(inp)
    for o in range(inp.shape[0]):
        for p in range(param_len):
            param_grad[p] += input_pad[o + p] * output_grad[o]
    assert_same_shape(param_grad, param)
    return param_grad

# No output_grad is passed, so a gradient of ones is used (gradient of the summed output).
_param_grad_1d(input_1d, param_1d)

array([10, 15, 14])

In [13]:
# Batches, 2D Convolutions
# A batch of two observations, each a length-7 1D signal (one row per observation).
input_1d_batch = np.array([
    [0, 1, 2, 3, 4, 5, 6],
    [1, 2, 3, 4, 5, 6, 7],
])

def conv_1d_batch(inp: ndarray, param: ndarray) -> ndarray:
    """Apply ``conv_1d`` with the same kernel to every row of ``inp``.

    Returns the per-row results stacked along a new leading axis.
    """
    return np.stack([conv_1d(row, param) for row in inp])

# Convolve each row of the batch with the 3-wide kernel of ones.
conv_1d_batch(input_1d_batch, param_1d)

array([[ 1.,  3.,  6.,  9., 12., 15., 11.],
       [ 3.,  6.,  9., 12., 15., 18., 13.]])

In [14]:
def _pad_1d_batch(inp: ndarray, num: int) -> ndarray:
    outs = [_pad_1d(obs, num) for obs in inp]
    return np.stack(outs)

# Each row of the batch gets one zero on each side.
_pad_1d_batch(input_1d_batch, 1)

array([[0, 0, 1, 2, 3, 4, 5, 6, 0],
       [0, 1, 2, 3, 4, 5, 6, 7, 0]])

In [15]:
def input_grad_1d_batch(inp: ndarray, param: ndarray) -> ndarray:
    """Input gradient of the summed batched convolution, one row per observation.

    Runs the batched forward pass, uses a gradient of ones shaped like its
    output, and calls ``_input_grad_1d`` on each observation in turn.
    """
    forward = conv_1d_batch(inp, param)
    upstream = np.ones_like(forward)

    per_obs = []
    for idx in range(upstream.shape[0]):
        per_obs.append(_input_grad_1d(inp[idx], param, upstream[idx]))
    return np.stack(per_obs)

# Input gradient for every observation in the batch, using an output gradient of ones.
input_grad_1d_batch(input_1d_batch, param_1d)



array([[2, 3, 3, 3, 3, 3, 2],
       [2, 3, 3, 3, 3, 3, 2]])

In [16]:
def param_grad_1d_batch(inp: ndarray, param: ndarray) -> ndarray:
    """Kernel gradient of the summed batched 1D convolution.

    Uses an output gradient of ones and accumulates the contribution of
    every observation in the batch.

    Args:
        inp: 2D batch, one 1D observation per row.
        param: 1D kernel.

    Returns:
        Array with the shape of ``param``. Its dtype is the promoted dtype
        of ``inp`` and ``param``, so a float batch combined with an integer
        kernel is not truncated.
    """
    output_grad = np.ones_like(inp)
    # Pad by half the kernel width, as the forward pass does. A fixed pad of
    # 1 is only correct for 3-wide kernels and indexes past the end of the
    # padded rows for wider ones.
    inp_pad = _pad_1d_batch(inp, param.shape[0] // 2)
    param_grad = np.zeros(param.shape, dtype=np.result_type(inp, param))
    for i in range(inp.shape[0]):
        for o in range(inp.shape[1]):
            for p in range(param.shape[0]):
                param_grad[p] += inp_pad[i][o + p] * output_grad[i][o]
    return param_grad
    
# Kernel gradient accumulated over both observations of the batch.
param_grad_1d_batch(input_1d_batch, param_1d)

array([36, 49, 48])

In [17]:
# 2D convolutions
# A batch of 3 random 28x28 arrays drawn from a standard normal.
# NOTE(review): no random seed is set, so the values differ on every run;
# displaying the whole array also produces a very long output.
imgs_2d_batch = np.random.randn(3, 28, 28)
imgs_2d_batch

array([[[-0.42434922, -0.14472483, -1.27013222, ..., -0.74073442,
         -1.3971833 , -0.54179224],
        [-0.35763276, -1.6784949 ,  1.03143636, ...,  1.38298904,
         -1.42219988,  0.52587608],
        [ 0.84106141,  0.31597954,  0.01738205, ...,  2.29523075,
         -1.36179772,  0.95529535],
        ...,
        [-1.78979656, -0.42676378, -1.29281323, ..., -2.47334326,
         -1.01375606,  0.72455379],
        [ 4.05129898,  0.03784841, -0.435168  , ...,  1.42691352,
         -1.376121  ,  2.59586309],
        [ 1.53618191, -0.38938324,  0.45311103, ..., -0.88061069,
          0.36252006,  0.80542943]],

       [[ 1.06568439, -0.08612292,  1.60093917, ..., -1.82067884,
         -0.93793742,  0.06243818],
        [ 0.16580653,  1.14548298, -0.81346529, ...,  0.496644  ,
         -1.4693013 , -0.20614311],
        [ 0.00736742,  0.17220568,  0.13315277, ..., -1.08472759,
         -0.08280564,  0.96593941],
        ...,
        [ 0.24798535,  3.04208432, -1.84653628, ...,  

In [18]:
# A single random 3x3 kernel (unseeded, so values differ between runs).
param_2d = np.random.randn(3, 3)  # open question from the author: why 2D? ready for channels?
param_2d

array([[ 0.96969128, -1.04773981,  0.39857421],
       [-0.21581007, -1.27222732,  0.50117092],
       [ 0.80176452,  0.78770285,  0.79786428]])

In [19]:
def _pad_2d_obs(inp: ndarray, num: int):
    inp_pad = _pad_1d_batch(inp, num)
    other = np.zeros((num, inp.shape[0] + num * 2)) # pad columns
    return np.concatenate([other, inp_pad, other]) # pad rows

def _pad_2d(inp: ndarray, num: int):
    outs = [_pad_2d_obs(obs, num) for obs in inp]
    return np.stack(outs)

# Padding by 1 grows each 28x28 image to 30x30; the batch axis is unchanged.
_pad_2d(imgs_2d_batch, 1).shape

(3, 30, 30)

In [20]:
# 2D convolutions: coding the forward pass
def _compute_output_obs_2d(obs: ndarray, param: ndarray):
    param_mid = param.shape[0] // 2
    obs_pad = _pad_2d_obs(obs, param_mid)
    out = np.zeros_like(obs)
    for o_w in range(out.shape[0]):
        for o_h in range(out.shape[1]):
            for p_w in range(param.shape[0]):
                for p_h in range(param.shape[1]):
                    out[o_w][o_h] += param[p_w][p_h] * obs_pad[o_w+p_w][o_h+p_h]
    return out

def _compute_output_2d(img_batch: ndarray, param: ndarray):    
    assert_dim(img_batch, 3)    
    outs = [_compute_output_obs_2d(obs, param) for obs in img_batch]    
    return np.stack(outs)

# The output keeps the batch's shape because each image is zero-padded before the kernel is applied.
_compute_output_2d(imgs_2d_batch, param_2d).shape

(3, 28, 28)

In [21]:
# 2D convolutions: coding the backward pass
def _compute_grads_obs_2d(input_obs: ndarray, output_grad_obs: ndarray, param: ndarray) -> ndarray:    
    param_size = param.shape[0]
    output_obs_pad = _pad_2d_obs(output_grad_obs, param_size // 2)
    input_grad = np.zeros_like(input_obs)
    for i_w in range(input_obs.shape[0]):
        for i_h in range(input_obs.shape[1]):
            for p_w in range(param_size):
                for p_h in range(param_size):
                    input_grad[i_w][i_h] += output_obs_pad[i_w + param_size - p_w - 1][i_h + param_size - p_h - 1] * param[p_w][p_h]
    return input_grad

def _compute_grads_2d(inp: ndarray, output_grad: ndarray, param: ndarray) -> ndarray:
    grads = [_compute_grads_obs_2d(inp[i], output_grad[i], param) for i in range(output_grad.shape[0])]
    return np.stack(grads)

# Input gradients for the whole batch, using an output gradient of ones;
# the result has the same shape as the input batch.
img_grads = _compute_grads_2d(imgs_2d_batch, np.ones_like(imgs_2d_batch), param_2d)
img_grads.shape

(3, 28, 28)

In [22]:
def _param_grad_2d(inp: ndarray, output_grad: ndarray, param: ndarray) -> ndarray:
    param_size = param.shape[0]
    inp_pad = _pad_2d(inp, param_size // 2)
    param_grad = np.zeros_like(param)
    img_shape = output_grad.shape[1:]
    for i in range(inp.shape[0]):
        for o_w in range(img_shape[0]):
            for o_h in range(img_shape[1]):
                for p_w in range(param_size):
                    for p_h in range(param_size):
                        param_grad[p_w][p_h] += inp_pad[i][o_w + p_w][o_h + p_h] * output_grad[i][o_w][o_h]
    return param_grad

# Kernel gradient accumulated over the whole batch, using an output gradient
# of ones; the result has the same shape as the kernel.
param_grad = _param_grad_2d(imgs_2d_batch, np.ones_like(imgs_2d_batch), param_2d)
param_grad.shape

(3, 3)