In [28]:
import numpy as np
from numpy import ndarray

In [29]:
def assert_same_shape(output: ndarray, output_grad: ndarray):
    '''
    Assert that two ndarrays have identical shapes.

    output: first array.
    output_grad: second array, expected to match `output` in shape.

    Raises AssertionError, with a message naming both shapes, when they differ.
    '''
    # The message lists the first argument's shape first, then the second's.
    assert output.shape == output_grad.shape, \
    '''
    Two ndarray should have the same shape; instead, first ndarray's shape is {0}
    and second ndarray's shape is {1}.
    '''.format(tuple(output.shape), tuple(output_grad.shape))

def assert_dim(t: ndarray,
               dim: int):
    '''
    Assert that `t` has exactly `dim` dimensions.

    t: array whose number of dimensions is checked.
    dim: expected number of dimensions (an integer, compared to len(t.shape)).

    Raises AssertionError, with a message naming the expected and actual
    dimension counts, when they differ.
    '''
    assert len(t.shape) == dim, \
    '''
    Tensor expected to have dimension {0}, instead has dimension {1}
    '''.format(dim, len(t.shape))


In [30]:
# 1D Convolution
# Sample 1-D input and a length-3 all-ones kernel used by the demo calls in this notebook.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d = np.array([1, 1, 1])

def _pad_1d(inp: ndarray, num: int) -> ndarray:
    z = np.array([0])
    z = np.repeat(z, num)
    return np.concatenate([z, inp, z])

# Pad the sample input with a single zero on each side.
_pad_1d(input_1d, 1)

array([0, 1, 2, 3, 4, 5, 0])

In [31]:
# Forward pass
def conv_1d(inp: ndarray, param: ndarray) -> ndarray:
    '''
    Slide a 1-D kernel over a zero-padded 1-D input.

    inp: 1-D input array.
    param: 1-D kernel.

    Returns a float array with the same shape as `inp`, where element o is
    the sum over p of param[p] * padded_inp[o + p].
    '''
    # Both operands must be one-dimensional.
    assert_dim(inp, 1)
    assert_dim(param, 1)

    kernel_size = param.shape[0]
    # Zero-pad each side by half the kernel width (integer division).
    padded = _pad_1d(inp, kernel_size // 2)

    result = np.zeros(inp.shape)
    for pos in range(result.shape[0]):
        for offset, weight in enumerate(param):
            result[pos] += weight * padded[pos + offset]

    assert_same_shape(inp, result)
    return result

# Run the forward pass on the sample input with the current param_1d kernel.
conv_1d(input_1d, param_1d)

array([ 3.,  6.,  9., 12.,  9.])

In [32]:
# Same input, now with a length-5 all-ones kernel.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d = np.array([1, 1, 1, 1, 1])
# conv_1d pads by len(param) // 2, which is 2 for this kernel.
_pad_1d(input_1d, 2)

array([0, 0, 1, 2, 3, 4, 5, 0, 0])

In [33]:
# Forward pass using whatever param_1d currently holds (the length-5 kernel if cells ran in order).
conv_1d(input_1d, param_1d)

array([ 6., 10., 15., 14., 12.])

In [34]:
# Backward pass
def conv_1d_sum(inp: ndarray, param: ndarray) -> ndarray:
    '''
    Collapse the 1-D convolution of `inp` with `param` into a single value
    by summing every output element.
    '''
    return np.sum(conv_1d(inp, param))

# Baseline: summed convolution output for the original input and the length-3 kernel.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d = np.array([1, 1, 1])
conv_1d_sum(input_1d, param_1d)

39.0

In [35]:
# Raise the last input element from 5 to 6 and recompute the sum to see how far it moves.
input_1d_2 = np.array([1, 2, 3, 4, 6])
param_1d = np.array([1, 1, 1])
conv_1d_sum(input_1d_2, param_1d)

41.0

In [36]:
def _input_grad_1d(inp: ndarray, param: ndarray, output_grad: ndarray = None) -> ndarray:
    '''
    Gradient of the conv_1d output with respect to its input.

    inp: 1-D input that was convolved.
    param: 1-D kernel used in the forward pass.
    output_grad: gradient arriving from the output, same shape as `inp`.
        Defaults to all ones, i.e. the gradient of sum(conv_1d(inp, param)).

    Returns an array shaped like `inp`. Its dtype is the promotion of the
    dtypes of `inp`, `param` and `output_grad`, so fractional contributions
    are not truncated when the input happens to be integer-typed.

    Raises AssertionError if a supplied `output_grad` does not match `inp`'s shape.
    '''
    param_len = param.shape[0]
    param_mid = param_len // 2
    if output_grad is None:
        output_grad = np.ones_like(inp)
    else:
        assert_same_shape(inp, output_grad)

    output_grad_pad = _pad_1d(output_grad, param_mid)
    # Accumulate in the promoted dtype; an integer buffer would silently
    # drop the fractional part of every float product added to it.
    input_grad = np.zeros(inp.shape,
                          dtype=np.result_type(inp, param, output_grad))
    for o in range(inp.shape[0]):
        for p in range(param_len):
            # In the forward pass inp[o] reaches output position
            # o + param_mid - p through param[p]; the padded gradient is
            # shifted by a further param_mid. For odd kernels this equals
            # o + param_len - p - 1, and it stays correct for even kernels.
            input_grad[o] += output_grad_pad[o + 2 * param_mid - p] * param[p]
    assert_same_shape(input_grad, inp)
    return input_grad

# Gradient of the summed output w.r.t. each input element (output_grad defaults to ones).
_input_grad_1d(input_1d, param_1d)

array([2, 3, 3, 3, 2])

In [37]:
# Baseline sum again, this time as the reference point for perturbing the kernel.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d = np.array([1, 1, 1])
conv_1d_sum(input_1d, param_1d)

39.0

In [38]:
# Raise the first kernel weight from 1 to 2 and recompute the sum to see how far it moves.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d_2 = np.array([2, 1, 1])
conv_1d_sum(input_1d, param_1d_2)

49.0

In [39]:
def _param_grad_1d(inp: ndarray, param: ndarray, output_grad: ndarray = None) -> ndarray:
    '''
    Gradient of the conv_1d output with respect to the kernel.

    inp: 1-D input that was convolved.
    param: 1-D kernel used in the forward pass.
    output_grad: gradient arriving from the output, same shape as `inp`.
        Defaults to all ones, i.e. the gradient of sum(conv_1d(inp, param)).

    Returns an array shaped like `param`. Its dtype is the promotion of the
    dtypes of `inp`, `param` and `output_grad`, so float inputs or gradients
    are not truncated when the kernel happens to be integer-typed.

    Raises AssertionError if a supplied `output_grad` does not match `inp`'s shape.
    '''
    param_len = param.shape[0]
    param_mid = param_len // 2
    input_pad = _pad_1d(inp, param_mid)
    if output_grad is None:
        output_grad = np.ones_like(inp)
    else:
        assert_same_shape(inp, output_grad)

    # Accumulate in the promoted dtype rather than the kernel's own dtype.
    param_grad = np.zeros(param.shape,
                          dtype=np.result_type(inp, param, output_grad))
    for o in range(inp.shape[0]):
        for p in range(param_len):
            # Forward pass: out[o] += param[p] * input_pad[o + p], so
            # param[p] collects input_pad[o + p] weighted by output_grad[o].
            param_grad[p] += input_pad[o + p] * output_grad[o]
    assert_same_shape(param_grad, param)
    return param_grad

# Gradient of the summed output w.r.t. each kernel weight (output_grad defaults to ones).
_param_grad_1d(input_1d, param_1d)

array([10, 15, 14])

In [42]:
# Batches, 2D Convolutions
# Two observations of length 7, one per row.
input_1d_batch = np.array([
    [0, 1, 2, 3, 4, 5, 6],
    [1, 2, 3, 4, 5, 6, 7],
])

def conv_1d_batch(inp: ndarray, param: ndarray) -> ndarray:
    '''
    Apply conv_1d with the same kernel to every observation in a batch.

    inp: batch of observations, iterated along its first axis.
    param: 1-D kernel shared by all observations.

    Returns the per-observation outputs stacked along a new first axis.
    '''
    rows = []
    for obs in inp:
        rows.append(conv_1d(obs, param))
    return np.stack(rows)

# Convolve every row of the batch with the current param_1d kernel.
conv_1d_batch(input_1d_batch, param_1d)

array([[ 1.,  3.,  6.,  9., 12., 15., 11.],
       [ 3.,  6.,  9., 12., 15., 18., 13.]])

In [43]:
def _pad_1d_batch(inp: ndarray, num: int) -> ndarray:
    '''
    Zero-pad every observation in a batch with `num` zeros on each side.

    inp: batch of 1-D observations, iterated along its first axis.
    num: number of zeros added before and after each observation.

    Returns the padded observations stacked along a new first axis.
    '''
    padded_rows = []
    for obs in inp:
        padded_rows.append(_pad_1d(obs, num))
    return np.stack(padded_rows)

# Pad every row of the batch with a single zero on each side.
_pad_1d_batch(input_1d_batch, 1)

array([[0, 0, 1, 2, 3, 4, 5, 6, 0],
       [0, 1, 2, 3, 4, 5, 6, 7, 0]])

In [44]:
def input_grad_1d_batch(inp: ndarray, param: ndarray) -> ndarray:
    '''
    Input gradient of the batched 1-D convolution under an all-ones
    output gradient.

    inp: batch of 1-D observations, iterated along its first axis.
    param: 1-D kernel shared by all observations.

    Returns the per-observation input gradients stacked along a new first axis.
    '''
    # The forward pass is run only to get an output-shaped array of ones.
    upstream = np.ones_like(conv_1d_batch(inp, param))

    per_row = []
    for obs, obs_grad in zip(inp, upstream):
        per_row.append(_input_grad_1d(obs, param, obs_grad))
    return np.stack(per_row)

# Per-row input gradients for the batch, using an all-ones output gradient.
input_grad_1d_batch(input_1d_batch, param_1d)



array([[2, 3, 3, 3, 3, 3, 2],
       [2, 3, 3, 3, 3, 3, 2]])

In [49]:
def param_grad_1d_batch(inp: ndarray, param: ndarray) -> ndarray:
    '''
    Kernel gradient of the batched 1-D convolution under an all-ones
    output gradient, accumulated over every observation in the batch.

    inp: 2-D batch; rows are observations, columns are positions.
    param: 1-D kernel shared by all observations.

    Returns an array shaped like `param`. Its dtype is the promotion of the
    dtypes of `inp` and `param`, so float inputs are not truncated when the
    kernel is integer-typed.
    '''
    output_grad = np.ones_like(inp)
    param_len = param.shape[0]
    # Pad by half the kernel width, as the forward pass does. A fixed pad
    # of 1 only fits length-3 kernels and overruns the row for longer ones.
    inp_pad = _pad_1d_batch(inp, param_len // 2)
    param_grad = np.zeros(param.shape, dtype=np.result_type(inp, param))
    for i in range(inp.shape[0]):
        for o in range(inp.shape[1]):
            for p in range(param_len):
                param_grad[p] += inp_pad[i][o + p] * output_grad[i][o]
    return param_grad
    
# Kernel gradient accumulated over every row of the batch.
param_grad_1d_batch(input_1d_batch, param_1d)

array([36, 49, 48])