In [1]:
import numpy as np
from numpy import ndarray
from lincoln import *

In [2]:
def assert_same_shape(output: ndarray, output_grad: ndarray) -> None:
    """
    Assert that two arrays have identical shapes.

    Args:
        output: first array.
        output_grad: second array, expected to match `output` in shape.

    Raises:
        AssertionError: if the shapes differ. The message reports the shape
            of `output` first and the shape of `output_grad` second.
    """
    # The format arguments follow the parameter order so that "first" in the
    # message really is `output` and "second" really is `output_grad`.
    assert output.shape == output_grad.shape, \
    '''
    Two ndarray should have the same shape; instead, first ndarray's shape is {0}
    and second ndarray's shape is {1}.
    '''.format(tuple(output.shape), tuple(output_grad.shape))

def assert_dim(t: ndarray,
               dim: int) -> None:
    """
    Assert that an array has the expected number of dimensions.

    Args:
        t: array to check.
        dim: expected number of dimensions (an integer, compared with
            `len(t.shape)`).

    Raises:
        AssertionError: if `t` does not have exactly `dim` dimensions.
    """
    actual_dim = len(t.shape)
    assert actual_dim == dim, \
    '''
    Tensor expected to have dimension {0}, instead has dimension {1}
    '''.format(dim, actual_dim)


In [3]:
# 1D Convolution
# Toy 1D input (the integers 1..5) and a length-3 filter of ones.
input_1d = np.arange(1, 6)
param_1d = np.ones(3, dtype=int)

def _pad_1d(inp: ndarray, num: int) -> ndarray:
    z = np.array([0])
    z = np.repeat(z, num)
    return np.concatenate([z, inp, z])

# Pad input_1d with one zero on each side.
_pad_1d(input_1d, 1)

array([0, 1, 2, 3, 4, 5, 0])

In [4]:
# Forward pass
def conv_1d(inp: ndarray, param: ndarray) -> ndarray:
    """
    Slide a 1D filter over a 1D input, producing an output of the same length.

    The input is zero-padded by `len(param) // 2` on each side; output element
    `o` is the sum over `p` of `param[p] * padded[o + p]`. The output is
    allocated with `np.zeros(inp.shape)`, i.e. as floats.

    Raises:
        AssertionError: if `inp` or `param` is not one-dimensional.
    """
    assert_dim(inp, 1)
    assert_dim(param, 1)

    kernel_size = param.shape[0]
    padded = _pad_1d(inp, kernel_size // 2)

    out = np.zeros(inp.shape)
    for pos in range(out.shape[0]):
        for k, weight in enumerate(param):
            out[pos] += weight * padded[pos + k]

    assert_same_shape(inp, out)
    return out

# Convolve input_1d with param_1d; the result has the same length as the input.
conv_1d(input_1d, param_1d)

array([ 3.,  6.,  9., 12.,  9.])

In [5]:
# Same input with a length-5 filter; conv_1d pads by len(param) // 2, so this
# filter needs two zeros of padding on each side.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d = np.array([1, 1, 1, 1, 1])
_pad_1d(input_1d, 2)

array([0, 0, 1, 2, 3, 4, 5, 0, 0])

In [6]:
# Forward pass with the current input_1d / param_1d (the length-5 filter when
# the cells are run in order).
conv_1d(input_1d, param_1d)

array([ 6., 10., 15., 14., 12.])

In [7]:
# Backward pass
def conv_1d_sum(inp: ndarray, param: ndarray) -> ndarray:
    """Return the sum of every element of `conv_1d(inp, param)`."""
    return conv_1d(inp, param).sum()

# Baseline: summed convolution output for the original input and a length-3
# filter of ones.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d = np.array([1, 1, 1])
conv_1d_sum(input_1d, param_1d)

39.0

In [8]:
# Increase the last input element by 1 (5 -> 6) and recompute the summed
# output, to compare against the baseline sum.
input_1d_2 = np.array([1, 2, 3, 4, 6])
param_1d = np.array([1, 1, 1])
conv_1d_sum(input_1d_2, param_1d)

41.0

In [9]:
def _input_grad_1d(inp: ndarray, param: ndarray, output_grad: ndarray = None) -> ndarray:
    """
    Gradient of a 1D same-size convolution with respect to its input.

    Args:
        inp: 1D input that was convolved.
        param: 1D filter used in the forward pass.
        output_grad: gradient arriving from the output, same shape as `inp`.
            Defaults to an array of ones shaped like `inp`.

    Returns:
        Array with the shape of `inp`. Its dtype is the NumPy promotion of the
        dtypes of `inp`, `param` and `output_grad`, so float contributions are
        not truncated when `inp` happens to be an integer array.

    Raises:
        AssertionError: if `output_grad` is given with a shape other than
            that of `inp`.
    """
    param_len = param.shape[0]
    param_mid = param_len // 2
    if output_grad is None:
        output_grad = np.ones_like(inp)
    else:
        assert_same_shape(inp, output_grad)

    grad_dtype = np.result_type(inp.dtype, param.dtype, output_grad.dtype)
    output_pad = _pad_1d(output_grad, param_mid)
    input_grad = np.zeros(inp.shape, dtype=grad_dtype)

    # Forward pass: out[o] = sum_p param[p] * inp_pad[o + p], where
    # inp_pad[j] = inp[j - param_mid]. Input i therefore reaches output
    # o = i + param_mid - p through weight p, which sits at index
    # i + 2 * param_mid - p of the padded output gradient. For odd filter
    # lengths 2 * param_mid == param_len - 1; for even lengths only the
    # 2 * param_mid form is correct.
    flip_offset = 2 * param_mid
    for i in range(inp.shape[0]):
        for p in range(param_len):
            input_grad[i] += output_pad[i + flip_offset - p] * param[p]
    assert_same_shape(input_grad, inp)
    return input_grad

# Input gradient using the default all-ones output gradient.
_input_grad_1d(input_1d, param_1d)

array([2, 3, 3, 3, 2])

In [10]:
# Baseline summed output again, as the reference for a filter-weight change.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d = np.array([1, 1, 1])
conv_1d_sum(input_1d, param_1d)

39.0

In [11]:
# Increase the first filter weight by 1 (1 -> 2) and recompute the summed
# output, to compare against the baseline sum.
input_1d = np.array([1, 2, 3, 4, 5])
param_1d_2 = np.array([2, 1, 1])
conv_1d_sum(input_1d, param_1d_2)

49.0

In [12]:
def _param_grad_1d(inp: ndarray, param: ndarray, output_grad: ndarray = None) -> ndarray:
    """
    Gradient of a 1D same-size convolution with respect to its filter.

    Args:
        inp: 1D input that was convolved.
        param: 1D filter used in the forward pass.
        output_grad: gradient arriving from the output, same shape as `inp`.
            Defaults to an array of ones shaped like `inp`.

    Returns:
        Array with the shape of `param`. Its dtype is the NumPy promotion of
        the dtypes of `inp`, `param` and `output_grad`, so float contributions
        are not truncated when `param` happens to be an integer array.

    Raises:
        AssertionError: if `output_grad` is given with a shape other than
            that of `inp`.
    """
    param_len = param.shape[0]
    param_mid = param_len // 2
    input_pad = _pad_1d(inp, param_mid)
    if output_grad is None:
        output_grad = np.ones_like(inp)
    else:
        assert_same_shape(inp, output_grad)

    grad_dtype = np.result_type(inp.dtype, param.dtype, output_grad.dtype)
    param_grad = np.zeros(param.shape, dtype=grad_dtype)
    # Weight p multiplies inp_pad[o + p] when producing output o, so its
    # gradient is the sum over o of inp_pad[o + p] * output_grad[o].
    for o in range(inp.shape[0]):
        for p in range(param_len):
            param_grad[p] += input_pad[o + p] * output_grad[o]
    assert_same_shape(param_grad, param)
    return param_grad

# Filter gradient using the default all-ones output gradient.
_param_grad_1d(input_1d, param_1d)

array([10, 15, 14])

In [13]:
# Batches, 2D Convolutions
# Batch of two length-7 observations: 0..6 and 1..7.
input_1d_batch = np.stack([np.arange(0, 7), np.arange(1, 8)])

def conv_1d_batch(inp: ndarray, param: ndarray) -> ndarray:
    """Apply `conv_1d` with the same filter to every row of `inp` and stack the results."""
    per_row = []
    for row in inp:
        per_row.append(conv_1d(row, param))
    return np.stack(per_row)

# Convolve each row of the batch with param_1d (defined in an earlier cell).
conv_1d_batch(input_1d_batch, param_1d)

array([[ 1.,  3.,  6.,  9., 12., 15., 11.],
       [ 3.,  6.,  9., 12., 15., 18., 13.]])

In [14]:
def _pad_1d_batch(inp: ndarray, num: int) -> ndarray:
    """Pad every row of `inp` with `num` zeros on each side via `_pad_1d` and stack the rows."""
    padded_rows = []
    for row in inp:
        padded_rows.append(_pad_1d(row, num))
    return np.stack(padded_rows)

# Pad every row of the batch with one zero on each side.
_pad_1d_batch(input_1d_batch, 1)

array([[0, 0, 1, 2, 3, 4, 5, 6, 0],
       [0, 1, 2, 3, 4, 5, 6, 7, 0]])

In [15]:
def input_grad_1d_batch(inp: ndarray, param: ndarray) -> ndarray:
    """
    Input gradient for a batch of 1D convolutions, with an all-ones output gradient.

    The forward pass is run only to obtain an output-shaped array of ones;
    each row's gradient is then computed by `_input_grad_1d` and the rows are
    stacked.
    """
    upstream = np.ones_like(conv_1d_batch(inp, param))
    per_row = []
    for row_idx in range(upstream.shape[0]):
        per_row.append(_input_grad_1d(inp[row_idx], param, upstream[row_idx]))
    return np.stack(per_row)

# Per-row input gradients for the batch, using an all-ones output gradient.
input_grad_1d_batch(input_1d_batch, param_1d)



array([[2, 3, 3, 3, 3, 3, 2],
       [2, 3, 3, 3, 3, 3, 2]])

In [16]:
def param_grad_1d_batch(inp: ndarray, param: ndarray) -> ndarray:
    """
    Filter gradient for a batch of 1D convolutions, with an all-ones output gradient.

    Args:
        inp: 2D array, one observation per row.
        param: 1D filter.

    Returns:
        Array with the shape of `param`, accumulated over every row and every
        output position. Its dtype is the NumPy promotion of the dtypes of
        `inp` and `param`.
    """
    output_grad = np.ones_like(inp)
    # Pad by half the filter length, as the forward pass does, rather than by
    # a fixed 1; a fixed 1 only matches length-3 filters and makes longer
    # filters index past the end of the padded rows.
    inp_pad = _pad_1d_batch(inp, param.shape[0] // 2)
    param_grad = np.zeros(param.shape, dtype=np.result_type(inp.dtype, param.dtype))
    for i in range(inp.shape[0]):
        for o in range(inp.shape[1]):
            for p in range(param.shape[0]):
                param_grad[p] += inp_pad[i][o + p] * output_grad[i][o]
    return param_grad
    
# Filter gradient accumulated over every row of the batch.
param_grad_1d_batch(input_1d_batch, param_1d)

array([36, 49, 48])

In [17]:
# 2D convolutions
# Batch of 3 random 28x28 arrays drawn with np.random.randn.
# NOTE(review): no seed is set in this cell, so the values presumably differ
# between runs — confirm whether that is intended.
imgs_2d_batch = np.random.randn(3, 28, 28)
imgs_2d_batch

array([[[-1.03417238, -2.09239755,  0.69135793, ..., -1.07786546,
         -0.44008649, -0.03273197],
        [ 0.56529457,  1.04933864, -0.47084324, ..., -0.2602004 ,
          0.94179117,  1.03752791],
        [-1.73092859,  0.23349788, -1.3382604 , ...,  1.07008601,
          2.50556758,  1.01098692],
        ...,
        [ 0.22675193, -0.5000509 , -0.18338481, ...,  0.85593941,
          0.81651735, -0.62873667],
        [-0.53212729,  0.16203003,  0.97031897, ..., -0.78911639,
         -0.56099684, -1.85952668],
        [ 0.39737739, -0.33933081,  0.15588373, ...,  0.60740335,
          0.55510816, -1.09954132]],

       [[-0.40108276, -0.60375753,  0.04158626, ...,  0.67961825,
          1.07036277,  1.10052244],
        [-0.58388917, -0.53750465,  1.42643215, ...,  0.5980099 ,
         -0.21962957,  0.4650023 ],
        [ 0.36238631, -0.08180719, -0.26043156, ...,  2.04102829,
         -0.93366626, -0.73679577],
        ...,
        [-0.64817513, -0.08603412,  0.98435836, ..., -

In [18]:
# Random 3x3 filter drawn with np.random.randn.
param_2d = np.random.randn(3, 3)
param_2d

array([[-1.06924431, -0.68107541,  0.86605599],
       [-0.46896809,  0.84013686,  0.60776054],
       [-1.21256271, -0.26579037,  1.42129975]])

In [19]:
def _pad_2d_obs(inp: ndarray, num: int) -> ndarray:
    """
    Zero-pad a single 2D observation by `num` on all four sides.

    Args:
        inp: 2D array of shape (rows, columns).
        num: number of zeros to add on each side of each axis.

    Returns:
        Array of shape (rows + 2 * num, columns + 2 * num).
    """
    # Each row gets `num` zeros on its left and right.
    inp_pad = _pad_1d_batch(inp, num)
    # Zero rows for the top and bottom. Their width is taken from the padded
    # rows themselves, so non-square inputs are handled as well.
    other = np.zeros((num, inp_pad.shape[1]))
    return np.concatenate([other, inp_pad, other])

def _pad_2d(inp: ndarray, num: int):
    """Apply `_pad_2d_obs` to every observation in the batch `inp` and stack the results."""
    padded_batch = []
    for obs in inp:
        padded_batch.append(_pad_2d_obs(obs, num))
    return np.stack(padded_batch)

# Padding by 1 adds two to each spatial dimension of every image in the batch.
_pad_2d(imgs_2d_batch, 1).shape

(3, 30, 30)

In [20]:
# 2D convolutions: coding the forward pass
def _compute_output_obs_2d(obs: ndarray, param: ndarray):
    """
    Convolve one 2D observation with a 2D filter, keeping the input's size.

    The observation is zero-padded by `param.shape[0] // 2` on every side.
    Each output element is the sum of the filter multiplied elementwise with
    the window of the padded observation that starts at that element's index.
    The result is allocated with `np.zeros_like(obs)`.
    """
    pad_width = param.shape[0] // 2
    padded = _pad_2d_obs(obs, pad_width)
    result = np.zeros_like(obs)
    kernel_rows, kernel_cols = param.shape[0], param.shape[1]
    # np.ndindex walks the index pairs in row-major order, i.e. the same order
    # as two nested range() loops.
    for row, col in np.ndindex(result.shape[0], result.shape[1]):
        for k_row, k_col in np.ndindex(kernel_rows, kernel_cols):
            result[row, col] += param[k_row, k_col] * padded[row + k_row, col + k_col]
    return result

def _compute_output_2d(img_batch: ndarray, param: ndarray):
    """
    Convolve every image of a 3-dimensional batch with the same 2D filter.

    Raises:
        AssertionError: if `img_batch` does not have exactly 3 dimensions.
    """
    assert_dim(img_batch, 3)
    convolved = []
    for obs in img_batch:
        convolved.append(_compute_output_obs_2d(obs, param))
    return np.stack(convolved)

# Forward pass over the whole batch; only the shape of the result is displayed.
_compute_output_2d(imgs_2d_batch, param_2d).shape

(3, 28, 28)

In [21]:
# 2D convolutions: coding the backward pass
def _compute_grads_obs_2d(input_obs: ndarray, output_grad_obs: ndarray, param: ndarray) -> ndarray:
    """
    Input gradient of the 2D same-size convolution for a single observation.

    Args:
        input_obs: 2D observation that was convolved.
        output_grad_obs: gradient arriving from the output for this observation.
        param: square 2D filter used in the forward pass.

    Returns:
        Array allocated with `np.zeros_like(input_obs)` holding the gradient
        with respect to every input element.
    """
    param_size = param.shape[0]
    param_mid = param_size // 2
    output_obs_pad = _pad_2d_obs(output_grad_obs, param_mid)
    input_grad = np.zeros_like(input_obs)
    # Along each axis, input index i reaches output index i + param_mid - p
    # through weight p, which is index i + 2 * param_mid - p of the padded
    # output gradient. 2 * param_mid equals param_size - 1 for odd sizes, and
    # is the form that also holds for even sizes.
    flip_offset = 2 * param_mid
    for i_w in range(input_obs.shape[0]):
        for i_h in range(input_obs.shape[1]):
            for p_w in range(param_size):
                for p_h in range(param_size):
                    input_grad[i_w][i_h] += output_obs_pad[i_w + flip_offset - p_w][i_h + flip_offset - p_h] * param[p_w][p_h]
    return input_grad

def _compute_grads_2d(inp: ndarray, output_grad: ndarray, param: ndarray) -> ndarray:
    """
    Input gradients for a batch of 2D convolutions.

    Pairs `inp[i]` with `output_grad[i]` for every index of `output_grad`'s
    first axis, computes each gradient with `_compute_grads_obs_2d`, and
    stacks the results.
    """
    per_obs = []
    for idx in range(output_grad.shape[0]):
        per_obs.append(_compute_grads_obs_2d(inp[idx], output_grad[idx], param))
    return np.stack(per_obs)

# Input gradients for the whole batch, using an all-ones output gradient;
# only the shape is displayed.
img_grads = _compute_grads_2d(imgs_2d_batch, np.ones_like(imgs_2d_batch), param_2d)
img_grads.shape

(3, 28, 28)

In [22]:
def _param_grad_2d(inp: ndarray, output_grad: ndarray, param: ndarray) -> ndarray:
    """
    Filter gradient for a batch of 2D same-size convolutions.

    Every filter weight accumulates, over all observations and all output
    positions, the padded input value it multiplied in the forward pass times
    the output gradient at that position. The result is allocated with
    `np.zeros_like(param)`.
    """
    kernel_size = param.shape[0]
    padded = _pad_2d(inp, kernel_size // 2)
    grad = np.zeros_like(param)
    spatial = output_grad.shape[1:]
    for n in range(inp.shape[0]):
        # np.ndindex visits positions in the same row-major order as two
        # nested range() loops.
        for row, col in np.ndindex(spatial[0], spatial[1]):
            for k_row, k_col in np.ndindex(kernel_size, kernel_size):
                grad[k_row, k_col] += padded[n][row + k_row][col + k_col] * output_grad[n][row][col]
    return grad

# Filter gradient over the whole batch, using an all-ones output gradient;
# only the shape is displayed.
param_grad = _param_grad_2d(imgs_2d_batch, np.ones_like(imgs_2d_batch), param_2d)
param_grad.shape

(3, 3)

In [23]:
# Channels
def _pad_2d_channel(inp: ndarray, num: int):
    """Zero-pad each channel of `inp` with `_pad_2d_obs` and stack the padded channels."""
    padded_channels = []
    for channel in inp:
        padded_channels.append(_pad_2d_obs(channel, num))
    return np.stack(padded_channels)

def _pad_conv_input(inp: ndarray, num: int):
    """Zero-pad every observation of the batch `inp` with `_pad_2d_channel` and stack the results."""
    padded_obs = []
    for obs in inp:
        padded_obs.append(_pad_2d_channel(obs, num))
    return np.stack(padded_obs)

In [24]:
# Forward
def _compute_output_obs(obs: ndarray, param: ndarray):
    """
    Multi-channel same-size convolution of a single observation.

    Args:
        obs: 3-dimensional array indexed as [in_channel][row][column].
        param: 4-dimensional array indexed as
            [in_channel][out_channel][row][column]; the filter size is read
            from `param.shape[2]` and used for both spatial axes.

    Returns:
        Float array of shape (out_channels,) + obs.shape[1:], where every
        output channel sums the contributions of all input channels.

    Raises:
        AssertionError: if `obs` is not 3-dimensional or `param` is not
            4-dimensional.
    """
    assert_dim(obs, 3)
    assert_dim(param, 4)
    param_size = param.shape[2]
    param_mid = param_size // 2
    obs_pad = _pad_2d_channel(obs, param_mid)
    in_channels = param.shape[0]
    out_channels = param.shape[1]
    # Read the two spatial sizes separately. Reusing obs.shape[1] for both
    # axes only works for square images.
    img_height, img_width = obs.shape[1], obs.shape[2]
    out = np.zeros((out_channels, img_height, img_width))
    for c_in in range(in_channels):
        for c_out in range(out_channels):
            for o_w in range(img_height):
                for o_h in range(img_width):
                    for p_w in range(param_size):
                        for p_h in range(param_size):
                            out[c_out][o_w][o_h] += param[c_in][c_out][p_w][p_h] * obs_pad[c_in][o_w+p_w][o_h+p_h]
    return out

def _output(inp: ndarray, param: ndarray) -> ndarray:
    """Run `_compute_output_obs` on every observation of the batch `inp` and stack the outputs."""
    per_obs = []
    for obs in inp:
        per_obs.append(_compute_output_obs(obs, param))
    return np.stack(per_obs)

In [25]:
# Backward
def _compute_grads_obs(input_obs: ndarray, output_grad_obs: ndarray, param: ndarray) -> ndarray:
    """
    Input gradient of the multi-channel convolution for a single observation.

    Args:
        input_obs: array indexed as [in_channel][row][column].
        output_grad_obs: output gradient indexed as [out_channel][row][column].
        param: filters indexed as [in_channel][out_channel][row][column]; the
            filter size is read from `param.shape[2]` and used for both
            spatial axes.

    Returns:
        Array allocated with `np.zeros_like(input_obs)` holding the gradient
        with respect to every input element, summed over output channels.
    """
    input_grad = np.zeros_like(input_obs)
    param_size = param.shape[2]
    param_mid = param_size // 2
    in_channels = input_obs.shape[0]
    out_channels = param.shape[1]
    output_obs_pad = _pad_2d_channel(output_grad_obs, param_mid)
    # Along each axis, input index i reaches output index i + param_mid - p
    # through weight p, which is index i + 2 * param_mid - p of the padded
    # output gradient. 2 * param_mid equals param_size - 1 for odd sizes, and
    # is the form that also holds for even sizes.
    flip_offset = 2 * param_mid
    for c_in in range(in_channels):
        for c_out in range(out_channels):
            for i_w in range(input_obs.shape[1]):
                for i_h in range(input_obs.shape[2]):
                    for p_w in range(param_size):
                        for p_h in range(param_size):
                            input_grad[c_in][i_w][i_h] += output_obs_pad[c_out][i_w+flip_offset-p_w][i_h+flip_offset-p_h] * param[c_in][c_out][p_w][p_h]
    return input_grad

def _input_grad(inp: ndarray, output_grad: ndarray, param: ndarray) -> ndarray:
    """
    Input gradients for a batch of multi-channel convolutions.

    Pairs `inp[i]` with `output_grad[i]` for every index of `output_grad`'s
    first axis, computes each gradient with `_compute_grads_obs`, and stacks
    the results.
    """
    per_obs = []
    for idx in range(output_grad.shape[0]):
        per_obs.append(_compute_grads_obs(inp[idx], output_grad[idx], param))
    return np.stack(per_obs)

In [26]:
def _param_grad(inp: ndarray, output_grad: ndarray, param: ndarray) -> ndarray:
    """
    Filter gradient for a batch of multi-channel convolutions.

    Args:
        inp: batch indexed as [observation][in_channel][row][column].
        output_grad: output gradient indexed as
            [observation][out_channel][row][column].
        param: filters indexed as [in_channel][out_channel][row][column]; the
            filter size is read from `param.shape[2]` and used for both
            spatial axes.

    Returns:
        Array allocated with `np.zeros_like(param)`. Each weight accumulates,
        over all observations and output positions, the padded input value it
        multiplied in the forward pass times the matching output gradient.
    """
    param_grad = np.zeros_like(param)
    param_size = param.shape[2]
    param_mid = param_size // 2
    in_channels = inp.shape[1]
    out_channels = output_grad.shape[1]
    inp_pad = _pad_conv_input(inp, param_mid)
    out_height, out_width = output_grad.shape[2], output_grad.shape[3]
    for i in range(inp.shape[0]):
        for c_in in range(in_channels):
            for c_out in range(out_channels):
                for o_w in range(out_height):
                    for o_h in range(out_width):
                        for p_w in range(param_size):
                            for p_h in range(param_size):
                                param_grad[c_in][c_out][p_w][p_h] += inp_pad[i][c_in][o_w+p_w][o_h+p_h] * output_grad[i][c_out][o_w][o_h]
    return param_grad

In [27]:
# Experiments
X_train, y_train, X_test, y_test = load()
# Keep only a subset (15000 training and 2500 test examples, down from 60000
# and 10000) so the run is faster while accuracy stays acceptable.
X_train, y_train = X_train[:15000], y_train[:15000]
X_test, y_test = X_test[:2500], y_test[:2500]
# Centre both sets with the training mean, then scale both by the standard
# deviation of the already-centred training set.
train_mean = np.mean(X_train)
X_train, X_test = X_train - train_mean, X_test - train_mean
train_std = np.std(X_train)
X_train, X_test = X_train / train_std, X_test / train_std
# Reshape to (observations, 1 channel, 28, 28) for the convolutional layer.
X_train_conv = X_train.reshape(-1, 1, 28, 28)
X_test_conv = X_test.reshape(-1, 1, 28, 28)

In [28]:
# One-hot encode the training labels: row i gets a 1 in column y_train[i].
num_labels = len(y_train)
train_labels = np.zeros((num_labels, 10))
for i, label in enumerate(y_train):
    train_labels[i][label] = 1

# Same encoding for the test labels.
num_labels = len(y_test)
test_labels = np.zeros((num_labels, 10))
for i, label in enumerate(y_test):
    test_labels[i][label] = 1

In [29]:
def calc_accuracy_model(model, test_set, labels=None):
    """
    Print the percentage of `test_set` rows whose predicted class matches its label.

    Args:
        model: object exposing `forward(test_set, inference=True)` that
            returns one row of class scores per observation.
        test_set: observations to score; its first dimension is used as the
            denominator of the accuracy.
        labels: integer class labels, one per row of `test_set`. When omitted,
            the notebook-level `y_test` is used, which is only meaningful if
            `test_set` is the matching test set.

    Returns:
        None; the accuracy is printed with two decimals.
    """
    if labels is None:
        # Fall back to the notebook's global labels, as the function always did.
        labels = y_test
    predictions = np.argmax(model.forward(test_set, inference=True), axis=1)
    accuracy = np.equal(predictions, labels).sum() * 100.0 / test_set.shape[0]
    print(f'''The model validation accuracy is: {accuracy:.2f}%''')

In [30]:
# Build a two-layer network (one Conv2D layer with Tanh activation whose output
# is flattened, then a 10-neuron Dense layer with Linear activation) using a
# SoftmaxCrossEntropy loss.
# NOTE(review): these classes presumably come from the `lincoln` star import — confirm.
# NOTE(review): whether dropout=0.8 is a keep or a drop probability is not
# visible in this notebook — verify against the Conv2D implementation.
model = NeuralNetwork(
    layers=[Conv2D(out_channels=16, param_size=5, dropout=0.8, weight_init="glorot", flatten=True, activation=Tanh()),
            Dense(neurons=10, activation=Linear())],
    loss = SoftmaxCrossEntropy(), 
    seed=20190402)

# Train for a single epoch with SGD plus momentum, evaluating on the test
# split after that epoch; the same seed is passed to the model and to fit().
trainer = Trainer(model, SGDMomentum(lr = 0.1, momentum=0.9))
trainer.fit(X_train_conv, train_labels, X_test_conv, test_labels, epochs = 1, eval_every = 1, seed=20190402, batch_size=60, conv_testing=True)

Validation loss after 1 epochs is 7.907


In [31]:
# Report the trained model's accuracy on the reshaped test images; the labels
# compared against are the notebook-level y_test.
calc_accuracy_model(model, X_test_conv)

The model validation accuracy is: 82.44%
