# Implementation notes

In [84]:
import numpy as np
from layer_test import eval_numerical_gradient_array
from layer import affine_forward

## Affine layer

### forward

In [2]:
# Dimensions of the toy affine-layer problem used in the cells below.
input_shape = (4, 5, 6)            # per-sample shape (d_1, ..., d_k)
num_inputs, output_dim = 2, 3      # number of samples N, number of outputs M

In [28]:
# Element counts for x and w: each is a multiplier (number of samples,
# number of outputs) times the number of values in one flattened sample.
input_size, weight_size = (
    count * np.prod(input_shape) for count in (num_inputs, output_dim)
)
print('input size {}, weight size {}'.format(input_size, weight_size))

input size 240, weight size 360


In [122]:
# Deterministic inputs for the affine forward pass. Every array is filled
# with evenly spaced values, so the cell needs no random seed.
# x: (N, d_1, ..., d_k)
x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
# w: (D, M), where D = d_1 * ... * d_k is the flattened size of one sample.
# np.prod is used because the np.product alias was deprecated in NumPy 1.25
# and removed in NumPy 2.0, where calling it raises AttributeError.
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape), output_dim)
# b: (M,)
b = np.linspace(-0.3, 0.1, num=output_dim)
print('x {}, w {}, b {}'.format(x.shape, w.shape, b.shape))

x (2, 4, 5, 6), w (120, 3), b (3,)


In [124]:
# Affine forward pass: flatten each sample into a row, multiply by the
# weight matrix, and add the bias (broadcast over the rows).
N = x.shape[0]
# (2, 4, 5, 6) -> (2, 120); (3,) + (2, 120)(120, 3) -> (2, 3)
output = b + x.reshape((N, -1)).dot(w)

### backward

In [96]:
# Seeded standard-normal inputs for checking the affine backward pass.
# The draw order (x, w, b, d_output) is kept so the values are reproducible.
np.random.seed(231)
x = np.random.standard_normal(size=(10, 2, 3))   # (N, d_1, ..., d_k)
w = np.random.standard_normal(size=(6, 5))       # (D, M)
b = np.random.standard_normal(size=5)            # (M,)
d_output = np.random.standard_normal(size=(10, 5))  # upstream gradient, (N, M)

In [120]:
# Affine backward pass: gradients of the loss w.r.t. b, w and x, given the
# upstream gradient d_output of shape (N, M).
N = d_output.shape[0]
# Bias gradient: sum the upstream gradient over the batch, (10, 5) -> (5,)
d_b = d_output.sum(axis=0)
# Weight gradient: flattened inputs transposed times upstream gradient,
# (6, 10)(10, 5) -> (6, 5)
d_w = x.reshape((N, -1)).T.dot(d_output)
# Input gradient: (10, 5)(5, 6) -> (10, 6), restored to x's shape (10, 2, 3)
d_x = d_output.dot(w.T).reshape(x.shape)

## ReLU layer

### forward

In [126]:
# Twelve evenly spaced values covering [-0.5, 0.5], arranged as 3 rows of 4,
# so the ReLU input holds both negative and positive entries.
x = np.linspace(-0.5, 0.5, 12).reshape((3, 4))
print(x)

[[-0.5        -0.40909091 -0.31818182 -0.22727273]
 [-0.13636364 -0.04545455  0.04545455  0.13636364]
 [ 0.22727273  0.31818182  0.40909091  0.5       ]]


In [128]:
# ReLU forward pass: element-wise maximum of x and 0, so negative entries
# become 0 and positive entries pass through unchanged.
# NOTE(review): np.fmax ignores NaN (a NaN in x would come out as 0), unlike
# np.maximum which propagates it; confirm that is intended.
output = np.fmax(x, 0)
# Bare expression so the notebook displays the result.
output

array([[ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.04545455,  0.13636364],
       [ 0.22727273,  0.31818182,  0.40909091,  0.5       ]])

### backward

In [140]:
# Seeded inputs for the ReLU backward pass. x stands in for the forward
# input and d_output for the upstream gradient; both are drawn uniformly
# from [0, 1) and have the same shape.
np.random.seed(231)
x = np.random.random_sample((10, 10))
d_output = np.random.random_sample(x.shape)
x

array([[ 0.78149404,  0.58420912,  0.42671007,  0.31534469,  0.82688829,
         0.90342948,  0.03893811,  0.9152226 ,  0.0605054 ,  0.18072686],
       [ 0.07229972,  0.45876213,  0.70835907,  0.48323507,  0.02544653,
         0.06690243,  0.42587801,  0.21924753,  0.81013282,  0.14005397],
       [ 0.7511994 ,  0.27910319,  0.94910551,  0.88502136,  0.61016226,
         0.7791191 ,  0.04155608,  0.78641275,  0.90247651,  0.43299087],
       [ 0.88658373,  0.54854139,  0.98697656,  0.56530906,  0.90103568,
         0.56373797,  0.09589743,  0.61752115,  0.06001465,  0.58039436],
       [ 0.87168015,  0.79984038,  0.06665333,  0.0601503 ,  0.89930015,
         0.64367389,  0.66795862,  0.10014901,  0.64597798,  0.04273061],
       [ 0.61705425,  0.26200791,  0.71747807,  0.99686535,  0.25998108,
         0.31931504,  0.15587636,  0.00868051,  0.4974258 ,  0.23329013],
       [ 0.10843416,  0.94530803,  0.89117147,  0.27868291,  0.82618052,
         0.44884675,  0.02227444,  0.9040584 

In [141]:
# ReLU backward pass.
# Replay the forward step first: after rectification every entry of x is
# either 0 or positive.
x = np.fmax(x, 0)
# np.sign of the rectified x is 1 where the unit was active and 0 elsewhere,
# so it acts as the mask that gates the upstream gradient.
d_x = d_output * np.sign(x)
d_x

array([[ 0.76154896,  0.91196687,  0.78012778,  0.0426785 ,  0.09628379,
         0.96360132,  0.18255639,  0.42268435,  0.09856554,  0.39204418],
       [ 0.37009128,  0.17598795,  0.81255037,  0.15599316,  0.37459224,
         0.86791336,  0.09155779,  0.67419831,  0.51670093,  0.4308584 ],
       [ 0.51812879,  0.14569743,  0.45788877,  0.25368625,  0.31532703,
         0.4304745 ,  0.39769089,  0.9519803 ,  0.54986659,  0.12407137],
       [ 0.88699947,  0.88812986,  0.16012489,  0.43573028,  0.62325211,
         0.07705074,  0.02586914,  0.53237291,  0.05385741,  0.68296149],
       [ 0.20119076,  0.83558642,  0.11182925,  0.36794226,  0.9120864 ,
         0.4477751 ,  0.26554136,  0.2986834 ,  0.74768716,  0.08348653],
       [ 0.81137312,  0.39549242,  0.30543642,  0.20260782,  0.98372077,
         0.48823249,  0.17551912,  0.5949902 ,  0.69293361,  0.39953139],
       [ 0.53414021,  0.40751671,  0.54006139,  0.13837955,  0.22913498,
         0.07316323,  0.90060281,  0.1759985 