In [None]:
import numpy as np

# 7-3. Gradient of matrix addition

## Matrix and Vector Addition

In [2]:
# X is a 2x3 matrix and b is a length-3 vector (one entry per column of X).
X = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
b = np.array([10.0, 20.0, 30.0])

In [3]:
# b has one entry per column of X, so NumPy broadcasting adds b to every row of X.
X + b

array([[ 11.,  22.,  33.],
       [ 14.,  25.,  36.]])

## Function

In [4]:
def f(X, b):
    """Return the scalar sum of every entry of ``X + b``.

    ``b`` is combined with ``X`` using NumPy broadcasting, so an entry of
    ``b`` contributes once for every entry of ``X`` it is added to.
    """
    shifted = X + b
    return np.sum(shifted)

In [5]:
# Evaluate the scalar objective: the sum of all entries of X + b.
f(X, b)

141.0

## Numerical Gradient

In [6]:
def numerical_gradient(X, b, func=None, h=1e-4):
    """Estimate the gradients of a scalar function of ``(X, b)`` numerically.

    Every entry of ``X`` and of ``b`` is perturbed in turn by ``+h`` and
    ``-h`` and the central difference ``(f(+h) - f(-h)) / (2 * h)`` is
    recorded for that entry.

    Parameters
    ----------
    X, b : array_like
        Point at which the gradient is evaluated. The inputs are never
        modified: the perturbations are applied to private float copies,
        so integer and read-only arrays are handled correctly.
    func : callable, optional
        Scalar-valued function called as ``func(X, b)``. Defaults to the
        notebook's ``f``.
    h : float, optional
        Half-width of the central-difference step.

    Returns
    -------
    (dX, db) : tuple of numpy.ndarray
        Float arrays with the same shapes as ``X`` and ``b``.
    """
    if func is None:
        func = f  # resolved lazily so an explicit ``func`` needs no global ``f``

    # Float copies: keeps the caller's data untouched (even if ``func`` raises)
    # and prevents ``original + h`` from being truncated for integer inputs.
    X_work = np.array(X, dtype=float)
    b_work = np.array(b, dtype=float)

    def _central_difference(target):
        # ``target`` is X_work or b_work; ``func`` always sees both arrays,
        # with exactly one entry of ``target`` perturbed at a time.
        grad = np.zeros_like(target)
        for idx in np.ndindex(target.shape):
            original = target[idx]

            target[idx] = original + h
            v_plus = func(X_work, b_work)
            target[idx] = original - h
            v_minus = func(X_work, b_work)
            target[idx] = original  # restore before moving to the next entry

            grad[idx] = (v_plus - v_minus) / (2 * h)
        return grad

    dX = _central_difference(X_work)
    db = _central_difference(b_work)
    return dX, db

In [7]:
# Finite-difference estimate of (dX, db) for f at the X and b defined above.
numerical_gradient(X, b)

(array([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]]), array([ 2.,  2.,  2.]))

## Gradient of Matrix Vector Addition

$$
\boldsymbol{Y} = \boldsymbol{X} + \boldsymbol{b}, \qquad y_{ik} = x_{ik} + b_k \quad (i = 1, \dots, N)
$$

$$
\frac{\partial L}{\partial \boldsymbol{X}} = \frac{\partial L}{\partial \boldsymbol{Y}}
$$

$$
\frac{\partial L}{\partial \boldsymbol{b}} = \left[ \frac{\partial L}{\partial b_k} \right] = \left[ \sum_{i=1}^N \frac{\partial L}{\partial y_{ik}} \right]
$$

In [8]:
# Re-create the base X and b and evaluate f there; this value is the
# reference for the perturbation checks further down.
X = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
b = np.array([10.0, 20.0, 30.0])
f(X, b)

141.0

In [9]:
# f sums every entry of Y = X + b, so the upstream gradient dL/dY is all ones.
dY = np.ones((2, 3))
# dL/dX equals dL/dY element-wise. Copy instead of aliasing so a later
# in-place change to one array cannot silently alter the other.
dX = dY.copy()

In [10]:
dX

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [11]:
# Each b_k is added to every row, so its gradient is the column-wise sum of dY.
db = dY.sum(axis=0)

In [12]:
db

array([ 2.,  2.,  2.])

In [13]:
# Sanity check: raise only b[0] by 1 (10 -> 11). f should rise by 2 from the
# base value 141, because b[0] is added to both rows of X.
X2 = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
b2 = np.array([11.0, 20.0, 30.0])
f(X2, b2)

143.0

In [14]:
# Sanity check: raise only X[0, 2] by 1 (3 -> 4). f should rise by 1 from the
# base value 141, because a single entry of X appears once in the sum.
X2 = np.array([[1.0, 2.0, 4.0], [4.0, 5.0, 6.0]])
b2 = np.array([10.0, 20.0, 30.0])
f(X2, b2)

142.0

In [15]:
# Sanity check: raise X[0, 0], X[0, 2], b[0] and b[2] by 1 each. The expected
# change from the base value 141 is 1 + 1 + 2 + 2 = 6.
X3 = np.array([[2.0, 2.0, 4.0], [4.0, 5.0, 6.0]])
b3 = np.array([11.0, 20.0, 31.0])
f(X3, b3)

147.0