In [13]:
import numpy as np
# Seed NumPy's global RNG so the np.random draws in later cells are reproducible.
np.random.seed(1234)

In [14]:
# One-hot targets for 5 rows over 3 classes: rows 0-1 -> class 0,
# rows 2-3 -> class 1, row 4 -> class 2. Selecting rows of the identity
# matrix yields the same float array as zero-filling and setting entries.
t = np.eye(3)[[0, 0, 1, 1, 2]]

# Bias kept as a (1, 3) row so it broadcasts across the rows of x.dot(w).
b = np.ones((1, 3))

# Random draws stay in this order (x first, then w) so the values taken
# from the seeded global RNG are unchanged.
x = np.random.rand(5, 4)
w = np.random.randn(4, 3)

In [15]:
# Display the 5x4 input matrix drawn with np.random.rand.
x

array([[ 0.19151945,  0.62210877,  0.43772774,  0.78535858],
       [ 0.77997581,  0.27259261,  0.27646426,  0.80187218],
       [ 0.95813935,  0.87593263,  0.35781727,  0.50099513],
       [ 0.68346294,  0.71270203,  0.37025075,  0.56119619],
       [ 0.50308317,  0.01376845,  0.77282662,  0.88264119]])

In [16]:
# Display the 4x3 weight matrix drawn with np.random.randn.
w

array([[ 1.32115819, -1.54690555, -0.20264632],
       [-0.65596934,  0.19342138,  0.55343891],
       [ 1.31815155, -0.46930528,  0.67555409],
       [-1.81702723, -0.18310854,  1.05896919]])

In [21]:
def logistic_model(inputs, weights, bias):
    """Forward pass of a softmax (multinomial logistic) regression model.

    Computes ``softmax(inputs . weights + bias)`` independently for each row.

    Parameters
    ----------
    inputs : numpy.ndarray
        2-D array with one example per row.
    weights : numpy.ndarray
        2-D array whose first dimension matches ``inputs.shape[1]``.
    bias : numpy.ndarray
        Array that broadcasts against ``inputs . weights`` (for example a
        single row with one entry per output column).

    Returns
    -------
    numpy.ndarray
        Array with the shape of ``inputs . weights`` holding the softmax
        probabilities; each row sums to 1.
    """
    # Use the function's own arguments. Reading the notebook globals here
    # would silently ignore whatever the caller passed in.
    z = np.dot(inputs, weights) + bias
    # Safe softmax: subtracting the row maximum leaves the result unchanged
    # but keeps np.exp from overflowing on large scores.
    z = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(z)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

In [22]:
def ce_loss(output, target):
    """Mean cross-entropy between predicted probabilities and targets.

    Computes ``-mean_i( sum_j target[i, j] * log(output[i, j]) )``.

    Terms whose target entry is zero contribute exactly 0, following the
    usual ``0 * log(0) = 0`` convention. Without this, a predicted
    probability that underflows to 0 for a non-target class would produce
    ``0 * -inf = nan`` and turn the whole loss into NaN.

    Parameters
    ----------
    output : array_like
        2-D array of predicted probabilities, one row per example.
    target : array_like
        Array of target weights (e.g. one-hot rows) broadcastable against
        ``output``.

    Returns
    -------
    numpy.float64
        The loss averaged over rows. It is ``inf`` if a class with non-zero
        target weight has predicted probability 0.
    """
    output = np.asarray(output)
    target = np.asarray(target)
    # Replace probabilities at zero-target positions with 1 so their log is
    # exactly 0 and cannot inject -inf/nan into the sum.
    masked_output = np.where(target != 0, output, 1.0)
    per_example = np.sum(target * np.log(masked_output), axis=1)
    return -1.0 * np.mean(per_example)

In [23]:
# Forward pass on the toy data: row-wise softmax of x.dot(w) + b.
output = logistic_model(x, w, b)

In [24]:
# Mean cross-entropy of the randomly initialised model against the targets t.
ce_loss(output, t)

2.04161245474255

---

In [31]:
def gradients(logits, target, inputs):
    """Gradients of the row-averaged cross-entropy loss for softmax regression.

    Parameters
    ----------
    logits : numpy.ndarray
        Model outputs; must have the same shape as ``target``.
    target : numpy.ndarray
        Target array; its first dimension is used as the averaging count.
    inputs : numpy.ndarray
        Model inputs with the same number of rows as ``logits``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(grad_w, grad_b)`` where ``grad_w`` is ``inputs.T . delta`` and
        ``grad_b`` is ``delta`` summed over rows (kept as a single row),
        with ``delta = (logits - target) / number_of_rows``.

    Raises
    ------
    AssertionError
        If the shapes of ``logits``, ``target`` and ``inputs`` disagree.
    """
    # Shape sanity checks before doing any arithmetic.
    assert target.shape == logits.shape
    assert logits.shape[0] == inputs.shape[0]

    # Explicit float count so the division is true division.
    n_rows = float(target.shape[0])
    delta = (logits - target) / n_rows

    weight_grad = np.dot(inputs.T, delta)
    bias_grad = np.sum(delta, axis=0, keepdims=True)
    return weight_grad, bias_grad

In [32]:
# Sanity check: each gradient must have the same shape as the parameter it updates.
eg_w, eg_b = gradients(logits=logistic_model(x, w, b), target=t, inputs=x)
assert w.shape == eg_w.shape
assert b.shape == eg_b.shape