##### 1. 推导softmax clf 的 analytic gradient，并采用 numpy 实现其计算

##### 2. 用numerical gradient 来检验实现的正确性

In [1]:
import numpy as np

In [2]:
x = np.array([0.2,0.3,0.1,0.5]).reshape(1,4) #x n,3072
y = np.array([1]) #n ,1
W = np.random.randn(3,4) #10，3072 10 n

In [3]:
def eval_analytic_grad(W,x, y):
    scores = x.dot(W.T)
    exp_scores = np.exp(scores).reshape(W.shape[0],)
    pro_scores = exp_scores / np.sum(exp_scores)
    grad = np.zeros(W.shape)
    for i in range(W.shape[0]):
        if(i==y):
            grad[i] = (pro_scores[i] - 1) * x 
        else:
            grad[i] = pro_scores[i] * x
    return grad

In [4]:
def loss(W,x,y):
    scores = x.dot(W.T)
    exp_scores = np.exp(scores).reshape(W.shape[0],)
    pro_scores = exp_scores / np.sum(exp_scores)
    return -np.log(pro_scores[y])

In [5]:
def eval_numerical_grad(W,x,y):
    h = 1e-5
    grad = np.zeros(W.shape)
    for i in range(W.shape[0]):
        for j in range(W.shape[1]):
            fx = loss(W,x,y)
            W[i][j] =  W[i][j] + h
            fx_h = loss(W,x,y)
            grad[i][j] = (fx_h - fx) / h
    return grad

In [6]:
numerical_grad = eval_numerical_grad(W,x,y)
analytic_grad = eval_analytic_grad(W,x,y)
print('numerical grad:',numerical_grad)
print('analytic_grad:',analytic_grad)
print('diff',np.sum(np.abs(numerical_grad-analytic_grad)))

numerical grad: [[ 0.07695148  0.1154274   0.03847585  0.1923796 ]
 [-0.17587639 -0.26381451 -0.08793815 -0.43969058]
 [ 0.09892438  0.14838675  0.0494623   0.24731188]]
analytic_grad: [[ 0.07695144  0.11542715  0.03847572  0.19237859]
 [-0.17587631 -0.26381447 -0.08793816 -0.43969078]
 [ 0.09892488  0.14838731  0.04946244  0.24731219]]
diff 3.2698969241601317e-06


In [7]:
epoch = 200;
eta = 0.5;
for i in range(epoch):   
    if(i%20==0):
        print('%d loss:%f' %(i,loss(W,x,y)))
    grad = eval_numerical_grad(W,x,y)
    W = W - grad * eta

0 loss:2.115123
20 loss:0.281491
40 loss:0.116665
60 loss:0.071513
80 loss:0.051179
100 loss:0.039732
120 loss:0.032423
140 loss:0.027363
160 loss:0.023657
180 loss:0.020829


In [8]:
epoch = 200;
eta = 0.5;
for i in range(epoch):   
    if(i%20==0):
        print('%d loss:%f' %(i,loss(W,x,y)))
    grad = eval_analytic_grad(W,x,y)
    W = W - grad * eta

0 loss:0.018600
20 loss:0.016799
40 loss:0.015315
60 loss:0.014070
80 loss:0.013011
100 loss:0.012100
120 loss:0.011307
140 loss:0.010612
160 loss:0.009997
180 loss:0.009449
