# Hands-on implementation of CNN with NumPy: softmax loss layer

## softmax
$$\sigma_{i}(z)=\frac{exp(z_{i})}{\sum_{j=1}^m exp(z_{j})} , i=1,...,m$$
## goal function (loss function)
cross entropy(交叉熵,衡量两个分布的相似度:值越小,两个分布越接近)
这里($y$ 为真实类别):
$$ cross\_entropy(pred,target) = -log\_likelihood = -log(\sigma_{y}(z)) $$

$$ log\_likelihood: log(\sigma_{y}(z))=log(\frac{exp(z_{y})}{\sum_{j=1}^m exp(z_{j})}) = z_{y}-log(\sum_{j=1}^m{e^{z_j}}) $$
## 反向传播
softmax 层没有可学习的参数,所以我们只需要计算 $\frac{\partial loss}{\partial z}$ 即可。
$$loss=-log\_likelihood = log(\sum_{j=1}^m{e^{z_j}}) - z_{y} $$
$$\frac{\partial loss}{\partial z_k} = \frac{exp(z_{k})}{\sum_{j=1}^m exp(z_{j})} - \delta_{ky} = \sigma_{k}(z)-\delta_{ky}, \quad \delta_{ky}= \begin{cases} 0 & y \neq k \\ 1 & y = k \end{cases}$$

In [1]:
import numpy as np
# Show the version string of the imported NumPy package.
print(np.__version__)

1.16.4


In [11]:
class Softmax(object):
    """Softmax layer fused with a cross-entropy loss.

    The layer has no trainable parameters. The loss is the *sum* (not the
    mean) of the per-sample cross-entropies, and ``backward_propagate``
    returns the gradient of that summed loss with respect to the logits.
    """

    def __init__(self, shape):
        """Create the layer for logits of shape ``(batch, classes)``."""
        # NOTE: despite its name this holds a zero-filled array with the
        # input shape, not the shape tuple; kept as-is for compatibility.
        self.input_shape = np.zeros(shape)
        self.batch_size = shape[0]
        # Gradient of the loss with respect to the logits z.
        self.delta = np.zeros(shape)

    @staticmethod
    def _shifted_logits(pred):
        """Return a float copy of ``pred`` with each row's maximum subtracted.

        Softmax is invariant to a per-row shift, and shifting so the largest
        entry is 0 keeps ``exp`` from overflowing. Working on a copy leaves
        the caller's array untouched.
        """
        logits = np.array(pred, dtype=np.float64)
        logits -= np.max(logits, axis=1, keepdims=True)
        return logits

    @staticmethod
    def _labels(target):
        """Return ``target`` as a flat array of class indices."""
        return np.asarray(target).reshape(-1)

    def cal_loss(self, pred, target):
        """Compute the summed cross-entropy loss and cache the probabilities.

        Args:
            pred: 2-D array of logits, one row per sample.
            target: sequence of integer class indices, one per sample.

        Returns:
            The loss ``sum_i (log(sum_j exp(z_ij)) - z_i[y_i])``.
        """
        self.target = target
        self.pred = pred

        logits = self._shifted_logits(pred)
        rows = np.arange(logits.shape[0])
        # log-sum-exp on the shifted logits; the shift cancels out in
        # log(sum(exp(z))) - z_y, so the value equals the unshifted formula.
        log_norm = np.log(np.sum(np.exp(logits), axis=1))
        self.loss = np.sum(log_norm - logits[rows, self._labels(target)])

        self.forward_propagate(pred)
        return self.loss

    def forward_propagate(self, pred):
        """Compute row-wise softmax probabilities of ``pred``.

        The result is stored in ``self.prob`` and returned. ``pred`` itself
        is not modified.
        """
        exps = np.exp(self._shifted_logits(pred))
        self.prob = exps / np.sum(exps, axis=1, keepdims=True)
        return self.prob

    def predict(self):
        """must be called after forward/cal_loss"""
        return np.argmax(self.prob, axis=1)

    def backward_propagate(self):
        """Return d(loss)/d(z) = softmax(z) - one_hot(target).

        Must be called after ``cal_loss``, which caches the probabilities
        and the targets.
        """
        grad = self.prob.copy()
        rows = np.arange(grad.shape[0])
        grad[rows, self._labels(self.target)] -= 1
        self.delta = grad
        return self.delta

In [12]:
# test code: run the layer on a 2x4 batch of logits with labels 3 and 1,
# then print the loss, the predicted classes and the gradient
a = np.array([[1, 2, 3, 5],
              [4, 5, 2, 3]])
mysoftmax = Softmax(shape=a.shape)
loss = mysoftmax.cal_loss(pred=a, target=[3, 1])
pred = mysoftmax.predict()
print(loss, pred)
delta = mysoftmax.backward_propagate()
print(delta)

0.6253721511650081 [3 1]
[[ 0.01521943  0.0413707   0.11245721 -0.16904734]
 [ 0.23688282 -0.35608574  0.0320586   0.08714432]]


In [3]:
# demo code: with axis=1, np.argmax returns for each row the index of its
# largest entry (here 5 is at index 3 in row 0 and at index 1 in row 1)
np.argmax([[1,2,3,5],[4,5,2,3]],axis=1)

array([3, 1])