# Hands-on implementation of a CNN with NumPy: mycnn on the MNIST dataset

## 为什么沿负梯度方向更新：该方向上函数值下降最快
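$$f(\vec{x}+\vec{\delta}) - f(\vec{x}) = \sum_{k=1}^n\frac{\partial f}{\partial x_{k}}\,\delta_{k} + o(\Vert\vec{\delta}\Vert) = \left\langle\frac{\partial f}{\partial \vec{x}}, \vec{\delta}\right\rangle + o(\Vert\vec{\delta}\Vert), \qquad \left\langle\frac{\partial f}{\partial \vec{x}}, \vec{\delta}\right\rangle \le \left\Vert\frac{\partial f}{\partial \vec{x}}\right\Vert \cdot \Vert\vec{\delta}\Vert$$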
上式中的不等号来自 Cauchy-Schwarz 不等式。
取等条件：在$\delta$不全为 0 的情况下，当且仅当存在常数$K \ge 0$使得$$\frac{\partial f}{\partial x_{i}} = K\delta_{i}$$对于所有的 i 成立，即$\delta$与梯度方向相同，此时函数值上升最快；因此取$\delta$为负梯度方向时，函数值下降最快。
## 随机梯度下降法：对全量梯度下降法的优化
- 核心思想（key idea）：全体样本上的梯度可以看作单个样本梯度的期望，因此用随机采样的小批量（mini-batch）的平均梯度去近似估计它
- 在之前的层中 已经有backpropagate中对于这一部分的实现 或者是在update_params中实现 都是针对当前batch的data
- 在更新参数中引入了**L2 regularization 正则化(weight_decay)**

## cnn on mnist data

1. 准备数据
2. 搭建网络
3. 基于data进行train:迭代采样数据，进行前向计算，反向传播，求导，更新参数直到停止
4. 基于训练好的model进行predict等应用

In [1]:
import numpy as np
import time
import os
import matplotlib.pyplot as plt

from layers.conv_layer import Conv2D
from layers.fc_layer import FullyConnectedLayer
from layers.pooling_layers import MaxPooling,AveragePooling
from layers.softmax_layer import Softmax
from layers.activation_func_relu import Relu

### load data (mnist dataset)

In [2]:
# `fetch_mldata` was deprecated in scikit-learn 0.20 and removed in 0.22, so this
# cell loads the 70000 x 784 MNIST table through `fetch_openml` instead.
from sklearn.datasets import fetch_openml

# as_frame=False keeps `data` / `target` as NumPy arrays instead of pandas objects,
# so the slicing and `.reshape` calls in the later cells keep working.
mnist = fetch_openml('mnist_784', version=1, data_home='./dataset/', as_frame=False)
print(mnist.data.shape)
print(mnist.target.shape)
datas = mnist.data
# OpenML serves the digit labels as strings; convert them to numbers so they can be
# compared with argmax predictions in the training cell.
# NOTE(review): float is used because the removed loader is believed to have
# returned float labels -- confirm against what Softmax.cal_loss expects.
labels = mnist.target.astype(float)

(70000, 784)
(70000,)




In [3]:
# Hold out everything after the first 50000 rows for evaluation.
# The split is positional: no shuffling is performed here.
split_at = 50000
train_datas, test_datas = datas[:split_at], datas[split_at:]
train_labels, test_labels = labels[:split_at], labels[split_at:]
print(train_datas.shape, train_labels.shape, test_datas.shape, test_labels.shape)

(50000, 784) (50000,) (20000, 784) (20000,)


### 卷积神经网络搭建（3 个 conv-relu-pool 模块 + 全连接层 + softmax）

In [4]:
# Mini-batch size; it is baked into the input shape handed to the first layer,
# and every later layer is sized from the previous layer's `output_shape`.
batch_size = 32
input_shape = [batch_size, 28, 28, 1]  # each 784-value row is fed as 28 x 28 x 1

# NOTE(review): the positional arguments are presumably
# Conv2D(shape, out_channels, kernel_size, stride) and
# MaxPooling(shape, window, stride) -- confirm against the layers package.

# Block 1: conv -> relu -> max-pool
conv1 = Conv2D(input_shape, 32, 3, 1)
relu1 = Relu(conv1.output_shape)
pool1 = MaxPooling(relu1.output_shape, 2, 2)

# Block 2: conv -> relu -> max-pool
conv2 = Conv2D(pool1.output_shape, 64, 3, 1)
relu2 = Relu(conv2.output_shape)
pool2 = MaxPooling(relu2.output_shape, 2, 2)

# Block 3: conv -> relu -> max-pool
conv3 = Conv2D(pool2.output_shape, 128, 3, 1)
relu3 = Relu(conv3.output_shape)
pool3 = MaxPooling(relu3.output_shape, 2, 2)

# Classifier head: 10 outputs followed by the softmax/loss layer.
fc = FullyConnectedLayer(pool3.output_shape, 10)
problayer = Softmax(fc.output_shape)

In [None]:
epochs = 10
learning_rate = 1e-3


def _timestamp():
    """Return the current local time formatted for the progress log lines."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())


def _forward_pass(img):
    """Push one batch through the three conv/relu/pool blocks and the FC layer.

    Returns the FC layer output, which is what `problayer.cal_loss` consumes.
    """
    conv1_out = conv1.forward_propagate(img)
    relu1_out = relu1.forward_propagate(conv1_out)
    pool1_out = pool1.forward_propagate(relu1_out)
    conv2_out = conv2.forward_propagate(pool1_out)
    relu2_out = relu2.forward_propagate(conv2_out)
    pool2_out = pool2.forward_propagate(relu2_out)
    conv3_out = conv3.forward_propagate(pool2_out)
    relu3_out = relu3.forward_propagate(conv3_out)
    pool3_out = pool3.forward_propagate(relu3_out)
    return fc.forward_propagate(pool3_out)


def _backward_pass(learning_rate):
    """Back-propagate from the softmax layer down to conv1.

    The softmax layer already holds the loss from the preceding `cal_loss`
    call, so it can produce the gradient itself. Only the FC and conv layers
    are handed a learning rate; the relu and pooling layers just pass the
    gradient through their own `backward_propagate`.
    """
    problayer.backward_propagate()
    grad = fc.backward_propagate(problayer.delta, learning_rate=learning_rate)
    grad = pool3.backward_propagate(grad)
    grad = relu3.backward_propagate(grad)
    grad = conv3.backward_propagate(grad, learning_rate=learning_rate)
    grad = pool2.backward_propagate(grad)
    grad = relu2.backward_propagate(grad)
    grad = conv2.backward_propagate(grad, learning_rate=learning_rate)
    grad = pool1.backward_propagate(grad)
    grad = relu1.backward_propagate(grad)
    conv1.backward_propagate(grad, learning_rate=learning_rate)


def _count_correct(target):
    """Count rows of `problayer.prob` whose argmax equals the matching label in `target`."""
    return sum(int(np.argmax(problayer.prob[j]) == target[j])
               for j in range(batch_size))


# NOTE(review): the recorded run of this cell reports an `inf` loss at batch 0.
# The pixel values are fed to conv1 unscaled; consider normalising the inputs
# (and checking the softmax implementation) -- the cause is not visible here.
for epoch in range(epochs):
    train_loss = 0
    train_acc = 0
    test_loss = 0
    test_acc = 0

    # train
    n_train_batches = train_datas.shape[0] // batch_size
    for i in range(n_train_batches):
        # batch data
        lo, hi = i * batch_size, (i + 1) * batch_size
        img = train_datas[lo:hi].reshape([batch_size, 28, 28, 1])
        target = train_labels[lo:hi]

        # forward
        fc_out = _forward_pass(img)
        loss = problayer.cal_loss(fc_out, np.array(target))
        train_loss += loss

        # Score against this batch's labels (`target`), not the first
        # `batch_size` entries of `train_labels`.
        batch_acc = _count_correct(target)
        train_acc += batch_acc

        # backward (each layer updates its own parameters as the gradient passes)
        _backward_pass(learning_rate)

        if i % 50 == 0:
            # NOTE(review): dividing by batch_size assumes cal_loss returns a
            # loss summed over the batch -- TODO confirm.
            print(_timestamp() +
                  "  epoch: %d ,  batch: %5d , avg_batch_acc: %.4f  avg_batch_loss: %.4f  learning_rate %f" % (
                      epoch, i, batch_acc / float(batch_size), loss / float(batch_size), learning_rate))

    # Average over the samples actually visited: the tail that does not fill a
    # whole batch is skipped by the loop above.
    n_train_seen = max(n_train_batches * batch_size, 1)
    print(_timestamp() + "  epoch: %5d , train_acc: %.4f  avg_train_loss: %.4f" % (
        epoch, train_acc / float(n_train_seen), train_loss / float(n_train_seen)))

    # test (forward only, no parameter update)
    n_test_batches = test_datas.shape[0] // batch_size
    for i in range(n_test_batches):
        lo, hi = i * batch_size, (i + 1) * batch_size
        img = test_datas[lo:hi].reshape([batch_size, 28, 28, 1])
        target = test_labels[lo:hi]

        fc_out = _forward_pass(img)
        test_loss += problayer.cal_loss(fc_out, np.array(target))
        test_acc += _count_correct(target)

    n_test_seen = max(n_test_batches * batch_size, 1)
    print(_timestamp() + "  epoch: %5d , val_acc: %.4f  avg_val_loss: %.4f" % (
        epoch, test_acc / float(n_test_seen), test_loss / float(n_test_seen)))

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
2019-08-31 23:15:30  epoch: 0 ,  batch:     0 , avg_batch_acc: 0.0000  avg_batch_loss: inf  learning_rate 0.001000
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0

In [None]:
# Scratch check: zero-pad one extra row and column at the end of the two middle
# axes of a (32, 6, 6, 128) array and print the resulting shape.
a = np.ones((32, 6, 6, 128))
pad_width = ((0, 0), (0, 1), (0, 1), (0, 0))  # (before, after) for each axis
padded = np.pad(a, pad_width, mode='constant', constant_values=0)
print(padded.shape)