In [1]:
### chan 2020/11/20
### convolution forward and backward
import numpy as np

In [2]:
### convolution layer: forward calculate
def conv_forward(x, kernel, b, padding=(0,0), strides=(1,1)):
    '''
    inputs:
    x: input tensor, shape(batchSize, channels, height, weight)
    kernel: shape(channels, numKernel, k1, k2)
    x * kernel = (batchSize, channels, height, weight)*(channels, numKernel, k1, k2) = (bacthSize, numKernel, height, weight)
    b: bias, shape(numKernel,)

    padding: (x,y), shape(h,w) --> shape(h+2x, w+2y)
    strides: (x direction, y direction)
    
    output:
    z: shape(batchSize, numKernel, 1+(height-k1)//strides[1], 1+(weight-k2)//strides[0])
    '''
    #assert kernel.shape[0]==x.shape[1]
    #assert b.shape[0]==kernel.shape[1]
    if padding != (0,0):
        x = np.lib.pad(x, ((0,0),(0,0),(padding[0],padding[0]),(padding[1],padding[1])), 'constant', constant_values=0)
        
    ## if height/weight cant be divided by stride, then padding zero
    channels, numKernel, k1, k2 = kernel.shape
    while (x.shape[2]-k1)%strides[1] != 0 :
        #print(x.shape[2], k1)
        x = np.lib.pad(x, ((0,0),(0,0),(0,1),(0,0)), 'constant', constant_values=0)
    while (x.shape[3]-k2)%strides[0] != 0 :
        x = np.lib.pad(x, ((0,0),(0,0),(0,0),(0,1)), 'constant', constant_values=0)
    assert x.shape[1] == channels
    batchSize, channels, height, weight = x.shape
    assert (height-k1)%strides[1] == 0 
    assert (weight-k2)%strides[0] == 0 
    #print(x.shape)

    ## calculate conv
    print(height-k1)
    z = np.zeros((batchSize, numKernel, 1+(height-k1)//strides[1], 1+(weight-k2)//strides[0]))
    for n in np.arange(batchSize):
        for k in np.arange(numKernel):
            for h in np.arange(height-k1+1)[::strides[1]]:
                for w in np.arange(weight-k2+1)[::strides[0]]:
                    #print(n,k,h,w)
                    z[n, k, h//strides[1], w//strides[0]] = np.sum(x[n, :, h:h+k1, w:w+k2] * kernel[:,k]) + b[k]
    return z

In [None]:
'''
x = np.random.randn(2, 3, 20, 20).astype(np.float64)
kernel = np.random.randn(3, 4, 2, 2).astype(np.float64) 
b = np.zeros(4).astype(np.float64)
z = conv_forward(x, kernel, b, padding=(1,1), strides=(2,2))    
print(z.shape)
'''

In [3]:
def _insertZeros(dzNext, strides=(2,2)):
    '''
    For the dimention match, error_next should be transfomed 
    from (batchSize, numKernel, (H-k1)/strides[1]+1, (W-k2)/strides[0]+1  
    to (batchSize, numKernel, H, W)
    
    here, this function insert 0 in dzNext, each row and col
    for example, in dim 2 and 3, [[1,1],[2,2]] --> [[1, 0, 1],[0,0,0],[2,0,2]]
    '''
    _, _, H, W = dzNext.shape
    if strides[1] > 1:
        for row in np.arange(H-1, 0, -1):
            for n in np.arange(strides[1]-1):
                dzNext = np.insert(dzNext, row, 0, axis = 2)
    if strides[0] > 1:
        for col in np.arange(W-1, 0, -1):
            for n in np.arange(strides[1]-1):
                dzNext = np.insert(dzNext, col, 0, axis = 3)
    return dzNext
#pz = _insertZeros(z, (2,2))
#print(pz.shape)

In [4]:
### convolution layer backward
def conv_backward(dzNext, x, kernel, padding=(0,0), strides=(1,1)):
    '''
    inputs args:
    dzNext: error term of current layer, defined as dLoss(y,y*)/dz, where z is the output of current layer
    x: input of current layer, shape(batchSize, channels, height, weight)
    kernel: convolution kernels current layer, shape(batchSize, numKernel, k1, k2)
    
    output args:
    dK: d(dzNext)/d(weights in kernel)
    db: d(dzNext)/d(bias in kernel)
    dz: error term of layer before dzNext
    '''
    
    assert kernel.shape[1] == dzNext.shape[1]    # because number of kernel equal to channel of output feature map
    channels, numKernel, k1, k2 = kernel.shape
    batchSize, numKernel, NH, NW = dzNext.shape  # NH = (H-k1)//strides[1] +1, NW too.
    
    ### calculate dz of current layer using dzNext, according to chain rule
    ## dzNext: (batchSize, numKernel, H, W) -->(batchSize, numKernel, H, W)
    dzNextTrans1 = _insertZeros(dzNext, strides=(2,2))
    dzNextTrans2 = np.lib.pad(dzNextTrans1, 
                              ((0,0),(0,0),(k1-1,k1-1),(k2-1,k2-1)), 'constant', constant_values=0) # shape(batchSize, numKernel, H+1, W+1)
    
    ## kernel: (channels, numKernel, k1, k2) --> (numKernel, channels, k1, k2) and rot_180(kernel)
    kernelTrans = np.flip(kernel, (2,3))
    kernelTrans = np.swapaxes(kernelTrans, 0, 1)
    
    ## (1) set param b to zero.    (2) default strides is (1,1) and default padding is (0,0), so dz will be (batchSize, channels, H+1 -1, W+1 -1)
    dz = conv_forward(dzNextTrans2.astype(np.float64), kernelTrans.astype(np.float64), np.zeros((numKernel,), dtype=np.float64))
    
    ### remove padding
    if padding[0] > 0 and padding[1] > 0:
        dz = dz[:, :, padding[0]:-padding[0], padding[1]:-padding[1]]
    elif padding[0]>0:
        dz = dz[:, :, padding[0]:-padding[0], :]
    elif padding[1]>0:
        dz = dz[:, :, :, padding[1]:-padding[1]]
    
    
    ### calculate dk,  shape(channels, numKernel, k1, k2)
    xTrans = np.swapaxes(x, 0, 1)
    xTrans = np.lib.pad(xTrans,
                       ((0,0),(0,0),(padding[0],padding[0]),(padding[1],padding[1])), 'constant', constant_values=0)
    print(xTrans.shape)
    print(dzNextTrans1.shape)
    dk = conv_forward(xTrans.astype(np.float64), dzNextTrans1.astype(np.float64), np.zeros((numKernel,), dtype=np.float64))
    
    ### calculate db
    db = np.sum(np.sum(np.sum(dzNext, axis=-1), axis=-1), axis=0)

    return dk / batchSize, db / batchSize, dz

#loss = np.mean(np.sum(np.square(z*0.01), axis=-1))  # 损失函数值
#dzNext = z*0.01  # 损失函数关于网络输出的梯度
#dK, db, dz = conv_backward(dzNext, x, kernel, padding=(1,1), strides=(2,2))

In [11]:
# 没有padding,输入的高度和宽度是20*20,卷积核是2*2,输出高度和宽度就是20-2//2+1=11
batchSize = 2
channels = 3
H = 20
W = 20
numKernel = 4
k1 = 2
k2 = 2
padding_H = 1
padding_W = 1
strides_H = 2
strides_W = 2
x = np.random.randn(batchSize, channels, H, W).astype(np.float64)
K = np.random.randn(channels, numKernel, k1, k2).astype(np.float64) 
b = np.zeros(numKernel).astype(np.float64)
y_true = np.ones((batchSize, numKernel, (H+2*padding_H-k1)//strides_H +1, (W+2*padding_W-k2)//strides_W +1))
print(y_true.shape)
print(y_true)

(2, 4, 11, 11)
[[[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]

  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]

  [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
   [1. 1. 1. 1. 1. 1. 1. 1. 1. 

In [13]:
for i in range(50):
    # 前向
    y_predict = conv_forward(x, K, b, padding=(padding_H, padding_W), strides=(strides_H, strides_W))
    print(y_predict.shape)
    
    # 反向
    loss = np.mean(np.sum(np.square(y_predict-y_true),axis=-1))
    #loss, dy = mean_squared_loss(next_z, y_true)
    dy = y_predict - y_true
    
    dK, db, _ = conv_backward(dy, x, K, padding=(padding_H, padding_W), strides=(strides_H, strides_W))
    # 更新梯度
    print(dK.shape)
    K -= 0.001 * dK
    b -= 0.001 * db

    # 打印损失
    print("step:{},loss:{}".format(i, loss))

    if np.allclose(y_true, y_predict):
        print("yes")
        break

print(y_predict)

20
(2, 4, 11, 11)
21
(3, 2, 22, 22)
(2, 4, 21, 21)
1
(3, 4, 2, 2)
step:0,loss:15.117241302038604
20
(2, 4, 11, 11)
21
(3, 2, 22, 22)
(2, 4, 21, 21)
1
(3, 4, 2, 2)
step:1,loss:12.32401358071482
20
(2, 4, 11, 11)
21
(3, 2, 22, 22)
(2, 4, 21, 21)
1
(3, 4, 2, 2)
step:2,loss:10.0790961950495
20
(2, 4, 11, 11)
21
(3, 2, 22, 22)
(2, 4, 21, 21)
1
(3, 4, 2, 2)
step:3,loss:8.268717672828494
20
(2, 4, 11, 11)
21
(3, 2, 22, 22)
(2, 4, 21, 21)
1
(3, 4, 2, 2)
step:4,loss:6.80386640837687
20
(2, 4, 11, 11)
21
(3, 2, 22, 22)
(2, 4, 21, 21)
1
(3, 4, 2, 2)
step:5,loss:5.614687071758678
20
(2, 4, 11, 11)
21
(3, 2, 22, 22)
(2, 4, 21, 21)
1
(3, 4, 2, 2)
step:6,loss:4.646185527803582
20
(2, 4, 11, 11)
21
(3, 2, 22, 22)
(2, 4, 21, 21)
1
(3, 4, 2, 2)
step:7,loss:3.8549290446681215
20
(2, 4, 11, 11)
21
(3, 2, 22, 22)
(2, 4, 21, 21)
1
(3, 4, 2, 2)
step:8,loss:3.206505038887808
20
(2, 4, 11, 11)
21
(3, 2, 22, 22)
(2, 4, 21, 21)
1
(3, 4, 2, 2)
step:9,loss:2.6735591048189513
20
(2, 4, 11, 11)
21
(3, 2, 22, 22)
(2,

In [53]:
import numpy as np
z=1
print(-z)


-1


In [16]:
print(7//2)

3


In [19]:
height = 30
k1 = 2
strides = (2,2)
for h in np.arange(height - k1 + 1)[::strides[0]]:
    print(h)

0
2
4
6
8
10
12
14
16
18
20
22
24
26
28


In [45]:
a=np.array([[1,2],[1,2]])
b=np.array([[3,3],[3,3]])
a = np.insert(a, 1, 0, axis = 0)
#print(a*b)
print(a)

[[1 2]
 [0 0]
 [1 2]]


In [42]:
H =10
for h in np.arange(H - 1, 0, -1):
    print(h)

9
8
7
6
5
4
3
2
1
