In [2]:
### chan 2020/11/19
### A simple demo of a regression task, using a single-layer neural network built with numpy.

In [3]:
import numpy as np
'''
Shapes used by the code below (N inputs, M outputs, B samples per batch):
x: R^(N,B)
W: R^(M,N)
b: R^(M,1)
y_predict : R^(M,B)
y_true: R^(M,B)
'''

def forward(W, x, b):
    '''
    Affine forward pass of the single layer (no activation).

    W: weights
    x: input vector (or a matrix whose columns are input vectors)
    b: bias, added to the product with normal numpy broadcasting

    returns: np.dot(W, x) + b
    '''
    weighted_input = np.dot(W, x)
    return weighted_input + b

def backward(L, W, x, b):
    '''
    Gradients of the single linear layer for one batch.

    input args:
    L: y_predict - y_true, one column per sample
    delta: here, delta = L, because there is no activation and only one
           layer (L is the error term of the output layer)
    W: weights (not read; kept for a uniform signature)
    x: input batch, one column per sample
    b: bias (not read; kept for a uniform signature)

    output args (a 3-tuple):
    dW: np.dot(L, x.T) / N, same shape as W
    db: row sums of L as a column vector, divided by N
    L-1: the error term with 1 subtracted element-wise, returned unchanged
         from the original implementation. Callers that only need the
         gradients should take the first two elements.
         NOTE(review): this looks like a placeholder for a "previous layer"
         delta - confirm whether it is intended.

    NOTE(review): N is x.shape[0], i.e. the number of rows of x (input
    features in the column-per-sample layout), not the number of samples in
    the batch. It therefore acts as a constant scale on the step size.
    Kept as-is so existing learning rates behave the same - confirm the
    intended normalisation.
    '''
    N = x.shape[0]
    dw = np.dot(L, x.T) # dw: R_(M,N)
    # keepdims yields an (M, 1) column directly. The previous reshape(N, 1)
    # only worked when the number of outputs M happened to equal N.
    db = np.sum(L, axis=1, keepdims=True) # db: R_(M, 1)
    return dw/N, db/N, L-1

def loss_MSE(y_predict, y_true):
    '''
    Squared-error loss between predictions and targets.

    The squared differences are summed along the last axis, and the
    resulting sums are then averaged into a single scalar.
    '''
    squared_error = np.square(y_predict - y_true)
    summed_last_axis = np.sum(squared_error, axis=-1)
    return np.mean(summed_last_axis)

In [1]:
import numpy as np
# Two independent 2x2 nested lists with identical contents, used as scratch operands.
# NOTE(review): the output recorded under this cell ([[3 3] [6 6]]) equals
# np.dot(a, b), but no statement in the cell computes or prints it -
# presumably that line was lost; confirm.
a = [[1, 1], [2, 2]]
b = [list(row) for row in a]

[[3 3]
 [6 6]]


In [4]:
def dataGen(n_samples=400, seed=None):
    '''
    Generate a synthetic linear-regression dataset: y = f(x) = Wx+b

    n_samples: number of samples (columns) to generate; default 400
    seed: optional integer. When given, a private
          np.random.RandomState(seed) is used so the data is reproducible.
          When None (default), numpy's global random state is used,
          exactly as before.

    returns:
    x: integer array of shape (5, n_samples) with entries drawn from [0, 10)
    y_true: array of shape (5, n_samples), np.dot(W, x) with b added to
            every column

    Side effect: prints x.
    '''
    W = np.array([[1, 2, 3, 4, 6],
                  [1, 2, 3, 4, 6],
                  [3, 4, 5, 6, 2],
                  [9, 1, 10, 12, 1],
                  [3, 3, 1, 8, 9]])
    b = np.array([1, 2, 3, 4, 5])
    n_features = W.shape[1]
    # Both np.random and RandomState expose randint with the same signature.
    rng = np.random if seed is None else np.random.RandomState(seed)
    x = rng.randint(0, 10, n_features * n_samples).reshape(n_features, n_samples)
    print(x)
    # b is turned into a column so it broadcasts across the sample columns.
    y_true = np.dot(W, x) + b[:, np.newaxis]
    return x, y_true
# Build the synthetic dataset once at cell level; x and y_true are the names
# later passed to training(x, y_true).
x, y_true = dataGen()
print(y_true)

[[9 2 1 ... 3 9 6]
 [9 6 2 ... 0 1 1]
 [6 1 4 ... 4 1 5]
 [9 2 6 ... 4 5 3]
 [8 3 9 ... 1 7 7]]
[[130  44  96 ...  38  77  78]
 [131  45  97 ...  39  78  79]
 [166  56  88 ...  58  83  82]
 [270  65 136 ... 120 163 152]
 [209  73 147 ...  59 139 118]]


In [5]:
def training(x, y_true, epoch=40, learningRate=0.01, batchSize=2):
    '''
    Fit W and b of a single linear layer with mini-batch gradient descent.

    x: inputs, one column per sample
    y_true: targets, one column per sample
    epoch: number of passes over the data (default 40)
    learningRate: step size for the W and b updates (default 0.01)
    batchSize: samples per mini-batch (default 2)

    W is initialised with shape (rows of y_true, rows of x) and b with
    shape (rows of y_true, 1), both from np.random.randn.

    Side effects: prints W, b and the batch loss whenever the batch index is
    a multiple of 200, and prints the final W and b. Returns nothing.
    '''
    n_samples = len(x.T)
    # Ceiling division so a trailing partial batch is still visited.
    n_batches = -(-n_samples // batchSize)
    ## init W and b
    W = np.random.randn(y_true.shape[0], x.shape[0])
    b = np.random.randn(y_true.shape[0], 1)

    for count in range(epoch):
        ## prepare feed data: shuffle the sample order for this epoch
        shuffle = np.random.permutation(n_samples)
        trainData = x.T[shuffle]
        trainLabel = y_true.T[shuffle]
        for n in range(n_batches):
            start = n*batchSize
            feedData_train = trainData[start:start+batchSize].T
            feedData_label = trainLabel[start:start+batchSize].T
            y_predict = forward(W, feedData_train, b)
            # The operand order matters (prediction minus label). This
            # residual is the derivative of 0.5*(y_predict - y_true)**2 with
            # respect to y_predict, which is why it is used as the error
            # term rather than the loss value itself.
            L = y_predict - feedData_label
            # Only the first two returned values (dw, db) are needed here.
            dw, db = backward(L, W, feedData_train, b)[:2]
            loss = loss_MSE(y_predict, feedData_label)
            if n%200==0:
                print('epoch: ', count+1, '    iteration: ', n, '/', n_batches)
                print('W: ', W)
                print('b: ', b)
                print('Loss: ', loss)
            W -= learningRate*dw
            b -= learningRate*db
    print(W, b)
# Run the training loop on the cell-level dataset (x, y_true) produced by dataGen().
training(x, y_true)

epoch:  1     iteration:  0 / 200
W:  [[-0.14074047 -0.80563766  0.13073955 -0.15350077 -1.7625759 ]
 [ 0.03332024 -1.80750989  0.10031425 -0.58620485 -1.39013077]
 [-0.75539723 -0.35620864 -1.45771271 -0.58777818  0.19190497]
 [ 0.37449362 -0.50447977 -0.1063677  -0.53437406 -0.29044456]
 [ 0.28822506  0.7472894  -1.56256175 -1.70886817  0.13820128]]
b:  [[ 1.50142558]
 [ 0.03000104]
 [-1.35147429]
 [ 0.4128486 ]
 [-1.11165857]]
Loss:  42597.70355008725
epoch:  2     iteration:  0 / 200
W:  [[ 0.95697783  1.96541285  2.95935678  3.95127454  5.9316655 ]
 [ 1.04884251  2.04682835  3.04657528  4.05179756  6.07481759]
 [ 3.13416484  4.11980078  5.12121795  6.14124753  2.2074443 ]
 [ 9.09058607  1.0884316  10.07822849 12.0838623   1.13828174]
 [ 3.19642136  3.18752794  1.19112857  8.20556996  9.30379115]]
b:  [[ 2.06963225]
 [ 0.79042788]
 [-0.26364734]
 [ 1.85551299]
 [ 0.11563791]]
Loss:  2.221242136774297
epoch:  3     iteration:  0 / 200
W:  [[ 0.96068683  1.95960952  2.94995912  3.961

epoch:  27     iteration:  0 / 200
W:  [[ 0.98973     1.99309827  2.98928455  3.99104937  5.98914154]
 [ 1.01161434  2.00780517  3.01211811  4.01012227  6.01227984]
 [ 3.03133726  4.02105954  5.03269649  6.02731142  2.03313287]
 [ 9.02059199  1.01383841 10.02148515 12.01794657  1.0217719 ]
 [ 3.04689917  3.03151759  1.04893338  8.04087411  9.04958646]]
b:  [[1.24274528]
 [1.72547928]
 [2.25930147]
 [3.5132805 ]
 [3.8914748 ]]
Loss:  0.0773337923696236
epoch:  28     iteration:  0 / 200
W:  [[ 0.99076972  1.99021451  2.988856    3.98867202  5.99115997]
 [ 1.01043853  2.01106642  3.01260275  4.01281082  6.0099972 ]
 [ 3.02816474  4.02985888  5.03400414  6.03456553  2.02697395]
 [ 9.0185073   1.01962053 10.02234442 12.02271331  1.01772482]
 [ 3.04215119  3.04468663  1.05089041  8.05173057  9.04036906]]
b:  [[1.22863364]
 [1.74143814]
 [2.30236089]
 [3.54157523]
 [3.95591729]]
Loss:  0.22896726048867322
epoch:  29     iteration:  0 / 200
W:  [[ 0.98966968  1.98958735  2.99007159  3.9939278