In [1]:
import numpy as np
from mynn import op as nn

# Mini-batch size for these experiments. Not referenced by name in the test
# functions visible in this cell; only the disabled "test 1" snippet below uses it.
batchsize = 16

# test 1 (disabled): forward/backward shape check for a single nn.Linear(10, 20) layer.
# NOTE(review): `input` below would shadow the Python builtin if this is re-enabled.
# input = np.random.randn(batchsize, 10)
# l1 = nn.Linear(10, 20)
# l1(input).shape
# grad = np.random.randn(batchsize, 20)
# l1.backward(grad).shape

# test 2 #
def test2(verbose=False, iter_time=10000, batch_size=16, lr=1):
    """Fit one ``nn.Linear(10, 20)`` layer to a random affine map with mini-batch SGD.

    Synthetic targets are generated as ``Y = X @ W_gt + b_gt`` with standard-normal
    ``X``, ``W_gt`` and ``b_gt``. Each iteration consumes one fresh mini-batch, calls
    ``backward`` with the residual ``Y_pred - Y_gt`` (the gradient of
    ``0.5 * ||Y_pred - Y_gt||^2`` with respect to ``Y_pred``) and then applies
    ``param -= lr * grad`` to every entry of ``l2.grads``.
    At the end, the norms of ``l2.W - W_gt`` and ``l2.b - b_gt`` are printed.

    Parameters
    ----------
    verbose : bool
        When true, print the Frobenius norm of the residual for every mini-batch.
    iter_time : int
        Number of SGD steps; one mini-batch is used per step.
    batch_size : int
        Number of samples per mini-batch.
    lr : float
        Step size applied to the values found in ``l2.grads``.

    Raises
    ------
    ValueError
        If any updated parameter contains NaN (the offending gradients are printed first).
    """
    in_dim, out_dim = 10, 20
    N = batch_size*iter_time
    X_data = np.random.randn(N, in_dim)
    W_gt = np.random.randn(in_dim, out_dim)
    b_gt = np.random.randn(out_dim)
    Y_data = X_data@W_gt + b_gt
    l2 = nn.Linear(in_dim, out_dim)
    for i in range(iter_time):
        # Rows of the i-th mini-batch; every sample is visited exactly once.
        rows = slice(i*batch_size, (i+1)*batch_size)
        X = X_data[rows, :]
        Y_gt = Y_data[rows, :]
        Y_pred = l2(X)
        if verbose:
            # The norm is only needed for logging, so skip it on quiet runs.
            print(np.linalg.norm(Y_gt-Y_pred))
        grad = -(Y_gt-Y_pred)
        # backward() is presumably what fills l2.grads -- TODO confirm against mynn.op.
        l2.backward(grad)
        for key in l2.grads.keys():
            l2.params[key] -= lr*l2.grads[key]
            if np.any(np.isnan(l2.params[key])):
                # Dump what produced the NaN before aborting the run.
                print(l2.grads[key])
                print(grad)
                raise ValueError("Need to break!!!")
    print(f"the residual norm is {np.linalg.norm(l2.W-W_gt), np.linalg.norm(l2.b-b_gt)}")

def test3(verbose=False, iter_time=200000, batch_size=16, lr=0.01):
    """Train ``Linear(10, 10) -> ReLU -> Linear(10, 20)`` on a random affine target.

    Synthetic targets are generated as ``Y = X @ W_gt + b_gt`` with standard-normal
    ``X``, ``W_gt`` and ``b_gt``. Each iteration runs one forward pass on a fresh
    mini-batch, then walks the gradient backwards through ``l2``, ``relu`` and
    ``l1``, applying ``param -= lr * grad`` to the entries of each linear layer's
    ``grads`` right after that layer's ``backward`` call.

    Per-iteration diagnostics are printed only when ``verbose`` is true; a single
    summary line with the last mini-batch loss is always printed at the end.

    Parameters
    ----------
    verbose : bool
        When true, print the loss, the norm of the output gradient and the norm
        of the gradient handed to ``l1`` on every iteration.
    iter_time : int
        Number of SGD steps; one mini-batch is used per step.
    batch_size : int
        Number of samples per mini-batch.
    lr : float
        Step size applied to the values found in each layer's ``grads``.

    Raises
    ------
    ValueError
        If any updated parameter of ``l2`` or ``l1`` contains NaN.
    """
    N = batch_size*iter_time
    # All samples are generated up front as float64; with the default arguments
    # the (N, 10) and (N, 20) arrays together take roughly 0.75 GB.
    X_data = np.random.randn(N, 10)
    W_gt = np.random.randn(10, 20)
    b_gt = np.random.randn(20)
    Y_data = X_data@W_gt + b_gt
    l1 = nn.Linear(10, 10)
    relu = nn.ReLU()
    l2 = nn.Linear(10, 20)
    loss = None
    for i in range(iter_time):
        rows = slice(i*batch_size, (i+1)*batch_size)
        X = X_data[rows, :]
        Y_gt = Y_data[rows, :]
        Y_pred = l2(relu(l1(X)))
        loss = np.linalg.norm(Y_gt-Y_pred)
        if verbose:
            print(loss)
        grad = -(Y_gt-Y_pred)

        # passing the grad to l2!
        passing_grad = l2.backward(grad)
        if verbose:
            print(f"norm of the grad is {np.linalg.norm(grad)}")
        for key in l2.grads.keys():
            l2.params[key] -= lr*l2.grads[key]
            if np.any(np.isnan(l2.params[key])):
                print(l2.grads[key])
                print(f"grad is {grad}")
                raise ValueError("l2 Need to break!!!")

        # passing the grad to relu!
        passing_grad = relu.backward(passing_grad)
        # no params to optimize for relu!

        # passing the grad to l1!
        l1.backward(passing_grad)
        if verbose:
            # Norm of the gradient that came out of relu.backward and went into l1.
            print(np.linalg.norm(passing_grad))
        for key in l1.grads.keys():
            l1.params[key] -= lr*l1.grads[key]
            if np.any(np.isnan(l1.params[key])):
                print(l1.grads[key])
                print(grad)
                raise ValueError("l1 Need to break!!!")

    # One summary line so a quiet run still reports how training ended.
    if loss is not None:
        print(f"the final batch loss is {loss}")

def test4():
    """Smoke-test two stacked ``nn.Conv2D`` layers.

    Builds a 3->6 channel convolution (kernel 4) followed by a 6->12 channel
    convolution (kernel 5, stride 2), pushes a uniform-random batch of shape
    (16, 3, 32, 32) through both, prints the resulting shape, and finally calls
    ``backward`` on the second layer with an all-zero gradient.
    """
    first_conv = nn.Conv2D(in_channels=3, out_channels=6, kernel_size=4)
    second_conv = nn.Conv2D(in_channels=6, out_channels=12, kernel_size=5, stride=2)

    images = np.random.rand(16, 3, 32, 32)
    hidden = first_conv(images)
    output = second_conv(hidden)
    print(output.shape)

    # Zero upstream gradient with a hard-coded shape; the return value of
    # backward() is not inspected here.
    upstream = np.zeros((16, 12, 13, 13))
    second_conv.backward(upstream)


# Entry point for this cell: only the convolution smoke test (test4) is run;
# test2 and test3 are defined above but not called here.
if __name__ == "__main__":
    test4()

(16, 12, 13, 13)


In [2]:
# Scratch arrays for shape experiments: `a` is (16, 3, 4, 4) and `b` is (6, 3, 4, 4),
# both drawn from a standard normal distribution.
a = np.random.randn(16, 3, 4, 4)
b = np.random.randn(6, 3, 4, 4)
# Disabled experiment: flattening the trailing axes gives (16, 48) @ (48, 6),
# so this expression would report the shape (16, 6).
# np.matmul(a.reshape(16, -1),b.reshape(6, -1).transpose(1, 0)).shape
# NOTE(review): transpose(-1, -2) names only two axes of a 3-D array, while
# ndarray.transpose expects a full axis permutation -- this line likely raises
# as written; confirm before re-enabling.
# np.mean((np.random.randn(10,20,30).transpose(-1,-2)), axis=0).shape