In [22]:
import numpy as np

In [23]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_der(x):
    return sigmoid(x) * (1 - sigmoid(x))

def softmax(x):
    exps = np.exp(x - np.max(x))
    return exps / np.sum(exps)

def softmax_der(x):
    pass

def relu(x):
    return np.maximum(0, x)

def relu_der(x):
    return (x > 0).astype(x.dtype)

def mse_loss(y_hat, y):
    return 1/len(y_hat) * np.sum(np.power(y - y_hat, 2), axis=1)

def mse_loss_der(y_hat, y):
    return -2 * np.sum(y - y_hat, axis=1)

In [5]:
class Dense:
    def __init__(self, input_size, num_of_neurons):
        self.w = np.random.rand(input_size, num_of_neurons)
        self.b = np.random.rand(num_of_neurons)
    
    def forward(self, x):
        return np.dot(x, self.w) + self.b
    
    def der_w(self, x):
        return x
    
    def der_b(self, x):
        return np.ones(self.b.shape[0])

In [64]:
batch_size = 1
length = 5
num_of_neurons = 10

x = np.zeros((batch_size, length))
l1 = Dense(length, num_of_neurons)
l2 = Dense(num_of_neurons, length)

for i in range(100):
    z1 = l1.forward(x)
    a1 = relu(z1)

    z2 = l2.forward(a1)
    a2 = relu(z2)

    loss = mse_loss(a2, x)

    grad = mse_loss_der(a2, x)
    grad = grad.reshape((1, grad.shape[-1])) * relu_der(a2)

    grad_w = np.dot(grad.T, z1).T
    grad_b = grad

    l2.w -= 1e-4 * grad_w
    l2.b -= 1e-4 * grad_b[0]
    
    print(loss)

[55.98832257]
[55.49302201]
[55.00216223]
[54.5157034]
[54.03360607]
[53.55583114]
[53.08233985]
[52.61309379]
[52.1480549]
[51.68718547]
[51.2304481]
[50.77780576]
[50.32922172]
[49.88465961]
[49.44408335]
[49.00745722]
[48.5747458]
[48.14591398]
[47.720927]
[47.29975036]
[46.88234992]
[46.46869181]
[46.05874248]
[45.65246869]
[45.24983746]
[44.85081616]
[44.4553724]
[44.06347412]
[43.67508952]
[43.29018711]
[42.90873566]
[42.53070424]
[42.15606217]
[41.78477907]
[41.41682483]
[41.0521696]
[40.6907838]
[40.33263811]
[39.9777035]
[39.62595116]
[39.27735256]
[38.93187944]
[38.58950376]
[38.25019776]
[37.91393391]
[37.58068495]
[37.25042383]
[36.92312377]
[36.59875822]
[36.27730087]
[35.95872565]
[35.64300671]
[35.33011845]
[35.02003549]
[34.71273268]
[34.40818508]
[34.106368]
[33.80725696]
[33.51082769]
[33.21705615]
[32.92591851]
[32.63739115]
[32.35145068]
[32.0680739]
[31.78723782]
[31.50891966]
[31.23309685]
[30.95974702]
[30.68884799]
[30.4203778]
[30.15431465]
[29.89063698]
[29.62