In [None]:
import numpy as np
from sklearn.datasets import make_blobs

In [31]:
class NeuralNetwork:
    def __init__(self):
        self.lr = 0.1
        self.epochs = 500

    def weights_init(self, input_size, output_size):
        W = np.random.randn(input_size, output_size)
        b = np.random.randn(1, output_size)

        return W, b

    def ReLU(self, z):
        return np.maximum(0, z)

    def Softmax(self, z):
        exp_scores = np.exp(z)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

        return probs

    def cost(self, probs, m, y):
        return np.sum(-np.log(probs[np.arange(m), y]) / m)

    def forward_props(self, X, y, params):
        W1 = params["W1"]
        b1 = params["b1"]
        W2 = params["W2"]
        b2 = params["b2"]

        z1 = np.dot(X, W1) + b1
        a1 = self.ReLU(z1)

        z2 = np.dot(a1, W2) + b2
        probs = self.Softmax(z2)

        cache = {
            "a0": X,
            "a1": a1,
            "probs": probs,
        }

        return probs, cache

    def backprops(self, cache, params, y, m):
        W1 = params["W1"]
        b1 = params["b1"]
        W2 = params["W2"]
        b2 = params["b2"]

        a0 = cache["a0"]
        a1 = cache["a1"]
        probs = cache["probs"]

        dz2 = probs
        dz2[np.arange(m), y] -= 1
        dz2 /= m

        dW2 = np.dot(a1.T, dz2)
        db2 = np.sum(dz2, axis=0, keepdims=True)

        dz1 = np.dot(dz2, W2.T)
        dz1 = dz1 * (a1 > 0)

        dW1 = np.dot(a0.T, dz1)
        db1 = np.sum(dz1, axis=0, keepdims=True)

        grads = {
            "dW1": dW1,
            "dW2": dW2,
            "db1": db1,
            "db2": db2,
            }

        return grads

    def params_update(self, params, grads):
        W1 = params["W1"]
        b1 = params["b1"]
        W2 = params["W2"]
        b2 = params["b2"]

        dW1 = grads["dW1"]
        db1 = grads["db1"]
        dW2 = grads["dW2"]
        db2 = grads["db2"]

        W1 -= self.lr * dW1
        b1 -= self.lr * db1
        W2 -= self.lr * dW2
        b2 -= self.lr * db2

        params = {
            "W1": W1,
            "b1": b1,
            "W2": W2,
            "b2": b2,
        }

        return params

    def train(self, X, y):
        m, n = X.shape
        h1 = 16
        h2 = len(np.unique(y))
        W1, b1 = self.weights_init(n, h1)
        W2, b2 = self.weights_init(h1, h2)

        params = {
            "W1": W1,
            "b1": b1,
            "W2": W2,
            "b2": b2,
        }

        for ep in range(self.epochs):
            probs, cache = self.forward_props(X, y, params)

            loss = self.cost(probs, m, y)
            if ep+1 == 1 or ep+1 == self.epochs or (ep+1)%50 == 0:
                print(f'<EP{ep+1}> Loss: {loss}')

            grads = self.backprops(cache, params, y, m)
            params = self.params_update(params, grads)

if __name__ == "__main__":
    X, y = make_blobs(n_samples=500, n_features=5, centers=5)
    nn = NeuralNetwork()
    nn.train(X, y)

<EP1> Loss: 48.249937433930924
<EP50> Loss: 0.0228202032025797
<EP100> Loss: 0.012298731359709249
<EP150> Loss: 0.00793503133837336
<EP200> Loss: 0.005658178248127125
<EP250> Loss: 0.004347574067854021
<EP300> Loss: 0.0035159710305199686
<EP350> Loss: 0.002938905273345385
<EP400> Loss: 0.002512796724371166
<EP450> Loss: 0.002185183304272873
<EP500> Loss: 0.0019256285816536991
