In [10]:
import numpy as np

class NeuralNetwork:
    """Fully connected feed-forward network trained with backpropagation.

    Every layer uses a sigmoid activation. The bias is folded into the weight
    matrices instead of being stored separately: inputs get an extra column of
    ones, and every hidden layer carries one extra node.
    """

    def __init__(self, layers, alpha=0.1):
        """Build the network and randomly initialise its weight matrices.

        Args:
            layers: sequence of ints giving the node count of each layer,
                input first and output last (e.g. ``[2, 2, 1]``).
            alpha: learning rate used to scale every weight update.

        Raises:
            ValueError: if fewer than two layers are given, because at least
                an input and an output layer are needed to form a weight
                matrix.
        """
        if len(layers) < 2:
            raise ValueError("layers must contain at least an input and an output layer")

        self.W = []  # one weight matrix per pair of consecutive layers
        self.layers = layers  # architecture of the feed-forward network
        self.alpha = alpha  # learning rate

        # Every transition except the last one: both sides get "+ 1" for the
        # bias node. Dividing by sqrt(fan-in) scales down the initial weights.
        for i in range(len(layers) - 2):
            w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
            self.W.append(w / np.sqrt(layers[i]))

        # Final transition, created exactly once and outside the loop: the
        # incoming side has a bias node, the output layer does not.
        w = np.random.randn(layers[-2] + 1, layers[-1])
        self.W.append(w / np.sqrt(layers[-2]))

    def __repr__(self):
        """Return the architecture as ``NeuralNetwork: n0-n1-...``."""
        return "NeuralNetwork: {}".format(
            "-".join(str(l) for l in self.layers)
        )

    def sigmoid(self, x):
        """Return the element-wise logistic sigmoid of ``x``."""
        return 1.0 / (1 + np.exp(-x))

    def sigmoid_deriv(self, x):
        """Return the sigmoid derivative, given the sigmoid *output* ``x``.

        ``x`` must already have been passed through ``sigmoid``; the
        derivative is then simply ``x * (1 - x)``.
        """
        return x * (1 - x)

    def fit_partial(self, x, y):
        """Run one backpropagation step for a single sample.

        Args:
            x: one input sample; ``fit`` passes rows that already contain the
                bias column.
            y: target value(s) for that sample.
        """
        # A[k] holds the activations of layer k; A[0] is the input itself.
        A = [np.atleast_2d(x)]

        # Feed-forward phase: dot product of the current activations with the
        # layer's weight matrix, followed by the sigmoid.
        for layer in range(len(self.W)):
            net = A[layer].dot(self.W[layer])
            A.append(self.sigmoid(net))

        # Backward phase: start with the delta of the output layer.
        error = A[-1] - y
        D = [error * self.sigmoid_deriv(A[-1])]  # D collects the deltas

        # Propagate the delta backwards through the hidden layers.
        for layer in range(len(A) - 2, 0, -1):
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.sigmoid_deriv(A[layer])
            D.append(delta)

        # Deltas were collected output-first; reverse them so D[k] lines up
        # with self.W[k].
        D = D[::-1]

        # Weight update: gradient step scaled by the learning rate.
        for layer in range(len(self.W)):
            self.W[layer] += -self.alpha * A[layer].T.dot(D[layer])

    def fit(self, x, y, epochs=1000, displayUpdate=100):
        """Train the network sample by sample.

        Args:
            x: 2-D array-like of training samples, one row per sample.
            y: targets, one entry per row of ``x``.
            epochs: number of passes over the whole training set.
            displayUpdate: the loss is printed on the first epoch and then
                every ``displayUpdate`` epochs.
        """
        # Append a column of ones so the bias is learned as an ordinary weight.
        features = np.asarray(x)
        features = np.c_[features, np.ones(features.shape[0])]

        for epoch in range(epochs):
            # Separate loop names keep the full training matrix intact for
            # the loss computation below and for the following epochs.
            for (sample, target) in zip(features, y):
                self.fit_partial(sample, target)

            if epoch == 0 or (epoch + 1) % displayUpdate == 0:
                loss = self.calculate_loss(features, y)
                print("[INFO] epoch={} , loss={:.7f}".format(
                    epoch + 1, loss
                ))

    def predict(self, x, addBias=True):
        """Return the output-layer activations for ``x``.

        Args:
            x: one sample or a 2-D array of samples.
            addBias: append the bias column of ones first; pass ``False``
                when ``x`` already contains it.
        """
        p = np.atleast_2d(x)
        if addBias:
            p = np.c_[p, np.ones((p.shape[0]))]

        # The forward pass runs regardless of addBias.
        for layer in range(len(self.W)):
            p = self.sigmoid(np.dot(p, self.W[layer]))
        return p

    def calculate_loss(self, x, targets):
        """Return half the sum of squared errors over ``x``.

        ``x`` must already contain the bias column, because the prediction is
        made with ``addBias=False``.
        """
        targets = np.atleast_2d(targets)
        predictions = self.predict(x, addBias=False)
        loss = 0.5 * np.sum((predictions - targets) ** 2)
        return loss

In [11]:
# Instantiate a 2-2-1 network (2 inputs, 2 hidden nodes, 1 output) and print
# its architecture summary.
nn = NeuralNetwork(layers=[2, 2, 1])
print(nn)

NeuralNetwork: 2-2-1
