Implement the linear, ReLU, sigmoid, tanh, and softmax activation functions as a class for use in a neural network implementation.

Develop the class structure and forward propagation, including the loss (cost) function, for a deep (multilayer) neural network.

Develop the backpropagation implementation for a deep (multilayer) neural network. (This is a mandatory but ungraded programming assignment this time.)

In [1]:
import numpy as np

In [2]:
# Activation functions
class Activation:
    """Pair an activation function with its element-wise derivative.

    After construction, ``self.f`` is the activation and ``self.df`` is its
    derivative; both take the pre-activation array and return an array of
    the same shape.

    Supported names: "linear", "relu", "sigmoid", "tanh", "softmax".
    """

    _NAMES = ("linear", "relu", "sigmoid", "tanh", "softmax")

    def __init__(self, name):
        """Select the activation called *name*.

        Raises:
            ValueError: if *name* is not one of the supported activations.
        """
        # Fail here rather than leaving f/df unset, which would only surface
        # later as an AttributeError in the middle of a forward pass.
        if name not in self._NAMES:
            raise ValueError(
                f"unknown activation {name!r}; "
                f"expected one of {', '.join(self._NAMES)}"
            )
        self.name = name
        self.f = getattr(self, name)
        self.df = getattr(self, name + "_d")

    def linear(self, x):
        """Identity activation."""
        return x

    def linear_d(self, x):
        """Derivative of the identity: ones with the shape of *x*."""
        return np.ones_like(x)

    def relu(self, x):
        """Rectified linear unit, max(0, x) element-wise."""
        return np.maximum(0, x)

    def relu_d(self, x):
        """ReLU derivative: 1.0 where x > 0, else 0.0 (0.0 at x == 0)."""
        return (x > 0).astype(float)

    def sigmoid(self, x):
        """Logistic function 1 / (1 + exp(-x)), computed without overflow."""
        x = np.asarray(x, dtype=float)
        # exp(-|x|) lies in (0, 1], so it cannot overflow for any finite x.
        e = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
        return np.where(x >= 0, 1 / (1 + e), e / (1 + e))

    def sigmoid_d(self, x):
        """Sigmoid derivative s * (1 - s), with s = sigmoid(x)."""
        s = self.sigmoid(x)
        return s * (1 - s)

    def tanh(self, x):
        """Hyperbolic tangent."""
        return np.tanh(x)

    def tanh_d(self, x):
        """tanh derivative 1 - tanh(x)**2."""
        return 1 - np.tanh(x) ** 2

    def softmax(self, x):
        """Softmax normalised along axis 0.

        The maximum along axis 0 is subtracted before exponentiating so that
        large inputs do not overflow; this does not change the result.
        """
        e = np.exp(x - np.max(x, axis=0, keepdims=True))
        return e / np.sum(e, axis=0, keepdims=True)

    def softmax_d(self, x):
        """Diagonal of the softmax Jacobian, s * (1 - s).

        NOTE: this omits the off-diagonal terms -s_i * s_j, so it is not the
        full derivative.  It is rarely used directly.
        """
        s = self.softmax(x)
        return s * (1 - s)


# Deep Neural Network
class DNN:
    """Fully connected multilayer network trained by plain gradient descent.

    Parameters are stored in dicts keyed by the 1-based layer index:
    ``W[l]`` has shape (layers[l], layers[l-1]), ``b[l]`` has shape
    (layers[l], 1) and ``act[l]`` is that layer's ``Activation``.
    Inputs are laid out column-wise: ``X`` is (n_features, n_samples).
    """

    def __init__(self, layers, acts):
        """Create the network.

        Args:
            layers: layer sizes ``[in, h1, h2, ..., out]``.
            acts: activation names, one per layer after the input.

        Raises:
            ValueError: if ``acts`` does not have exactly
                ``len(layers) - 1`` entries.
        """
        if len(acts) != len(layers) - 1:
            raise ValueError(
                f"expected {len(layers) - 1} activation name(s) for "
                f"{len(layers)} layer sizes, got {len(acts)}"
            )
        self.L = len(layers) - 1
        self.W, self.b, self.act = {}, {}, {}
        for l in range(1, len(layers)):
            # Small random weights break symmetry; biases start at zero.
            self.W[l] = np.random.randn(layers[l], layers[l - 1]) * 0.01
            self.b[l] = np.zeros((layers[l], 1))
            self.act[l] = Activation(acts[l - 1])

    def forward(self, X):
        """Run forward propagation.

        Returns:
            ``(A, cache)`` where ``A`` is the last layer's activation and
            ``cache`` maps ``"A0"`` to ``X`` and ``"Z<l>"`` / ``"A<l>"`` to
            each layer's pre-activation and activation.
        """
        cache = {"A0": X}
        A = X
        for l in range(1, self.L + 1):
            Z = self.W[l] @ A + self.b[l]
            A = self.act[l].f(Z)
            cache["Z" + str(l)] = Z
            cache["A" + str(l)] = A
        return A, cache

    def loss(self, Yhat, Y, kind="ce"):
        """Compute the cost of predictions ``Yhat`` against targets ``Y``.

        Args:
            Yhat: predictions, same shape as ``Y``.
            Y: targets with one column per sample.
            kind: ``"ce"`` for cross entropy averaged over the samples, or
                ``"mse"`` for the mean squared error over all elements.

        Raises:
            ValueError: if ``kind`` is neither ``"ce"`` nor ``"mse"``.
        """
        m = Y.shape[1]
        if kind == "ce":
            # The 1e-8 offset keeps log() finite when a probability is 0.
            return -np.sum(Y * np.log(Yhat + 1e-8)) / m
        if kind == "mse":
            return np.mean((Yhat - Y) ** 2)
        # Previously an unknown kind fell through and returned None.
        raise ValueError(f"unknown loss kind {kind!r}; expected 'ce' or 'mse'")

    def backward(self, Yhat, Y, cache):
        """Back-propagate and return the gradients.

        The output layer always uses ``dZ = Yhat - Y``.
        NOTE(review): that shortcut is exact for softmax + cross entropy;
        confirm it is intended for other output/loss combinations.

        Returns:
            dict with ``"dW<l>"`` and ``"db<l>"`` for every layer, plus
            ``"dA<l-1>"`` (the gradient passed to the previous layer,
            down to ``"dA0"``).
        """
        grads = {}
        m = Y.shape[1]

        for l in reversed(range(1, self.L + 1)):
            A_prev = cache["A" + str(l - 1)]
            Z = cache["Z" + str(l)]

            if l == self.L:  # output layer
                dZ = Yhat - Y
            else:
                dA = grads["dA" + str(l)]
                dZ = dA * self.act[l].df(Z)

            grads["dW" + str(l)] = (1 / m) * dZ @ A_prev.T
            grads["db" + str(l)] = (1 / m) * np.sum(dZ, axis=1, keepdims=True)
            grads["dA" + str(l - 1)] = self.W[l].T @ dZ
        return grads

    def update(self, grads, lr=0.01):
        """Apply one gradient-descent step in place with learning rate ``lr``."""
        for l in range(1, self.L + 1):
            self.W[l] -= lr * grads["dW" + str(l)]
            self.b[l] -= lr * grads["db" + str(l)]


In [3]:
# Smoke test: 4 inputs -> 5 hidden units (ReLU) -> 3 outputs (softmax)
net = DNN([4, 5, 3], ["relu", "softmax"])

# Ten random samples stored column-wise, each with a random one-hot label.
X = np.random.randn(4, 10)
labels = np.random.choice(3, 10)
Y = np.eye(3)[:, labels]

# Forward pass, then report the default (cross-entropy) loss before training.
Yhat, cache = net.forward(X)
initial_loss = net.loss(Yhat, Y)
print("Loss:", initial_loss)

# One gradient-descent step.
grads = net.backward(Yhat, Y, cache)
net.update(grads, lr=0.1)


Loss: 1.0986519104491905
