In [1]:
import numpy as np


# Activation Functions

class Activation:
    def __init__(self, func="sigmoid"):
        self.func = func

    def forward(self, z):
        if self.func == "sigmoid":
            return 1 / (1 + np.exp(-z))
        elif self.func == "relu":
            return np.maximum(0, z)
        elif self.func == "tanh":
            return np.tanh(z)
        else:
            raise ValueError("Unsupported activation")

    def derivative(self, z):
        if self.func == "sigmoid":
            s = self.forward(z)
            return s * (1 - s)
        elif self.func == "relu":
            return (z > 0).astype(float)
        elif self.func == "tanh":
            return 1 - np.tanh(z) ** 2
        else:
            raise ValueError("Unsupported activation")



# Layer
class Layer:
    def __init__(self, input_dim, output_dim, activation="relu"):
        self.W = np.random.randn(input_dim, output_dim) * 0.01
        self.b = np.zeros((1, output_dim))
        self.activation = Activation(activation)
        self.Z = None
        self.A = None

    def forward(self, X):
        self.Z = np.dot(X, self.W) + self.b
        self.A = self.activation.forward(self.Z)
        return self.A



# Loss Function
class LossFunction:
    def __init__(self, loss="binary_crossentropy"):
        self.loss = loss

    def compute(self, y_true, y_pred):
        m = y_true.shape[0]
        if self.loss == "binary_crossentropy":
            return - (1/m) * np.sum(y_true*np.log(y_pred+1e-9) + (1-y_true)*np.log(1-y_pred+1e-9))
        else:
            raise ValueError("Unsupported loss")

    def derivative(self, y_true, y_pred):
        return y_pred - y_true  # for sigmoid + BCE


# -------------------------------
# Forward Propagation
# -------------------------------
class ForwardProp:
    def __init__(self, layers):
        self.layers = layers

    def run(self, X):
        A = X
        for layer in self.layers:
            A = layer.forward(A)
        return A


# Backward Propagation
class BackProp:
    def __init__(self, layers, loss_func):
        self.layers = layers
        self.loss_func = loss_func

    def run(self, X, y):
        grads = {}
        m = X.shape[0]

        # Forward values already stored in layers
        L = len(self.layers)
        y_pred = self.layers[-1].A

        # Output layer gradient
        dZ = self.loss_func.derivative(y, y_pred)
        grads["dW" + str(L)] = (1/m) * np.dot(self.layers[-2].A.T, dZ)
        grads["db" + str(L)] = (1/m) * np.sum(dZ, axis=0, keepdims=True)

        # Backprop through hidden layers
        dA = np.dot(dZ, self.layers[-1].W.T)
        for l in reversed(range(L-1)):
            dZ = dA * self.layers[l].activation.derivative(self.layers[l].Z)
            A_prev = X if l == 0 else self.layers[l-1].A
            grads["dW" + str(l+1)] = (1/m) * np.dot(A_prev.T, dZ)
            grads["db" + str(l+1)] = (1/m) * np.sum(dZ, axis=0, keepdims=True)
            if l > 0:
                dA = np.dot(dZ, self.layers[l].W.T)

        return grads


# Optimizer: Gradient Descent
class GradDescent:
    def __init__(self, layers, learning_rate=0.01):
        self.layers = layers
        self.lr = learning_rate

    def step(self, grads):
        for idx, layer in enumerate(self.layers):
            layer.W -= self.lr * grads["dW" + str(idx+1)]
            layer.b -= self.lr * grads["db" + str(idx+1)]


# Training Loop
class Training:
    def __init__(self, model, loss_func, backprop, optimizer):
        self.model = model
        self.loss_func = loss_func
        self.backprop = backprop
        self.optimizer = optimizer

    def fit(self, X, y, epochs=1000):
        for i in range(epochs):
            # Forward
            y_pred = self.model.forward.run(X)

            # Loss
            loss = self.loss_func.compute(y, y_pred)

            # Backward
            grads = self.backprop.run(X, y)

            # Update
            self.optimizer.step(grads)

            if i % 100 == 0:
                print(f"Epoch {i}, Loss: {loss:.4f}")



# Model

class Model:
    def __init__(self, layer_dims, activations):
        """
        layer_dims: [input_dim, h1, h2, ..., output_dim]
        activations: list of activation functions for each layer (len = L-1)
        """
        assert len(layer_dims) - 1 == len(activations)
        self.layers = []
        for i in range(1, len(layer_dims)):
            self.layers.append(Layer(layer_dims[i-1], layer_dims[i], activation=activations[i-1]))
        self.forward = ForwardProp(self.layers)



# Example (DNN with 2 hidden layers)

if __name__ == "__main__":
    # XOR dataset
    X = np.array([[0,0],[0,1],[1,0],[1,1]])
    y = np.array([[0],[1],[1],[0]])

    # Build model: input=2, hidden1=4, hidden2=3, output=1
    model = Model(layer_dims=[2, 4, 3, 1], activations=["relu", "relu", "sigmoid"])
    loss_func = LossFunction()
    backprop = BackProp(model.layers, loss_func)
    optimizer = GradDescent(model.layers, learning_rate=0.1)
    trainer = Training(model, loss_func, backprop, optimizer)

    trainer.fit(X, y, epochs=1000)


Epoch 0, Loss: 0.6931
Epoch 100, Loss: 0.6931
Epoch 200, Loss: 0.6931
Epoch 300, Loss: 0.6931
Epoch 400, Loss: 0.6931
Epoch 500, Loss: 0.6931
Epoch 600, Loss: 0.6931
Epoch 700, Loss: 0.6931
Epoch 800, Loss: 0.6931
Epoch 900, Loss: 0.6931
