In [1]:
import numpy as np
# Activation Functions
class Activation:
    """Element-wise activation function selected by name.

    Supported names are ``"sigmoid"`` and ``"relu"``. The name is stored
    as given and is only checked when ``forward`` or ``derivative`` is
    called.
    """

    def __init__(self, func="sigmoid"):
        self.func = func

    def forward(self, z):
        """Apply the selected activation to ``z`` element-wise.

        Raises:
            ValueError: if ``self.func`` is not a supported name.
        """
        if self.func == "sigmoid":
            # sigmoid(z) == exp(-log(1 + exp(-z))). np.logaddexp(0, -z)
            # computes log(exp(0) + exp(-z)) in a numerically stable way,
            # so very negative z no longer overflows the way the naive
            # 1 / (1 + np.exp(-z)) does.
            return np.exp(-np.logaddexp(0, -z))
        elif self.func == "relu":
            return np.maximum(0, z)
        else:
            raise ValueError("Unsupported activation")

    def derivative(self, z):
        """Return the derivative of the activation, evaluated at ``z``.

        Raises:
            ValueError: if ``self.func`` is not a supported name.
        """
        if self.func == "sigmoid":
            s = self.forward(z)
            return s * (1 - s)
        elif self.func == "relu":
            # Gradient is 1 where z > 0 and 0 elsewhere (including z == 0).
            return (z > 0).astype(float)
        else:
            raise ValueError("Unsupported activation")



# Neuron
class Neuron:
    """A single unit: weighted sum of the inputs plus a bias, then an activation.

    ``weights`` has shape ``(input_dim, 1)`` and ``bias`` has shape ``(1, 1)``.
    The most recent pre-activation and activation are kept in ``z`` and ``a``.
    """

    def __init__(self, input_dim, activation="sigmoid"):
        # No forward pass has happened yet.
        self.z = None
        self.a = None
        self.activation = Activation(activation)
        # Small random weights, zero bias.
        self.weights = 0.01 * np.random.randn(input_dim, 1)
        self.bias = np.zeros((1, 1))

    def forward(self, x):
        """Compute and cache ``z = x . weights + bias`` and ``a = activation(z)``; return ``a``."""
        pre_activation = np.dot(x, self.weights) + self.bias
        self.z = pre_activation
        self.a = self.activation.forward(pre_activation)
        return self.a



# Layer

class Layer:
    """Fully connected layer: ``A = activation(X . W + b)``.

    ``W`` has shape ``(input_dim, output_dim)`` and ``b`` has shape
    ``(1, output_dim)``. The latest pre-activation ``Z`` and output ``A``
    are cached on the instance by ``forward``.
    """

    def __init__(self, input_dim, output_dim, activation="sigmoid"):
        # Nothing cached until the first forward pass.
        self.Z = None
        self.A = None
        self.activation = Activation(activation)
        # Small random weights, zero biases.
        self.W = 0.01 * np.random.randn(input_dim, output_dim)
        self.b = np.zeros((1, output_dim))

    def forward(self, X):
        """Run the layer on ``X``, cache ``Z`` and ``A``, and return ``A``."""
        pre_activation = np.dot(X, self.W) + self.b
        self.Z = pre_activation
        self.A = self.activation.forward(pre_activation)
        return self.A



# Parameters (wrapper for weights/biases)
class Parameters:
    """Thin holder that keeps a reference to the given layer objects."""

    def __init__(self, layers):
        # The same object is kept (no copy), so changes made through the
        # layers are visible here as well.
        self.layers = layers


# -------------------------------
# Loss Function
# -------------------------------
class LossFunction:
    """Loss selected by name; only ``"binary_crossentropy"`` is supported.

    A small constant (1e-9) is added inside the logarithms and the
    denominators so that predictions of exactly 0 or 1 do not produce
    ``log(0)`` or a division by zero.
    """

    def __init__(self, loss="binary_crossentropy"):
        self.loss = loss

    def compute(self, y_true, y_pred):
        """Return the binary cross-entropy, summed and divided by ``y_true.shape[0]``.

        Raises:
            ValueError: if ``self.loss`` is not a supported name.
        """
        m = y_true.shape[0]
        if self.loss == "binary_crossentropy":
            return - (1/m) * np.sum(y_true*np.log(y_pred+1e-9) + (1-y_true)*np.log(1-y_pred+1e-9))
        else:
            raise ValueError("Unsupported loss")

    def derivative(self, y_true, y_pred):
        """Return the element-wise derivative of the loss with respect to ``y_pred``.

        The result is not divided by the number of samples.

        Raises:
            ValueError: if ``self.loss`` is not a supported name.
        """
        # Mirror compute(): an unknown loss name must fail loudly rather
        # than silently fall back to the cross-entropy derivative.
        if self.loss != "binary_crossentropy":
            raise ValueError("Unsupported loss")
        return -(y_true / (y_pred+1e-9)) + ((1-y_true)/(1-y_pred+1e-9))


# -------------------------------
# Forward Propagation
# -------------------------------
class ForwardProp:
    """Feeds an input through a sequence of layers, in order."""

    def __init__(self, layers):
        self.layers = layers

    def run(self, X):
        """Return the output of the last layer for input ``X``.

        Each layer's ``forward`` receives the previous layer's result; with
        no layers, ``X`` itself is returned.
        """
        output = X
        for current_layer in self.layers:
            output = current_layer.forward(output)
        return output



# Backward Propagation
class BackProp:
    """Computes weight and bias gradients for a stack of layers.

    The layers must already hold the cached ``Z`` and ``A`` values of a
    forward pass on the same ``X``; this class only reads them.
    ``loss_func`` is stored but not consulted by ``run``: the output-layer
    gradient is hard-wired to ``y_pred - y``, the combined derivative of a
    sigmoid output with binary cross-entropy.
    """

    def __init__(self, layers, loss_func):
        self.layers = layers
        self.loss_func = loss_func

    def run(self, X, y):
        """Return gradients for every layer, keyed ``"dW<k>"`` and ``"db<k>"``.

        ``k`` runs from 1 (first layer) to ``len(self.layers)`` (output
        layer). Each gradient is averaged over ``X.shape[0]`` samples.
        Works for any number of layers >= 1; the previous implementation
        only handled exactly two.
        """
        grads = {}
        m = X.shape[0]

        # Output layer: derivative for sigmoid + cross-entropy.
        dZ = self.layers[-1].A - y

        # Walk from the output layer back to the first layer.
        for idx in range(len(self.layers), 0, -1):
            layer = self.layers[idx - 1]
            # The input of layer idx is the previous layer's activation,
            # or X itself for the first layer.
            A_prev = self.layers[idx - 2].A if idx > 1 else X

            grads["dW" + str(idx)] = (1/m) * np.dot(A_prev.T, dZ)
            grads["db" + str(idx)] = (1/m) * np.sum(dZ, axis=0, keepdims=True)

            if idx > 1:
                # Push the error through this layer's weights, then through
                # the previous layer's activation function.
                prev_layer = self.layers[idx - 2]
                dA_prev = np.dot(dZ, layer.W.T)
                dZ = dA_prev * prev_layer.activation.derivative(prev_layer.Z)

        return grads



# Gradient Descent Optimizer
class GradDescent:
    """Plain gradient-descent update for a list of layers.

    Each layer must expose ``W`` and ``b``; they are updated in place.
    """

    def __init__(self, layers, learning_rate=0.01):
        self.layers = layers
        self.lr = learning_rate

    def step(self, grads):
        """Subtract ``lr * gradient`` from every layer's ``W`` and ``b``.

        ``grads`` must contain ``"dW<k>"`` and ``"db<k>"`` for each layer,
        with ``k`` starting at 1 for the first layer. All layers are
        updated, not only the first two.

        Raises:
            KeyError: if a gradient for one of the layers is missing.
        """
        for idx, layer in enumerate(self.layers, start=1):
            layer.W -= self.lr * grads[f"dW{idx}"]
            layer.b -= self.lr * grads[f"db{idx}"]



# Training Loop
class Training:
    """Runs the forward / loss / backward / update cycle for a number of epochs."""

    def __init__(self, model, loss_func, backprop, optimizer):
        self.model, self.loss_func = model, loss_func
        self.backprop, self.optimizer = backprop, optimizer

    def fit(self, X, y, epochs=1000):
        """Train on the full ``(X, y)`` batch for ``epochs`` iterations.

        The loss is printed on every 100th epoch, starting with epoch 0.
        Nothing is returned.
        """
        for epoch in range(epochs):
            # The forward pass runs first; its loss is what gets reported.
            predictions = self.model.forward.run(X)
            loss = self.loss_func.compute(y, predictions)

            # Gradients, then the parameter update.
            self.optimizer.step(self.backprop.run(X, y))

            if epoch % 100:
                continue
            print(f"Epoch {epoch}, Loss: {loss:.4f}")


# Model
class Model:
    """Two-layer network: one sigmoid hidden layer followed by a sigmoid output layer.

    ``layers`` holds the two layers in forward order and ``forward`` is a
    ``ForwardProp`` runner over that same list.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        # Hidden layer is built first, then the output layer.
        self.layers = [
            Layer(input_dim, hidden_dim, activation="sigmoid"),
            Layer(hidden_dim, output_dim, activation="sigmoid"),
        ]
        self.forward = ForwardProp(self.layers)


# Example Run
if __name__ == "__main__":
    # The four 2-bit inputs with their XOR targets.
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ])
    y = np.array([
        [0],
        [1],
        [1],
        [0],
    ])

    # Wire the pieces together: 2 inputs -> 2 hidden units -> 1 output.
    model = Model(input_dim=2, hidden_dim=2, output_dim=1)
    loss_func = LossFunction()
    backprop = BackProp(model.layers, loss_func)
    optimizer = GradDescent(model.layers, learning_rate=0.1)
    trainer = Training(model, loss_func, backprop, optimizer)

    # NOTE(review): in the run recorded with this file the printed loss stays
    # at about 0.6931 for all 1000 epochs, i.e. the network does not learn
    # this data with these settings. Presumably the small initial weights
    # and the low learning rate are involved; confirm before relying on it.
    trainer.fit(X, y, epochs=1000)


Epoch 0, Loss: 0.6932
Epoch 100, Loss: 0.6931
Epoch 200, Loss: 0.6931
Epoch 300, Loss: 0.6931
Epoch 400, Loss: 0.6931
Epoch 500, Loss: 0.6931
Epoch 600, Loss: 0.6931
Epoch 700, Loss: 0.6931
Epoch 800, Loss: 0.6931
Epoch 900, Loss: 0.6931
