In [11]:
import numpy as np #type: ignore
class Layer:
    """Common interface shared by every layer of the network.

    Concrete layers override ``forward`` and ``backward``; the versions
    defined here are placeholders that do nothing and return ``None``.
    """

    def __init__(self):
        # Both slots start out empty.
        self.input = self.output = None

    def forward(self, input):
        """Placeholder forward pass: ignores ``input`` and returns ``None``."""
        # TODO: return output
        return None

    def backward(self, output_gradient, learning_rate):
        """Placeholder backward pass: ignores its arguments and returns ``None``."""
        # TODO: update parameters and return input gradient
        return None

In [12]:
class Dense(Layer):
    """Fully connected layer computing ``weights . input + bias``.

    ``weights`` has shape ``(output_size, input_size)`` and ``bias`` has shape
    ``(output_size, 1)``, so inputs are expected column-wise, i.e. an array
    whose first dimension is ``input_size``.
    """

    def __init__(self, input_size, output_size):
        """Create the layer with standard-normal random weights and biases.

        Args:
            input_size: Number of input features.
            output_size: Number of output units.
        """
        # Initialise the ``input`` / ``output`` attributes declared by Layer.
        super().__init__()
        self.weights = np.random.randn(output_size, input_size)
        self.bias = np.random.randn(output_size, 1)

    def forward(self, input):
        """Cache ``input`` and return ``weights . input + bias``."""
        self.input = input
        self.output = np.dot(self.weights, self.input) + self.bias
        return self.output

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return the input gradient.

        Args:
            output_gradient: Gradient of the loss with respect to this
                layer's output.
            learning_rate: Step size used for the parameter update.

        Returns:
            Gradient of the loss with respect to this layer's input.
        """
        # The input gradient must use the weights as they were during the
        # forward pass, so compute it before the weights are updated.
        input_gradient = np.dot(self.weights.T, output_gradient)
        weights_gradient = np.dot(output_gradient, self.input.T)
        # Sum over columns so the bias gradient keeps shape (output_size, 1).
        bias_gradient = np.sum(output_gradient, axis=1, keepdims=True)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * bias_gradient
        return input_gradient

In [13]:
class Activation(Layer):
    """Parameter-free layer that applies a function to its input.

    The layer has nothing to learn: ``backward`` only propagates the gradient.
    """

    def __init__(self, activation, activation_derivative):
        """Store the activation function and its derivative.

        Args:
            activation: Callable applied to the input in ``forward``.
            activation_derivative: Callable returning the derivative of
                ``activation`` evaluated at the input.
        """
        # Initialise the ``input`` / ``output`` attributes declared by Layer.
        super().__init__()
        self.activation = activation
        self.activation_derivative = activation_derivative

    def forward(self, input):
        """Cache ``input`` and return ``activation(input)``."""
        self.input = input
        self.output = self.activation(self.input)
        return self.output

    def backward(self, output_gradient, learning_rate):
        """Return the gradient with respect to the input (chain rule).

        ``learning_rate`` is accepted for interface compatibility with other
        layers but is unused because there are no parameters to update.
        """
        activation_grad = self.activation_derivative(self.input)
        return output_gradient * activation_grad

In [14]:
class Sigmoid(Activation):
    """Activation layer using the logistic sigmoid ``1 / (1 + exp(-x))``."""

    def __init__(self):
        def sigmoid(x):
            """Numerically stable logistic sigmoid."""
            # exp() is only ever applied to a non-positive number, so it can
            # underflow to 0 but never overflow (exp(-x) would overflow for
            # large negative x).
            z = np.exp(-np.abs(x))
            # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
            return np.where(x >= 0, 1.0, z) / (1.0 + z)

        def sigmoid_derivative(x):
            """Derivative of the sigmoid: ``s * (1 - s)`` with ``s = sigmoid(x)``."""
            s = sigmoid(x)
            return s * (1 - s)

        super().__init__(sigmoid, sigmoid_derivative)

In [15]:
def binary_cross_entropy(y_true, y_pred, eps=1e-12):
    """Mean binary cross-entropy between targets and predicted probabilities.

    Args:
        y_true: Array-like of target values.
        y_pred: Array-like of predicted probabilities.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before taking the
            logarithm, so a prediction of exactly 0 or 1 yields a large but
            finite loss instead of ``inf`` / ``nan``.

    Returns:
        The loss averaged over all elements.
    """
    y_true = np.asarray(y_true, dtype=float)
    # Keep log() away from 0: log(0) is -inf and 0 * -inf is nan.
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_derivative(y_true, y_pred, eps=1e-12):
    """Gradient of the mean binary cross-entropy with respect to ``y_pred``.

    Args:
        y_true: Array-like of target values.
        y_pred: Array-like of predicted probabilities.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before dividing, so
            a prediction of exactly 0 or 1 yields a large but finite gradient
            instead of ``inf`` / ``nan``.

    Returns:
        Array of element-wise gradients, scaled by ``1 / size(y_true)`` to
        match the mean taken in the loss.
    """
    y_true = np.asarray(y_true, dtype=float)
    # Keep both denominators away from 0 (0 / 0 would produce nan).
    y_pred = np.clip(np.asarray(y_pred, dtype=float), eps, 1 - eps)
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)
# Layer stack applied in order: 2 inputs -> 3 units -> 1 output, with a
# sigmoid after each dense layer. Dense weights are drawn from np.random
# at construction time, so the starting point differs between runs.
network = [Dense(2, 3), Sigmoid(), Dense(3, 1), Sigmoid()]

In [16]:
def predict(network, input):
    """Run ``input`` through every layer of ``network`` and return the result.

    Args:
        network: Iterable of layers, each exposing ``forward(x)``.
        input: Value passed to the first layer.

    Returns:
        The output of the last layer, or ``input`` itself when ``network``
        contains no layers.
    """
    # Start from the input so an empty network is an identity mapping rather
    # than an unbound-variable error.
    output = input
    for layer in network:
        output = layer.forward(output)
    return output
# Feature matrix: four samples (rows) with two features each (columns).
X = np.array([[0.5, 1.5], [1.0, 2.0], [1.5, 0.5], [3.0, 1.0]])

In [19]:
# Binary target labels as a column vector of shape (4, 1).
y_true = np.array([
    [0],
    [0],
    [1],
    [1],
])
def train(network, loss, loss_derivative, x_train, y_train, epochs=100, learning_rate=0.01, verbose=True):
    """Train ``network`` with per-sample gradient descent.

    For every epoch each sample is pushed forward through all layers, the
    loss is accumulated, and the loss gradient is propagated backwards so
    each layer can update itself.

    Args:
        network: Sequence of layers exposing ``forward(x)`` and
            ``backward(gradient, learning_rate)``.
        loss: Callable ``loss(target, output)`` returning a scalar.
        loss_derivative: Callable ``loss_derivative(target, output)``
            returning the gradient of the loss with respect to ``output``.
        x_train: Array-like of samples, one per entry along the first axis.
        y_train: Array-like of targets, aligned with ``x_train``.
        epochs: Number of passes over the training data.
        learning_rate: Step size forwarded to every ``backward`` call.
        verbose: When true, print the mean loss after each epoch.

    Raises:
        ValueError: If ``x_train`` is empty or its length differs from
            ``y_train``.
    """
    # Accept plain nested lists as well as arrays (samples need .reshape).
    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)

    n_samples = len(x_train)
    if n_samples == 0:
        # Would otherwise surface as a ZeroDivisionError when averaging.
        raise ValueError("x_train must contain at least one sample")
    if len(y_train) != n_samples:
        raise ValueError(
            f"x_train and y_train must have the same length, got {n_samples} and {len(y_train)}"
        )

    for e in range(epochs):
        error = 0
        for sample, label in zip(x_train, y_train):
            # Each sample and target is presented as a column vector.
            output = sample.reshape(-1, 1)
            target = label.reshape(-1, 1)

            for layer in network:
                output = layer.forward(output)

            error += loss(target, output)

            # Backpropagation: walk the layers in reverse, letting each one
            # update itself and hand back the gradient for the previous layer.
            grad = loss_derivative(target, output)
            for layer in reversed(network):
                grad = layer.backward(grad, learning_rate)

        error /= n_samples
        if verbose:
            print(f"{e + 1}/{epochs}, error={round(error, 2)}")

In [22]:
# Fit the network on the toy dataset for 100 epochs with the binary
# cross-entropy loss; the mean loss is printed after every epoch.
train(
    network=network,
    loss=binary_cross_entropy,
    loss_derivative=binary_cross_entropy_derivative,
    x_train=X,
    y_train=y_true,
    epochs=100,
    learning_rate=0.1,
)

1/100, error=0.26
2/100, error=0.25
3/100, error=0.25
4/100, error=0.24
5/100, error=0.24
6/100, error=0.23
7/100, error=0.23
8/100, error=0.23
9/100, error=0.22
10/100, error=0.22
11/100, error=0.22
12/100, error=0.21
13/100, error=0.21
14/100, error=0.21
15/100, error=0.2
16/100, error=0.2
17/100, error=0.2
18/100, error=0.19
19/100, error=0.19
20/100, error=0.19
21/100, error=0.19
22/100, error=0.18
23/100, error=0.18
24/100, error=0.18
25/100, error=0.18
26/100, error=0.17
27/100, error=0.17
28/100, error=0.17
29/100, error=0.17
30/100, error=0.16
31/100, error=0.16
32/100, error=0.16
33/100, error=0.16
34/100, error=0.16
35/100, error=0.15
36/100, error=0.15
37/100, error=0.15
38/100, error=0.15
39/100, error=0.15
40/100, error=0.14
41/100, error=0.14
42/100, error=0.14
43/100, error=0.14
44/100, error=0.14
45/100, error=0.14
46/100, error=0.13
47/100, error=0.13
48/100, error=0.13
49/100, error=0.13
50/100, error=0.13
51/100, error=0.13
52/100, error=0.12
53/100, error=0.12
54/10