In [136]:
import numpy as np

In [137]:
# XOR truth table. The four (a, b) input pairs are written one per row and
# then transposed, so each *column* of X_train is one sample: shape (2, 4).
X_train = np.transpose(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))
# Class label of each column: the XOR of its two input features -> [0, 1, 1, 0].
y_train = X_train[0] ^ X_train[1]

# Network dimensions and optimisation settings.
input_size = len(X_train)  # number of rows of X_train, i.e. input features
hidden_size, output_size = 3, 2
learning_rate, num_epochs = 0.01, 1000

In [138]:
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    The naive formula evaluates ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``.  Here the exponential is only
    ever taken of a non-positive number, so it cannot overflow.

    Parameters
    ----------
    x : scalar or array_like of real numbers
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or ndarray
        Element-wise sigmoid of ``x``; a scalar for scalar input, otherwise
        an array with the same shape as ``x``.
    """
    x = np.asarray(x)
    if x.dtype.kind in "biu":
        # Booleans/integers cannot be negated or exponentiated safely as-is.
        x = x.astype(np.float64)

    # decay = exp(-|x|) lies in (0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same function.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Hand scalars back as scalars rather than 0-d arrays.
    return result[()] if result.ndim == 0 else result

In [139]:
def sigmoid_derivative(x):
    """Return ``x * (1 - x)`` element-wise.

    This equals the derivative of the sigmoid only when ``x`` is already a
    sigmoid *output* ``s = sigmoid(z)`` (since ``ds/dz = s * (1 - s)``); it is
    not the derivative evaluated at a raw pre-activation ``z``.

    Parameters
    ----------
    x : scalar or ndarray
        Sigmoid activation value(s).

    Returns
    -------
    scalar or ndarray
        Same shape as ``x``.
    """
    complement = 1 - x
    return complement * x

In [140]:
def initialize_parameters(input_size, hidden_size, output_size, seed=0):
    """Create the initial weights and biases of a two-layer network.

    Weights are drawn from a standard normal distribution and biases start at
    zero.  A private ``RandomState`` is used instead of ``np.random.seed`` so
    that calling this function does not reset the global NumPy random state as
    a hidden side effect; with the default ``seed=0`` the drawn values are the
    same as those produced by seeding the global generator with 0.

    Parameters
    ----------
    input_size : int
        Number of input features.
    hidden_size : int
        Number of hidden units.
    output_size : int
        Number of output units.
    seed : int, optional
        Seed of the local random generator (default 0).

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2)`` with shapes ``(hidden_size, input_size)``,
        ``(hidden_size, 1)``, ``(output_size, hidden_size)`` and
        ``(output_size, 1)``.
    """
    rng = np.random.RandomState(seed)
    # Draw order (W1 first, then W2) matters for reproducibility.
    W1 = rng.randn(hidden_size, input_size)
    b1 = np.zeros((hidden_size, 1))
    W2 = rng.randn(output_size, hidden_size)
    b2 = np.zeros((output_size, 1))
    return W1, b1, W2, b2

In [141]:
def forward_propagation(X, W1, b1, W2, b2):
    """Run the forward pass through both sigmoid layers.

    Each layer computes ``Z = W . A_prev + b`` followed by ``A = sigmoid(Z)``,
    starting from ``A_prev = X``.

    Returns
    -------
    tuple
        ``(Z1, A1, Z2, A2)``: pre-activation and activation of the hidden
        layer, then of the output layer.
    """
    cache = []
    activation = X
    for weights, bias in ((W1, b1), (W2, b2)):
        pre_activation = np.dot(weights, activation) + bias
        activation = sigmoid(pre_activation)
        cache.extend((pre_activation, activation))
    return tuple(cache)

def calculate_loss(A2, Y, eps=1e-15):
    """Cross-entropy loss summed over output units and averaged over samples.

    Parameters
    ----------
    A2 : ndarray
        Output activations, same shape as ``Y``.
    Y : ndarray
        Target matrix with one column per sample (``Y.shape[1]`` samples).
    eps : float, optional
        Activations are clipped to ``[eps, 1 - eps]`` before taking logs so
        that a saturated output of exactly 0 or 1 yields a large finite loss
        instead of ``inf``/``nan``.  Values already inside that interval are
        left untouched.

    Returns
    -------
    float
        ``-1/m * sum(Y * log(A2) + (1 - Y) * log(1 - A2))`` with ``m`` the
        number of samples.
    """
    m = Y.shape[1]
    # Guard against log(0): 0 * log(0) would otherwise evaluate to NaN.
    A2 = np.clip(A2, eps, 1 - eps)
    loss = -1/m * np.sum(Y * np.log(A2) + (1 - Y) * np.log(1 - A2))
    return loss

def backward_propagation(X, Y, Z1, A1, Z2, A2, W1, W2,b1,b2, learning_rate):
    """Compute gradients and return parameters after one gradient-descent step.

    The input arrays ``W1``, ``b1``, ``W2`` and ``b2`` are *not* modified; new
    arrays holding the updated values are returned.  (An in-place ``-=``
    would silently change the caller's arrays and fails outright on
    integer-typed parameters.)

    Parameters
    ----------
    X : ndarray
        Network input, one column per sample.
    Y : ndarray
        Target matrix, one column per sample (``Y.shape[1]`` samples).
    Z1, Z2 : ndarray
        Pre-activations from the forward pass.  Accepted for interface
        compatibility; not used in the computation.
    A1, A2 : ndarray
        Hidden-layer and output-layer activations from the forward pass.
    W1, W2, b1, b2 : ndarray
        Current weights and biases.
    learning_rate : float
        Step size of the update.

    Returns
    -------
    tuple
        ``(W1, b1, W2, b2)`` after the update.
    """
    m = Y.shape[1]

    # Output layer: A2 - Y is the error term w.r.t. Z2 for a sigmoid output
    # combined with a cross-entropy loss.
    dZ2 = A2 - Y
    dW2 = 1/m * np.dot(dZ2, A1.T)
    db2 = 1/m * np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer: back-propagate through W2 (its value *before* the update)
    # and through the hidden sigmoid, whose slope is expressed via A1.
    dZ1 = np.dot(W2.T, dZ2) * sigmoid_derivative(A1)
    dW1 = 1/m * np.dot(dZ1, X.T)
    db1 = 1/m * np.sum(dZ1, axis=1, keepdims=True)

    # Out-of-place updates keep the caller's arrays intact.
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2

    return W1, b1, W2, b2

In [142]:
def train(X, y, hidden_size, output_size, learning_rate, num_epochs):
    """Train the two-layer network with full-batch gradient descent.

    Parameters
    ----------
    X : ndarray
        Inputs with one column per sample; ``X.shape[0]`` is the input size.
    y : sequence of int
        Class index of each sample, used to build a one-hot target matrix
        of shape ``(output_size, len(y))``.
    hidden_size : int
        Number of hidden units.
    output_size : int
        Number of output units (classes).
    learning_rate : float
        Gradient-descent step size.
    num_epochs : int
        Number of full-batch updates.  With 0 epochs the initial parameters
        are returned and nothing is printed.

    Returns
    -------
    tuple
        The trained parameters ``(W1, b1, W2, b2)``.
    """
    input_size = X.shape[0]

    # One-hot encode the labels: row = class index, column = sample index.
    num_samples = len(y)
    Y = np.zeros((output_size, num_samples))
    Y[y, np.arange(num_samples)] = 1

    W1, b1, W2, b2 = initialize_parameters(input_size, hidden_size, output_size)

    # Stays None if the loop body never runs, so the report below cannot hit
    # an unbound variable when num_epochs == 0.
    loss = None
    for epoch in range(num_epochs):
        Z1, A1, Z2, A2 = forward_propagation(X, W1, b1, W2, b2)
        loss = calculate_loss(A2, Y)
        W1, b1, W2, b2 = backward_propagation(X, Y, Z1, A1, Z2, A2, W1, W2,b1,b2, learning_rate)

    # The reported loss comes from the last epoch's forward pass, i.e. it is
    # measured just before that epoch's parameter update.
    if loss is not None:
        print(f" Loss: {loss}")

    return W1, b1, W2, b2

def predict(X, W1, b1, W2, b2):
    """Return the predicted class index for every column of ``X``.

    Runs a forward pass and, for each sample, picks the output unit with the
    largest activation (argmax over axis 0).
    """
    output_activations = forward_propagation(X, W1, b1, W2, b2)[-1]
    return np.argmax(output_activations, axis=0)


In [143]:
# Fit the network on the training set using the hyperparameters defined above.
W1, b1, W2, b2 = train(
    X=X_train,
    y=y_train,
    hidden_size=hidden_size,
    output_size=output_size,
    learning_rate=learning_rate,
    num_epochs=num_epochs,
)

# Sanity check: predict on the same samples the network was trained on.
predictions = predict(X_train, W1, b1, W2, b2)
print("Predictions:", predictions)

 Loss: 0.7725533368054247
Predictions: [0 1 1 0]
