In [11]:
import numpy as np

In [2]:
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + exp(-x)), evaluated without overflow.

    The naive formula calls ``np.exp(-x)``, which overflows (and emits a
    RuntimeWarning) when ``x`` is a large negative number. Here the
    exponential is only ever taken of a non-positive value, so the
    intermediate result always lies in [0, 1].

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        The element-wise sigmoid of ``x``: a NumPy scalar for scalar input,
        otherwise an array with the same shape as ``x``.
    """
    x = np.asarray(x)
    z = np.exp(-np.abs(x))  # exp of a non-positive number: cannot overflow
    # x >= 0: 1 / (1 + exp(-x))      == 1 / (1 + z)
    # x <  0: exp(x) / (1 + exp(x))  == z / (1 + z)   (algebraically the same function)
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with an empty tuple unwraps a 0-d array to a scalar and
    # leaves arrays of higher dimension as arrays.
    return result[()]

def sigmoid_derivative(x):
    """Sigmoid gradient written in terms of the sigmoid's *output*.

    ``x`` must already be an activation value ``s = sigmoid(z)``, not the
    raw pre-activation ``z``. The derivative of the sigmoid with respect
    to ``z`` is then ``s * (1 - s)``, which is what this returns.
    """
    complement = 1 - x
    return x * complement

In [3]:
# Fix the state of NumPy's global random generator so that every call to
# np.random.* after this point yields the same values on each run
np.random.seed(seed=1)

In [4]:
# Truth table of the logical AND gate: one row per input pair
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Targets as a column vector (one row per sample); only [1, 1] maps to 1
y = np.array([0, 0, 0, 1]).reshape(-1, 1)

In [5]:
# Network architecture: 2 inputs -> 4 hidden units -> 1 output
input_neurons, hidden_neurons, output_neurons = 2, 4, 1

# Weights are standard-normal draws scaled by sqrt(1 / fan_in), where fan_in
# is the number of units feeding the layer. (This is fan-in scaling; the
# Xavier/Glorot formula would use 2 / (fan_in + fan_out) under the root.)
# The input->hidden matrix is drawn first, then hidden->output, so the
# random stream is consumed in a fixed order.
w_input_hidden = np.sqrt(1 / input_neurons) * np.random.standard_normal(
    size=(input_neurons, hidden_neurons)
)
w_hidden_output = np.sqrt(1 / hidden_neurons) * np.random.standard_normal(
    size=(hidden_neurons, output_neurons)
)

# Biases start at exactly zero, stored as row vectors so they broadcast
# across the rows of a batch
b_hidden = np.zeros(shape=(1, hidden_neurons))
b_output = np.zeros(shape=(1, output_neurons))

In [6]:
# Training hyperparameters
epochs = 10_000         # number of passes over the full training set
learning_rate = 0.1     # step size applied to every weight and bias update
print_interval = 1_000  # report the loss once every this many epochs

# Per-epoch loss values; the training loop appends one entry per epoch
loss_history = list()

In [7]:
# Training loop: full-batch gradient descent on the squared error
for epoch in range(epochs):
    # --- Forward pass: input -> hidden -> output, sigmoid at both layers ---
    hidden_input = X.dot(w_input_hidden) + b_hidden
    hidden_output = sigmoid(hidden_input)

    final_input = hidden_output.dot(w_hidden_output) + b_output
    final_output = sigmoid(final_input)

    # --- Loss: mean squared error over all samples ---
    error = y - final_output
    loss = np.square(error).mean()
    loss_history.append(loss)

    # --- Backward pass ---
    # sigmoid_derivative receives the activations (sigmoid outputs), not the
    # pre-activations. Because error is (target - prediction), these deltas
    # already point in the loss-decreasing direction.
    d_output = sigmoid_derivative(final_output) * error

    # Push the output delta back through the (not yet updated) output weights
    error_hidden = np.dot(d_output, w_hidden_output.T)
    d_hidden = sigmoid_derivative(hidden_output) * error_hidden

    # --- Parameter updates (added, since the deltas carry the sign) ---
    w_hidden_output += learning_rate * np.dot(hidden_output.T, d_output)
    b_output += learning_rate * d_output.sum(axis=0, keepdims=True)

    w_input_hidden += learning_rate * np.dot(X.T, d_hidden)
    b_hidden += learning_rate * d_hidden.sum(axis=0, keepdims=True)

    # Progress report every print_interval epochs
    if epoch % print_interval == 0:
        print(f"Epoch {epoch} Loss: {loss:.6f}")

Epoch 0 Loss: 0.239578
Epoch 1000 Loss: 0.026046
Epoch 2000 Loss: 0.007112
Epoch 3000 Loss: 0.003503
Epoch 4000 Loss: 0.002200
Epoch 5000 Loss: 0.001563
Epoch 6000 Loss: 0.001195
Epoch 7000 Loss: 0.000959
Epoch 8000 Loss: 0.000796
Epoch 9000 Loss: 0.000678


In [8]:
# Show the network's outputs next to the targets.
# Note: final_output comes from the forward pass of the last training epoch,
# i.e. it was computed just before that epoch's weight update was applied.
print("\nFinal predictions after training:")
print(np.round(final_output, 4))

# Target values for comparison
print("\nExpected outputs:")
print(y)


Final predictions after training:
[[2.00e-04]
 [2.49e-02]
 [2.25e-02]
 [9.65e-01]]

Expected outputs:
[[0]
 [0]
 [0]
 [1]]


In [9]:
# Inference helper that reuses the trained weights and biases
def predict(input_data):
    """Run one forward pass through the trained network.

    Reads the module-level parameters ``w_input_hidden``, ``b_hidden``,
    ``w_hidden_output`` and ``b_output`` at call time, so it reflects
    whatever values they currently hold.

    Args:
        input_data: Batch of samples, one row per sample (this notebook
            passes arrays of shape ``(1, 2)``).

    Returns:
        The network's sigmoid output for each row, rounded to 4 decimals.
    """
    hidden_pre = np.dot(input_data, w_input_hidden) + b_hidden
    hidden_activation = sigmoid(hidden_pre)
    output_pre = np.dot(hidden_activation, w_hidden_output) + b_output
    output_activation = sigmoid(output_pre)
    return np.round(output_activation, 4)

In [10]:
# Feed every training sample through predict() one at a time
print("\nVerifying each input:")
for i in range(X.shape[0]):
    input_data = X[i].reshape(1, -1)  # keep the single sample 2-D: one row
    prediction = predict(input_data)
    print(f"Input: {input_data[0]} → Output: {prediction[0][0]} (Expected: {y[i][0]})")


Verifying each input:
Input: [0 0] → Output: 0.0002 (Expected: 0)
Input: [0 1] → Output: 0.0249 (Expected: 0)
Input: [1 0] → Output: 0.0225 (Expected: 0)
Input: [1 1] → Output: 0.965 (Expected: 1)
