In [1]:
import numpy as np  # Importing the numpy library for numerical computations

# Define the sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))  # Applies the sigmoid function to squash values between 0 and 1

# Define the derivative of the sigmoid activation function
def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))  # Computes the derivative of the sigmoid function, used for backpropagation

# Initialize the weights and biases with random values
np.random.seed(42)  # Setting the seed for reproducibility of random values
W = np.random.randn(2, 1)  # Initializing weights randomly for a 2-input, 1-output setup (adjust for NOT gate)
b = np.random.randn(1)  # Initializing the bias randomly

# Set the learning rate and number of epochs
learning_rate = 0.1  # Learning rate controls the size of the update steps
num_epochs = 2000  # Number of times the network will go through the entire dataset

# Define the input and target outputs for each logic gate

# AND Gate
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # Input combinations for the AND gate
y = np.array([[0], [0], [0], [1]])  # Target output for the AND gate

# OR Gate
# X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # Input combinations for the OR gate
# y = np.array([[0], [1], [1], [1]])  # Target output for the OR gate

# NOT Gate
# Note: NOT gate is a single-input gate, so redefine the input and target
# W = np.random.randn(1, 1)  # Reset weights for a single input (only needed for NOT gate)
# X = np.array([[0], [1]])  # Input values for the NOT gate
# y = np.array([[1], [0]])  # Target output for the NOT gate

# NAND Gate
# X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # Input combinations for the NAND gate
# y = np.array([[1], [1], [1], [0]])  # Target output for the NAND gate

# NOR Gate
# X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # Input combinations for the NOR gate
# y = np.array([[1], [0], [0], [0]])  # Target output for the NOR gate

# Training loop
for epoch in range(num_epochs):
    # Forward pass
    z = np.dot(X, W) + b  # Compute the linear combination of inputs and weights, add bias
    a = sigmoid(z)  # Apply the sigmoid activation function

    # Compute the loss
    loss = np.mean((y - a) ** 2)  # Calculate the Mean Squared Error (MSE) loss

    # Compute the error
    error = (y - a)  # Difference between the target output and the predicted output

    # Backpropagation
    delta = error * sigmoid_derivative(z)  # Calculate the gradient for the output layer
    dW = np.dot(X.T, delta)  # Compute the gradient for the weights
    db = np.sum(delta)  # Compute the gradient for the bias

    # Update the weights and biases
    W -= learning_rate * dW  # Update weights using gradient descent
    b -= learning_rate * db  # Update bias using gradient descent

    # Print the loss at every 100 epochs
    if (epoch + 1) % 100 == 0:  # Print the loss every 100 iterations
        print(f"Epoch {epoch + 1}: Loss = {loss}")

# Test the model on the input data
# predictions = (a >= 0.5).astype(int)  # Uncomment to get binary predictions
print("Predicted Output:")
print(a)  # Print the final predicted output after training

Epoch 100: Loss = 0.6963547494440171
Epoch 200: Loss = 0.7244441795334924
Epoch 300: Loss = 0.7333433987852263
Epoch 400: Loss = 0.7376641033469217
Epoch 500: Loss = 0.7402093753277189
Epoch 600: Loss = 0.7418851544395703
Epoch 700: Loss = 0.7430713747998022
Epoch 800: Loss = 0.743954991863846
Epoch 900: Loss = 0.7446386023664195
Epoch 1000: Loss = 0.7451831678822602
Epoch 1100: Loss = 0.7456271763123506
Epoch 1200: Loss = 0.7459961184500012
Epoch 1300: Loss = 0.746307544488
Epoch 1400: Loss = 0.7465739312590969
Epoch 1500: Loss = 0.7468043912359856
Epoch 1600: Loss = 0.7470057345408888
Epoch 1700: Loss = 0.7471831526794381
Epoch 1800: Loss = 0.7473406723413224
Epoch 1900: Loss = 0.7474814646693546
Epoch 2000: Loss = 0.7476080609819206
Predicted Output:
[[0.99682342]
 [0.999098  ]
 [0.99928897]
 [0.99979846]]


In [2]:
# Solve the XOR gate using backpropagation

# Define the sigmoid activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))

# Define the input data and labels for the XOR gate problem
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # Input combinations for XOR gate
y = np.array([[0], [1], [1], [0]])  # Target output for XOR gate

# Initialize weights and biases with random values
np.random.seed(42)
W1 = np.random.randn(2, 2)  # Weights for the input to hidden layer (2 inputs, 2 neurons in hidden layer)
b1 = np.random.randn(2)     # Biases for the hidden layer
W2 = np.random.randn(2, 1)  # Weights for the hidden to output layer (2 neurons in hidden layer, 1 output)
b2 = np.random.randn(1)     # Bias for the output layer

# Set the learning rate and number of epochs
learning_rate = 0.1
num_epochs = 10000  # Increased number of epochs for better training

# Training loop
for epoch in range(num_epochs):
    # Forward pass
    # Hidden layer
    z1 = np.dot(X, W1) + b1  # Linear combination for the hidden layer
    a1 = sigmoid(z1)         # Activation for the hidden layer

    # Output layer
    z2 = np.dot(a1, W2) + b2  # Linear combination for the output layer
    a2 = sigmoid(z2)          # Activation for the output layer (final output)

    # Compute the loss (Mean Squared Error)
    loss = np.mean((y - a2) ** 2)

    # Backpropagation
    # Output layer error
    error_output = y - a2
    delta_output = error_output * sigmoid_derivative(z2)  # Gradient for output layer

    # Hidden layer error
    error_hidden = np.dot(delta_output, W2.T)
    delta_hidden = error_hidden * sigmoid_derivative(z1)  # Gradient for hidden layer

    # Compute gradients
    dW2 = np.dot(a1.T, delta_output)  # Gradient for W2
    db2 = np.sum(delta_output, axis=0)  # Gradient for b2
    dW1 = np.dot(X.T, delta_hidden)  # Gradient for W1
    db1 = np.sum(delta_hidden, axis=0)  # Gradient for b1

    # Update the weights and biases
    W2 += learning_rate * dW2
    b2 += learning_rate * db2
    W1 += learning_rate * dW1
    b1 += learning_rate * db1

    # Print the loss every 1000 epochs
    if (epoch + 1) % 1000 == 0:
        print(f"Epoch {epoch + 1}: Loss = {loss}")

# Test the model on the input data
print("Predicted Output for XOR Gate:")
print(a2)

Epoch 1000: Loss = 0.24442900063146195
Epoch 2000: Loss = 0.20359789746437934
Epoch 3000: Loss = 0.15340557867271964
Epoch 4000: Loss = 0.046398486976845954
Epoch 5000: Loss = 0.015626914962059084
Epoch 6000: Loss = 0.008452488215754311
Epoch 7000: Loss = 0.005615456282023922
Epoch 8000: Loss = 0.004147590639251693
Epoch 9000: Loss = 0.0032642383004088445
Epoch 10000: Loss = 0.0026792304865967086
Predicted Output for XOR Gate:
[[0.05395579]
 [0.95054019]
 [0.95009351]
 [0.05356085]]
