In [16]:
import torch
import torch.nn as nn
import torch.optim as optim

In [40]:
# XOR truth table: each row of X is one (a, b) input pair and the matching row
# of y holds the expected value of a XOR b.
X = torch.tensor(
    [[a, b] for a in (0, 1) for b in (0, 1)],  # (0,0), (0,1), (1,0), (1,1)
    dtype=torch.float32,
)
# Targets as a column vector so they line up with the model's single output.
y = torch.tensor([0, 1, 1, 0], dtype=torch.float32).unsqueeze(1)

In [41]:
# Define the neural network
# --- Here, nn.Module is the parent class.
#     It is the base class that PyTorch provides for building neural network models.
#     By inheriting from nn.Module, XORModel gains all the functionality that
#     PyTorch provides for managing layers, weights, forward passes, and backpropagation.

class XORModel(nn.Module):
    """Small fully connected network for learning XOR.

    Architecture: 2 inputs -> ``hidden_size`` sigmoid units -> 1 sigmoid output.
    Because the last activation is a sigmoid, the output is a value between
    0 and 1.

    Args:
        hidden_size: Number of neurons in the hidden layer. Defaults to 4,
            which reproduces the original fixed architecture.

    Raises:
        ValueError: If ``hidden_size`` is smaller than 1.
    """

    def __init__(self, hidden_size: int = 4):
        if hidden_size < 1:
            raise ValueError(f"hidden_size must be at least 1, got {hidden_size}")
        super().__init__()
        # Layers are created in the same order as before (hidden, then output)
        # so a given RNG seed still yields the same initial weights.
        self.hidden = nn.Linear(2, hidden_size)  # Hidden layer (2 inputs -> hidden_size neurons)
        self.output = nn.Linear(hidden_size, 1)  # Output layer (hidden_size neurons -> 1 output)
        self.sigmoid = nn.Sigmoid()              # Activation shared by both layers

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Tensor whose last dimension has size 2 (the two XOR inputs).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1, holding the sigmoid-activated output.
        """
        hidden_activation = self.sigmoid(self.hidden(x))  # Apply sigmoid to hidden layer
        return self.sigmoid(self.output(hidden_activation))  # Apply sigmoid to output layer

In [42]:
# Initialize the model
# Seed PyTorch's global random number generator first: the layer weights are
# drawn at random on construction, so without a fixed seed every
# Restart & Run All would start from different weights and print different losses.
torch.manual_seed(42)
model = XORModel()

In [45]:
# Loss function: binary cross-entropy, used for binary classification where
# every target is either 0 or 1 (yes/no, true/false, ...).
criterion = nn.BCELoss()

# Optimizer: adjusts the model's weights and biases during training, based on
# the gradients of the loss.
# Adam (Adaptive Moment Estimation) adapts the learning rate of each parameter
# individually. model.parameters() supplies all trainable parameters of the
# model (weights and biases of every layer); lr is the learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

In [51]:
# Train the model: each epoch runs one forward pass, one backward pass and one
# parameter update over the whole of X.
epochs = 10000  # number of iterations
for epoch in range(epochs):
    # Clear the gradients left over from the previous iteration before
    # computing new ones.
    optimizer.zero_grad()

    # Forward pass: feed X through the model, then measure how far the
    # predictions are from the targets y.
    predictions = model(X)
    loss = criterion(predictions, y)

    # Backward pass: compute how much each weight contributed to the error,
    # layer by layer.
    loss.backward()

    # Update the weights and biases using the gradients just computed.
    optimizer.step()

    # Report the loss every 1000 epochs.
    if (epoch + 1) % 1000 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [1000/10000], Loss: 0.0000
Epoch [2000/10000], Loss: 0.0000
Epoch [3000/10000], Loss: 0.0000
Epoch [4000/10000], Loss: 0.0000
Epoch [5000/10000], Loss: 0.0000
Epoch [6000/10000], Loss: 0.0000
Epoch [7000/10000], Loss: 0.0000
Epoch [8000/10000], Loss: 0.0000
Epoch [9000/10000], Loss: 0.0000
Epoch [10000/10000], Loss: 0.0000


In [48]:
  # Test the model
# Run the model on every input row and print its prediction next to the
# expected target. Gradient tracking is switched off because nothing is being
# optimized here.
with torch.no_grad():
    test_predictions = model(X)
    for i in range(X.size(0)):  # one line of output per input row
        print(f"Input: {X[i].tolist()}, Predicted: {test_predictions[i].item():.2f}, Actual: {y[i].item()}")

Input: [0.0, 0.0], Predicted: 0.00, Actual: 0.0
Input: [0.0, 1.0], Predicted: 1.00, Actual: 1.0
Input: [1.0, 0.0], Predicted: 1.00, Actual: 1.0
Input: [1.0, 1.0], Predicted: 0.00, Actual: 0.0
