### numpy ###

In [2]:
import numpy as np

class Model:
    """Two-layer fully connected network with sigmoid activations.

    The network maps ``input_size`` features through one hidden layer of
    ``hidden_size`` sigmoid units to ``output_size`` sigmoid outputs and is
    trained with full-batch gradient steps on the error ``y - output``.

    Attributes:
        W1, b1: hidden-layer weights ``(input_size, hidden_size)`` and bias
            ``(1, hidden_size)``.
        W2, b2: output-layer weights ``(hidden_size, output_size)`` and bias
            ``(1, output_size)``.
    """

    def __init__(self, input_size=2, hidden_size=4, output_size=1, seed=None):
        """Initialise weights from a standard normal and biases to zero.

        Args:
            input_size: number of input features (default 2, as for XOR).
            hidden_size: number of hidden units (default 4).
            output_size: number of output units (default 1).
            seed: optional integer seed. When given, the weights are drawn
                from a private ``RandomState`` so construction is
                reproducible; when ``None`` the global ``np.random`` state
                is used, exactly as before.
        """
        # Both np.random and RandomState expose randn(), so one code path
        # covers the seeded and the unseeded (legacy) case.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # W1 is drawn before W2 to keep the original draw order.
        self.W1 = rng.randn(input_size, hidden_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = rng.randn(hidden_size, output_size)
        self.b2 = np.zeros((1, output_size))

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        # For very negative x, exp(-x) overflows to inf and the quotient is
        # the correct limit 0.0; silence only that overflow warning.
        with np.errstate(over="ignore"):
            return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output* ``x``.

        Note that ``x`` must already be an activation (``sigmoid(z)``), not
        the pre-activation ``z``: the derivative is ``x * (1 - x)``.
        """
        return x * (1 - x)

    def forward(self, X):
        """Run a forward pass and cache the intermediates for ``backward``.

        Args:
            X: array of shape ``(n_samples, input_size)``.

        Returns:
            The output activations, shape ``(n_samples, output_size)``.
        """
        # Hidden layer
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)

        # Output layer
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)

        return self.a2

    def backward(self, X, y, output, learning_rate=1.0):
        """Back-propagate the error and update all parameters in place.

        Must be called after ``forward(X)``, because it reads the cached
        hidden activations ``self.a1``.

        Args:
            X: inputs used for the preceding forward pass.
            y: targets with the same shape as ``output``.
            output: the value returned by ``forward(X)``.
            learning_rate: step-size multiplier applied to every update
                (default 1.0, which reproduces the original behaviour).
        """
        # Output layer error
        self.output_error = y - output
        self.output_delta = self.output_error * self.sigmoid_derivative(output)

        # Hidden layer error (computed with W2 *before* it is updated below)
        self.hidden_error = np.dot(self.output_delta, self.W2.T)
        self.hidden_delta = self.hidden_error * self.sigmoid_derivative(self.a1)

        # The error is (target - output), so the updates are added.
        self.W2 += learning_rate * np.dot(self.a1.T, self.output_delta)
        self.b2 += learning_rate * np.sum(self.output_delta, axis=0, keepdims=True)
        self.W1 += learning_rate * np.dot(X.T, self.hidden_delta)
        self.b1 += learning_rate * np.sum(self.hidden_delta, axis=0, keepdims=True)

    def train(self, X, y, epochs=10000, learning_rate=1.0):
        """Run ``epochs`` full-batch forward/backward passes over ``(X, y)``.

        Args:
            X: training inputs, shape ``(n_samples, input_size)``.
            y: training targets, shape ``(n_samples, output_size)``.
            epochs: number of passes over the whole batch.
            learning_rate: step size forwarded to ``backward``.
        """
        for _ in range(epochs):
            output = self.forward(X)
            self.backward(X, y, output, learning_rate)

# Seed NumPy's global RNG so the random weight initialisation inside Model()
# -- and therefore the predictions printed below -- is the same on every
# Restart & Run All.
NUMPY_SEED = 42
np.random.seed(NUMPY_SEED)

# XOR training data: the four input pairs and their XOR targets
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Train the network on the full truth table
network = Model()
network.train(X, y)

# Test predictions: feed each input pair as a (1, 2) batch and print the output
print("Predictions:")
for i in range(len(X)):
    prediction = network.forward(X[i].reshape(1, 2))
    print(f"{X[i]} XOR: {prediction[0][0]:.4f}")

Predictions:
[0 0] XOR: 0.0103
[0 1] XOR: 0.9913
[1 0] XOR: 0.9893
[1 1] XOR: 0.0091


### pytorch ###

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

class PyTorchXORNetwork(nn.Module):
    """Small 2-4-1 feed-forward network with sigmoid activations.

    Two stages are exposed as sub-modules: ``hidden`` (Linear 2->4 followed
    by Sigmoid) and ``output`` (Linear 4->1 followed by Sigmoid).
    """

    def __init__(self):
        """Build the hidden stage first, then the output stage."""
        super().__init__()
        self.hidden = nn.Sequential(nn.Linear(2, 4), nn.Sigmoid())
        self.output = nn.Sequential(nn.Linear(4, 1), nn.Sigmoid())

    def forward(self, x):
        """Pass ``x`` through the hidden stage and then the output stage."""
        hidden_activations = self.hidden(x)
        result = self.output(hidden_activations)
        return result

# Seed PyTorch's RNG before the model is built so the random weight
# initialisation is the same on every Restart & Run All.
TORCH_SEED = 42
torch.manual_seed(TORCH_SEED)

# Check CUDA availability and fall back to the CPU when there is no GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# XOR training data, created directly on the selected device
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32, device=device)
y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32, device=device)

# Initialize network, loss and optimizer
model = PyTorchXORNetwork().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.1)

# Training loop: full-batch updates on the four XOR samples
for epoch in range(10000):
    # Forward pass
    outputs = model(X)
    loss = criterion(outputs, y)

    # Backward pass and optimize (clear stale gradients before backprop)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Test predictions (no gradients needed for inference)
print("\nPredictions:")
with torch.no_grad():
    for i in range(len(X)):
        prediction = model(X[i].reshape(1, 2))
        print(f"{X[i].cpu().numpy()} XOR: {prediction.item():.4f}")

Using device: cuda

Predictions:
[0. 0.] XOR: 0.0001
[0. 1.] XOR: 0.9995
[1. 0.] XOR: 0.9997
[1. 1.] XOR: 0.0006
