# Neural Networks — Try it in PyTorch

This is an **optional** hands-on companion to [Chapter 3](https://robennals.github.io/ai-explained/03-neurons). You'll build neurons from scratch, see why a single neuron can't solve XOR, and watch a two-layer network learn a nonlinear decision boundary.

In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np

## A Single Neuron

An artificial **neuron** takes some numbers as input, multiplies each one by a **weight** (how important that input is), adds them up, adds a **bias** (a constant offset), and passes the result through an **activation function** that squashes the output into a useful range. That's it — the whole thing is just multiply, add, squash.

In [None]:
# A neuron: weighted sum of inputs + bias, passed through an activation function
# output = sigmoid(w1*x1 + w2*x2 + b)

inputs = torch.tensor([0.8, 0.2])     # Two inputs
weights = torch.tensor([0.5, -0.3])   # How much each input matters
bias = torch.tensor(0.1)               # Shifts the threshold

# Step 1: Weighted sum + bias
z = torch.dot(weights, inputs) + bias

# These tensors use PyTorch's default float32 precision, so converting them
# straight to Python floats exposes rounding noise (0.1 would print as
# 0.10000000149011612). Round the *display* copies only, so the printout
# shows the same numbers that were typed in above; the maths is untouched.
shown_inputs = [round(v, 6) for v in inputs.tolist()]
shown_weights = [round(v, 6) for v in weights.tolist()]
shown_bias = round(bias.item(), 6)
weighted_sum_line = (
    f"Weighted sum: {shown_weights[0]}×{shown_inputs[0]} + "
    f"{shown_weights[1]}×{shown_inputs[1]} + {shown_bias} = {z.item():.3f}"
)

print(f"Inputs:  {shown_inputs}")
print(f"Weights: {shown_weights}")
print(f"Bias:    {shown_bias}")
print(weighted_sum_line)

# Step 2: Activation function (sigmoid squashes to 0-1)
output = torch.sigmoid(z)
print(f"After sigmoid: {output.item():.3f}")

## The Sigmoid Function

In [None]:
# Plot the sigmoid curve: whatever number goes in, the result lands in (0, 1)
x = torch.linspace(-8, 8, 200)
y = torch.sigmoid(x)

plt.figure(figsize=(8, 3))
plt.plot(x.numpy(), y.numpy(), linewidth=2)

# Faint guide lines through the curve's midpoint: output 0.5 at input 0
guide_style = dict(color='gray', linestyle='--', alpha=0.3)
plt.axhline(y=0.5, **guide_style)
plt.axvline(x=0, **guide_style)

# Mark and label a handful of sample inputs on the curve
for val in (-4, -2, 0, 2, 4):
    height = torch.sigmoid(torch.tensor(float(val))).item()
    plt.plot(val, height, 'ro', markersize=6)
    plt.annotate(
        f'σ({val})={height:.2f}',
        (val, height),
        textcoords="offset points",
        xytext=(10, 10),
        fontsize=8,
    )

plt.xlabel("Input")
plt.ylabel("Output")
plt.title("Sigmoid: squashes everything to (0, 1)")
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## Logic Gates with Neurons

In [None]:
# A single neuron can implement logic gates by choosing the right weights

def neuron(x1, x2, w1, w2, b):
    """Single neuron with sigmoid activation: ``sigmoid(w1*x1 + w2*x2 + b)``.

    Args:
        x1, x2: The two inputs. Plain Python numbers or tensors; tensors of
            matching (or broadcastable) shape evaluate many inputs at once.
        w1, w2: The weights applied to ``x1`` and ``x2``.
        b: The bias added to the weighted sum.

    Returns:
        A tensor in PyTorch's default floating dtype holding the activation,
        0-dimensional for scalar arguments, otherwise with the broadcast
        shape of the arguments.
    """
    z = w1 * x1 + w2 * x2 + b
    # as_tensor (rather than tensor(float(z))) accepts multi-element tensors
    # and keeps z attached to the autograd graph when it is already a tensor.
    return torch.sigmoid(torch.as_tensor(z, dtype=torch.get_default_dtype()))

# Every combination of two binary inputs, in truth-table order
truth_table = [(0, 0), (0, 1), (1, 0), (1, 1)]

# Each gate is the same neuron with a different bias:
#   AND gate: both inputs must be high
#   OR gate: at least one input must be high
gates = [
    ("AND gate (w1=10, w2=10, b=-15):", 10, 10, -15),
    ("OR gate (w1=10, w2=10, b=-5):", 10, 10, -5),
]

for gate_index, (header, gate_w1, gate_w2, gate_bias) in enumerate(gates):
    if gate_index > 0:
        print()  # blank line between the two tables
    print(header)
    for x1, x2 in truth_table:
        out = neuron(x1, x2, gate_w1, gate_w2, gate_bias)
        print(f"  ({x1}, {x2}) → {out.item():.4f}  ≈ {round(out.item())}")

## XOR: A Single Neuron Fails

Now let's try to **train** a neuron — let the computer find the right weights automatically using gradient descent (from Chapter 2).

Some new PyTorch tools:
- `nn.Linear(2, 1)` — the "multiply, add" part of a neuron with 2 inputs and 1 output. It handles the weights and bias for you; the activation ("squash") is a separate step, `nn.Sigmoid()`. (Internally it uses matrix multiplication — see Chapter 5.)
- `nn.Sequential(...)` — chains layers together: the output of one feeds into the next.
- `nn.BCELoss()` — binary cross-entropy, a loss function for yes/no problems. It measures how far the predicted probability is from the correct answer (0 or 1).
- `torch.optim.SGD(...)` — an **optimizer** that does the "take a step downhill" part of gradient descent automatically.

In [None]:
# XOR: output is 1 when inputs differ, 0 when they're the same
# A single neuron CANNOT learn this!

X = torch.tensor([[0,0],[0,1],[1,0],[1,1]], dtype=torch.float32)
y = torch.tensor([[0],[1],[1],[0]], dtype=torch.float32)

# Seed the random weight initialisation so the loss curve below is the same
# on every run (the two-layer experiment later in this notebook is seeded too).
torch.manual_seed(42)

# Single neuron (1 layer, no hidden units)
model = nn.Sequential(nn.Linear(2, 1), nn.Sigmoid())
optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
loss_fn = nn.BCELoss()

# Gradient descent: predict, measure the loss, step downhill, repeat
losses = []
for epoch in range(1000):
    pred = model(X)
    loss = loss_fn(pred, y)
    optimizer.zero_grad()   # clear gradients left over from the previous step
    loss.backward()         # compute fresh gradients of the loss
    optimizer.step()        # nudge weights and bias downhill
    losses.append(loss.item())

plt.figure(figsize=(8, 3))
plt.plot(losses)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Single neuron on XOR: loss gets stuck!")
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

print("Single neuron predictions on XOR:")
with torch.no_grad():  # just evaluating, no gradients needed
    for sample, target in zip(X, y):
        in1, in2 = sample[0].int().item(), sample[1].int().item()
        prob = model(sample).item()
        print(f"  ({in1}, {in2}) → {prob:.3f}  (target: {target.item():.0f})")
print("\nThe single neuron can't separate XOR — it needs a hidden layer!")

## Two Layers Solve XOR

In [None]:
# Adding a hidden layer is what lets the network learn XOR
# Note: nn.Linear performs matrix multiplication — see Chapter 5 for the full explanation

torch.manual_seed(42)

# Layers are created in network order (hidden first, then output)
hidden_layer = nn.Linear(2, 4)    # Hidden layer: 2 inputs → 4 hidden neurons
readout_layer = nn.Linear(4, 1)   # Output layer: 4 hidden → 1 output
model = nn.Sequential(hidden_layer, nn.Sigmoid(), readout_layer, nn.Sigmoid())

loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=2.0)

# Same gradient-descent recipe as before: predict, score, step downhill
losses = []
for epoch in range(2000):
    optimizer.zero_grad()
    loss = loss_fn(model(X), y)
    loss.backward()
    optimizer.step()
    losses.append(loss.item())

plt.figure(figsize=(8, 3))
plt.plot(losses)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Two-layer network on XOR: loss goes to zero!")
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

print("Two-layer network predictions on XOR:")
with torch.no_grad():
    for sample, target in zip(X, y):
        in1 = sample[0].int().item()
        in2 = sample[1].int().item()
        prob = model(sample).item()
        print(f"  ({in1}, {in2}) → {prob:.3f}  (target: {target.item():.0f})")

## Decision Boundaries

In [None]:
# Visualize what the two-layer network learned
# The decision boundary shows where the network switches from 0 to 1

# Evaluate the network on a GRID_N × GRID_N lattice covering the four XOR
# points with some margin on every side.
GRID_N = 100
axis = torch.linspace(-0.5, 1.5, GRID_N)
xx, yy = torch.meshgrid(axis, axis, indexing='ij')
grid = torch.stack([xx.flatten(), yy.flatten()], dim=1)   # one (x₁, x₂) row per lattice point

with torch.no_grad():
    zz = model(grid).reshape(GRID_N, GRID_N)

plt.figure(figsize=(6, 5))
plt.contourf(xx.numpy(), yy.numpy(), zz.numpy(), levels=20, cmap='RdYlBu_r', alpha=0.8)
plt.colorbar(label='Network output')

# Overlay the training points. The marker colour is derived from each target
# (red for 1, blue for 0) so it cannot drift out of sync with the data, and
# the coordinates are converted to plain Python floats before being handed
# to matplotlib.
for (px, py), target in zip(X.tolist(), y.flatten().tolist()):
    marker_color = 'red' if target > 0.5 else 'blue'
    plt.plot(px, py, 'o', color=marker_color, markersize=12,
             markeredgecolor='black', markeredgewidth=2)
    plt.annotate(f'XOR={target:.0f}', (px + 0.05, py + 0.08), fontsize=10)

plt.xlabel("x₁")
plt.ylabel("x₂")
plt.title("Decision boundary: the network learned to separate XOR!")
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

---

*This notebook accompanies [Chapter 3: Neural Networks](https://robennals.github.io/ai-explained/03-neurons). The interactive widgets in the web version let you explore these concepts visually.*