# Lab 4: PyTorch Fundamentals

**Day 2 - Deep Learning**

| Duration | Difficulty | Prerequisites |
|----------|------------|---------------|
| 90 min | Intermediate | Lab 3 |

## Learning Objectives

- Work with PyTorch tensors
- Understand automatic differentiation (autograd)
- Build neural networks with nn.Module
- Train a model with backpropagation

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy and PyTorch so random draws are repeatable from run to run
np.random.seed(42)
torch.manual_seed(42)

# Report the library version and whether a CUDA device is usable
print(
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

---

## Exercise 1: Tensor Basics

Tensors are the fundamental data structure in PyTorch (like NumPy arrays, but GPU-capable).

**Your Task:** Create and manipulate tensors.

In [None]:
def create_tensors():
    """Create various tensors.

    Each placeholder below is initialised to ``None``; replace it with the
    tensor described by the TODO directly above it.

    Returns:
        tuple: ``(tensor_from_list, zeros, ones, random_tensor, from_numpy)``
        in that order. Any entry that has not been implemented is ``None``.
    """
    # TODO: Create a tensor from a Python list [1, 2, 3, 4]
    tensor_from_list = None
    
    # TODO: Create a 3x3 tensor of zeros
    zeros = None
    
    # TODO: Create a 3x3 tensor of ones
    ones = None
    
    # TODO: Create a 3x3 tensor with random values (uniform 0-1)
    random_tensor = None
    
    # TODO: Create a tensor from numpy array
    # np_array is a 2x2 integer array; convert it rather than retyping its values
    np_array = np.array([[1, 2], [3, 4]])
    from_numpy = None
    
    return tensor_from_list, zeros, ones, random_tensor, from_numpy

In [None]:
def tensor_operations():
    """Perform tensor operations on two fixed 2x2 float32 matrices.

    Each placeholder below is initialised to ``None``; replace it with the
    result of the operation described by the TODO directly above it.

    Returns:
        tuple: ``(add_result, mul_result, matmul_result, sum_result,
        mean_result)`` in that order. Any entry that has not been
        implemented is ``None``.
    """
    # Operands shared by every operation below
    a = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
    b = torch.tensor([[5, 6], [7, 8]], dtype=torch.float32)
    
    # TODO: Element-wise addition
    add_result = None
    
    # TODO: Element-wise multiplication
    mul_result = None
    
    # TODO: Matrix multiplication (use @ or torch.matmul)
    matmul_result = None
    
    # TODO: Sum all elements
    sum_result = None
    
    # TODO: Mean of all elements
    mean_result = None
    
    return add_result, mul_result, matmul_result, sum_result, mean_result

In [None]:
# Test Exercise 1
# Report shape/dtype for every tensor that has been filled in so far.
tensors = create_tensors()
print("Created tensors:")
for i, t in enumerate(tensors, start=1):
    if t is None:
        continue  # placeholder not implemented yet
    print(f"  Tensor {i}: shape={t.shape}, dtype={t.dtype}")

# Same idea for the operation results, labelled in return order.
ops = tensor_operations()
print("\nTensor operations:")
names = ['Add', 'Mul', 'MatMul', 'Sum', 'Mean']
for name, result in zip(names, ops):
    if result is None:
        continue  # placeholder not implemented yet
    print(f"  {name}: {result}")

---

## Exercise 2: Automatic Differentiation

PyTorch's autograd automatically computes gradients.

**Your Task:** Use autograd to compute derivatives.

In [None]:
def compute_gradients():
    """
    Compute gradients using autograd.
    
    Given y = x^2 + 3x + 1, compute dy/dx at x=2
    Expected: dy/dx = 2x + 3 = 2(2) + 3 = 7

    Returns:
        The gradient of y with respect to x (``x.grad``) once implemented;
        ``None`` while the TODOs are still open.
    """
    # TODO: Create tensor x=2 with requires_grad=True
    x = None
    
    # TODO: Compute y = x^2 + 3x + 1
    y = None
    
    # TODO: Compute gradient (backward pass)
    # y.backward()
    
    # TODO: Return the gradient (x.grad)
    return None

In [None]:
def chain_rule_example():
    """
    Demonstrate chain rule: if y = f(g(x)), then dy/dx = df/dg * dg/dx
    
    Let g(x) = 2x, f(g) = g^2
    Then y = (2x)^2 = 4x^2
    dy/dx = 8x, so at x=3, dy/dx = 24

    Returns:
        The gradient of y with respect to x once implemented (expected
        value 24); ``None`` while the TODOs are still open.
    """
    # TODO: Create x=3 with gradient tracking
    x = None
    
    # TODO: Compute g = 2x
    g = None
    
    # TODO: Compute y = g^2
    y = None
    
    # TODO: Backward pass and return gradient
    return None

In [None]:
# Test Exercise 2
# Each function returns None until its TODOs are completed, in which case
# "None" is printed next to the expected value.
grad1 = compute_gradients()
print(f"Gradient of x^2 + 3x + 1 at x=2: {grad1} (expected: 7)")

# Chain-rule check: d/dx (2x)^2 = 8x, which is 24 at x=3
grad2 = chain_rule_example()
print(f"Gradient of (2x)^2 at x=3: {grad2} (expected: 24)")

---

## Exercise 3: Building Neural Networks with nn.Module

**Your Task:** Create a neural network using PyTorch's nn.Module.

In [None]:
class SimpleNet(nn.Module):
    """
    Simple neural network: 2 inputs -> 4 hidden -> 1 output

    Until the TODOs are completed the module registers no layers and
    ``forward`` returns ``None``.
    """
    def __init__(self):
        """Register the layers that ``forward`` will use."""
        super().__init__()
        # TODO: Define layers
        # Sizes follow the class docstring: input_size=2, hidden_size=4, output_size=1
        # self.layer1 = nn.Linear(input_size, hidden_size)
        # self.layer2 = nn.Linear(hidden_size, output_size)
        # self.activation = nn.ReLU()
        pass
    
    def forward(self, x):
        """
        Forward pass: x -> layer1 -> relu -> layer2 -> output

        Args:
            x: Input batch; each sample is expected to carry the 2 input
                features named in the class docstring.

        Returns:
            The network output for ``x`` once implemented; ``None`` while
            the body is still ``pass``.
        """
        # TODO: Implement forward pass
        pass

In [None]:
class FlexibleNet(nn.Module):
    """
    Flexible network with configurable architecture.

    Until the TODOs are completed the module registers no layers and
    ``forward`` returns ``None``.
    """
    def __init__(self, layer_sizes):
        """
        Args:
            layer_sizes: List like [input, hidden1, hidden2, output]
        """
        super().__init__()
        
        # TODO: Create layers dynamically
        # Hint: Use nn.ModuleList to store layers
        # Each consecutive pair in layer_sizes gives one layer's (in, out)
        # sizes, hence len(layer_sizes) - 1 layers in total.
        # self.layers = nn.ModuleList()
        # for i in range(len(layer_sizes) - 1):
        #     self.layers.append(nn.Linear(...))
        pass
    
    def forward(self, x):
        """Run ``x`` through every layer in order.

        Args:
            x: Input batch whose feature size should match the first entry
                of ``layer_sizes``.

        Returns:
            The output of the last layer once implemented; ``None`` while
            the body is still ``pass``.
        """
        # TODO: Pass through all layers with ReLU
        # Apply ReLU to all except the last layer
        pass

In [None]:
# Test Exercise 3
# Instantiate the network and show its module tree under a heading.
simple_net = SimpleNet()
print("SimpleNet architecture:", simple_net, sep="\n")

# Count parameters
def count_parameters(model):
    """Return the total number of scalar values across ``model.parameters()``."""
    total = 0
    for param in model.parameters():
        total += param.numel()
    return total

# A model counts as implemented once it has registered at least one
# parameter, whatever its layer attributes are called.
if count_parameters(simple_net) > 0:
    print(f"Total parameters: {count_parameters(simple_net)}")

    # Test forward pass on a batch of 5 samples with 2 features each
    test_input = torch.randn(5, 2)
    output = simple_net(test_input)
    if output is None:
        # forward() still ends in `pass`, so calling the model returned None
        print("Implement SimpleNet.forward()")
    else:
        print(f"Input shape: {test_input.shape}, Output shape: {output.shape}")
else:
    # Match the other exercises, which tell the student what is missing
    print("Implement SimpleNet class")

---

## Exercise 4: Training Loop

**Your Task:** Train a network to learn the XOR function.

In [None]:
# XOR dataset: all four combinations of two binary inputs and their XOR target
X_xor = torch.tensor(
    [[0, 0], [0, 1], [1, 0], [1, 1]],
    dtype=torch.float32,
)
y_xor = torch.tensor(
    [[0], [1], [1], [0]],
    dtype=torch.float32,
)

# Print one "inputs -> target" row per sample
print("XOR Truth Table:")
print("\n".join(f"  {x.tolist()} -> {y.item()}" for x, y in zip(X_xor, y_xor)))

In [None]:
class XORNet(nn.Module):
    """Network to solve XOR problem.

    Until the TODOs are completed the module registers no layers and
    ``forward`` returns ``None``.
    """
    def __init__(self):
        """Register the layers that ``forward`` will use."""
        super().__init__()
        # XOR needs a hidden layer!
        # TODO: Define layers
        # 2 inputs -> 4 hidden (with ReLU) -> 1 output (with Sigmoid)
        # Suggested attribute names: self.layer1, self.layer2
        # (the same convention SimpleNet uses)
        pass
    
    def forward(self, x):
        """Map a batch of 2-feature inputs to one value per sample.

        Returns:
            The network output once implemented (the Sigmoid keeps it in
            the 0-1 range); ``None`` while the body is still ``pass``.
        """
        # TODO: Implement forward pass
        pass

In [None]:
def train_xor_network(model, X, y, epochs=1000, lr=0.1):
    """
    Train the XOR network.
    
    Steps in each epoch:
    1. Forward pass: predictions = model(X)
    2. Compute loss: loss = criterion(predictions, y)
    3. Zero gradients: optimizer.zero_grad()
    4. Backward pass: loss.backward()
    5. Update weights: optimizer.step()

    Gradients are cleared before the backward pass because PyTorch adds
    new gradients to whatever is already stored on each parameter.

    Args:
        model: Network to train (an ``nn.Module`` such as ``XORNet``).
        X: Input tensor passed to ``model``.
        y: Target tensor compared against the predictions.
        epochs: Number of training iterations to run.
        lr: Learning rate intended for the optimizer.

    Returns:
        list: One loss value per epoch once the training step and the
        ``losses.append`` line are in place; an empty list until then.
    """
    # TODO: Define loss function (BCELoss for binary classification)
    criterion = None
    
    # TODO: Define optimizer (Adam or SGD)
    optimizer = None
    
    losses = []
    
    for epoch in range(epochs):
        # TODO: Implement training step
        # 1. Forward pass
        # 2. Compute loss
        # 3. Zero gradients
        # 4. Backward pass
        # 5. Update weights
        
        pass
        
        # Record loss (uncomment when implemented)
        # losses.append(loss.item())
    
    return losses

In [None]:
# Test Exercise 4
xor_model = XORNet()


def _print_xor_predictions(model, inputs, targets):
    """Print the model output next to the expected value for every row.

    Args:
        model: Network to evaluate.
        inputs: Batch of input rows fed to ``model``.
        targets: Expected output for each input row.

    Returns:
        bool: ``False`` (after printing a hint) when the model's forward
        pass returns ``None``, ``True`` when the rows were printed.
    """
    # Evaluation only: no gradients are needed for these predictions
    with torch.no_grad():
        predictions = model(inputs)
    if predictions is None:
        # forward() still ends in `pass`, so calling the model returned None
        print("Implement XORNet.forward()")
        return False
    for x, pred, actual in zip(inputs, predictions, targets):
        print(f"  {x.tolist()} -> {pred.item():.4f} (expected {actual.item()})")
    return True


# Detect an implemented model by its registered parameters rather than by a
# specific attribute name, because XORNet does not prescribe layer names.
if any(True for _ in xor_model.parameters()):
    print("Before training:")
    if _print_xor_predictions(xor_model, X_xor, y_xor):
        # Train
        losses = train_xor_network(xor_model, X_xor, y_xor, epochs=2000, lr=0.5)

        if losses:
            print("\nAfter training:")
            _print_xor_predictions(xor_model, X_xor, y_xor)

            # Plot loss
            plt.plot(losses)
            plt.xlabel('Epoch')
            plt.ylabel('Loss')
            plt.title('XOR Training Loss')
            plt.show()
        else:
            # An empty loss history means the training step is still a stub
            print("Implement train_xor_network()")
else:
    print("Implement XORNet class")

---

## Exercise 5: Using nn.Sequential

**Your Task:** Build networks quickly using nn.Sequential.

In [None]:
def create_sequential_network():
    """
    Create a network using nn.Sequential.
    
    Architecture: 10 -> 64 (ReLU) -> 32 (ReLU) -> 1 (Sigmoid)

    Returns:
        The ``nn.Sequential`` model once implemented; ``None`` while the
        TODO is still open.
    """
    # TODO: Use nn.Sequential to define the network
    # Modules are applied in the order they are listed.
    # model = nn.Sequential(
    #     nn.Linear(...),
    #     nn.ReLU(),
    #     ...
    # )
    model = None
    
    return model

In [None]:
# Test Exercise 5
seq_model = create_sequential_network()

if seq_model is None:
    # Nothing to inspect until the factory returns a model
    print("Implement create_sequential_network()")
else:
    print("Sequential model:", seq_model, sep="\n")
    print(f"\nTotal parameters: {count_parameters(seq_model)}")

    # Test: push a batch of 5 samples with 10 features through the model
    test_input = torch.randn(5, 10)
    output = seq_model(test_input)
    print(f"Output shape: {output.shape}")

---

## Exercise 6: Complete Training Pipeline

**Your Task:** Train a classifier on synthetic data.

In [None]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

# Generate the two-moons dataset and hold out 20% of it for testing
X, y = make_moons(n_samples=500, noise=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Convert to float32 tensors; labels are reshaped into a single column
X_train_t, X_test_t = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train, X_test)
)
y_train_t, y_test_t = (
    torch.tensor(labels, dtype=torch.float32).reshape(-1, 1)
    for labels in (y_train, y_test)
)

# Visualize the full dataset, coloured by class label
plt.scatter(
    X[:, 0],
    X[:, 1],
    c=y,
    cmap='coolwarm',
    alpha=0.6,
)
plt.title('Two Moons Dataset')
plt.show()

In [None]:
def train_classifier(X_train, y_train, X_test, y_test, epochs=200):
    """
    Complete training pipeline for binary classification.

    Args:
        X_train: Training inputs.
        y_train: Training targets.
        X_test: Held-out inputs used for the periodic evaluation.
        y_test: Held-out targets used for the periodic evaluation.
        epochs: Number of training iterations to run.
    
    Returns:
        model: Trained model (``None`` until implemented)
        train_losses: List of training losses
        test_accuracies: List of test accuracies, one entry per
            evaluation (every 10 epochs)
    """
    # TODO: Create model (2 -> 16 -> 8 -> 1 with sigmoid output)
    model = None
    
    # TODO: Define loss and optimizer
    criterion = None
    optimizer = None
    
    train_losses = []
    test_accuracies = []
    
    for epoch in range(epochs):
        # TODO: Training step
        pass
        
        # TODO: Evaluate on test set (every 10 epochs)
        # Accuracy is the fraction of correct predictions, i.e. the mean of
        # the boolean mask ((predictions > 0.5) == y_test), not the mask itself.
    
    return model, train_losses, test_accuracies

In [None]:
# Test Exercise 6
n_epochs = 500
eval_every = 10  # train_classifier is asked to evaluate every 10 epochs

result = train_classifier(X_train_t, y_train_t, X_test_t, y_test_t, epochs=n_epochs)

if result[0] is not None:
    model, losses, accs = result
    
    # Plot results: loss on the left, test accuracy on the right
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    
    axes[0].plot(losses)
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Loss')
    axes[0].set_title('Training Loss')
    
    # Derive the x-axis from the number of recorded accuracies so both
    # sequences always have the same length (a fixed range would make
    # plot() raise whenever the count differs).
    eval_epochs = range(0, eval_every * len(accs), eval_every)
    axes[1].plot(eval_epochs, accs)
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Accuracy')
    axes[1].set_title('Test Accuracy')
    
    plt.tight_layout()
    plt.show()
    
    if accs:
        print(f"Final test accuracy: {accs[-1]:.2%}")
    else:
        # Indexing accs[-1] on an empty list would raise IndexError
        print("No test accuracies recorded - implement the evaluation step")
else:
    print("Implement train_classifier()")

---

## Checkpoint

Congratulations! You've completed Lab 4.

### Key Takeaways:
- Tensors are GPU-ready arrays with autograd support
- nn.Module is the base class for all models
- Training loop: forward -> loss -> zero gradients -> backward -> update
- nn.Sequential for quick model building

**Next:** Lab 5 - NLP Basics