<a href="https://colab.research.google.com/github/shengpu-tang/CS334-S25/blob/main/PyTorch_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch Tutorial

References:
- https://playground.tensorflow.org/
- Quickstart: https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html
- Linear / fully-connected layers: https://pytorch.org/docs/main/nn.html#linear-layers
  - [`torch.nn.Linear`](https://pytorch.org/docs/main/generated/torch.nn.Linear.html)
- Activation functions: found in https://pytorch.org/docs/main/nn.functional.html
- Loss functions: https://pytorch.org/docs/main/nn.html#loss-functions
  - [`torch.nn.BCELoss`](https://pytorch.org/docs/main/generated/torch.nn.BCELoss.html)
  - [`torch.nn.CrossEntropyLoss`](https://pytorch.org/docs/main/generated/torch.nn.CrossEntropyLoss.html)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from tqdm import tqdm

In [None]:
%config InlineBackend.figure_formats = ['svg']

In [None]:
import torch
from torch import nn
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader

def count_parameters(model):
    """Return the total number of scalar entries in the trainable parameters of `model`.

    Only parameters whose `requires_grad` flag is set are counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [None]:
#@title Run this code block to define plotting helper functions
#@markdown This defines `plot_boundary(X, pred)` which can plot the decision boundary of a neural network
from matplotlib import cm, ticker

# Marker shapes and colors used by plot_points, indexed by class position (cycled with modulo).
MARKERS = ['o', 'v', '+']
COLORS = ['red', 'green', 'blue']
# Discrete colormap built from the three class colors.
rgb_gradient_cmap = matplotlib.colors.ListedColormap(COLORS)
# One colormap per class color; each runs from alpha 0 to alpha 1 of a single RGB color.
red_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("RedGradient", [(1, 0, 0, 0), (1, 0, 0, 1)])  # Transparent to red
green_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("GreenGradient", [(0, 1, 0, 0), (0, 1, 0, 1)])  # Transparent to green
blue_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("BlueGradient", [(0, 0, 1, 0), (0, 0, 1, 1)])  # Transparent to blue

def plot_points(X, y):
    """Scatter-plot 2-D points on the current axes, one marker/color per class.

    Args:
        X: array-like whose first two columns are the plotted features.
        y: array-like of class labels, one per row of X.

    Classes are visited in sorted label order, so the i-th smallest label always
    gets MARKERS[i] / COLORS[i] (cycled when there are more classes than styles).
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # np.unique returns sorted labels; iterating a plain set() depends on hash
    # order, which does not guarantee a stable label -> style assignment.
    for i, label in enumerate(np.unique(y)):
        points = X[y == label]
        marker = MARKERS[i % len(MARKERS)]
        color = COLORS[i % len(COLORS)]
        plt.scatter(points[:, 0], points[:, 1], marker=marker, color=color)

    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")

def predict(model, x):
    """Run `model` on a NumPy array and return its output as a NumPy array.

    Args:
        model: callable (e.g. an nn.Module) that accepts a float tensor.
        x: NumPy array of inputs; it is converted to a float32 tensor.

    Returns:
        The model output converted back to a NumPy array. The forward pass runs
        under `torch.no_grad()`, so no autograd graph is built.
    """
    with torch.no_grad():
        # Use the model that was passed in rather than a global `net`.
        o = model(torch.from_numpy(x).float())
    return o.numpy()

def plot_boundary(X, pred, num_classes=2):
    """Draw the decision boundary of `pred` on the current matplotlib axes.

    Args:
        X: data array with at least two columns; only used to derive the plot
            window if the current axis limits cannot be read.
        pred: callable taking an (M, 2) NumPy array of grid points and returning
            a NumPy array of model outputs. For `num_classes == 2` the output
            must hold one value per grid point and is thresholded at 0.5; for
            the multi-class case it must be 2-D with one column of scores per
            class (the first three columns are shaded red/green/blue).
        num_classes: number of classes; 2 selects the binary rendering.

    The model is evaluated once on a 200x200 grid. An exception raised by
    `pred` itself propagates to the caller; errors raised while rendering are
    reported and a plain contour of the raw output is attempted instead.
    """
    # Define grid limits based on current plot or data
    try:
        x_min, x_max = plt.gca().get_xlim()
        y_min, y_max = plt.gca().get_ylim()
    except Exception:
        x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
        y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1

    # Generate mesh grid
    xs, ys = np.meshgrid(
        np.linspace(x_min, x_max, 200),
        np.linspace(y_min, y_max, 200)
    )
    xys = np.column_stack([xs.ravel(), ys.ravel()])
    extent = [x_min, x_max, y_min, y_max]

    # Evaluate the model once, outside the try block: the fallback below reuses
    # this result instead of running a second forward pass.
    raw = np.asarray(pred(xys))

    try:
        if num_classes == 2:
            # Binary case: plot boundary based on probability threshold 0.5
            probs = raw.reshape(xs.shape)
            plt.contour(xs, ys, (probs >= 0.5).astype(int), colors='grey')
            plt.imshow(probs, cmap="PiYG", vmin=-0.2, vmax=1.2, alpha=0.4, origin='lower', extent=extent)
        else:
            # Multi-class case: softmax over the class scores. Subtracting the
            # row maximum leaves the result unchanged but avoids overflow in exp.
            shifted = raw - raw.max(axis=1, keepdims=True)
            exp_scores = np.exp(shifted)
            probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)

            # One transparent-to-color layer per class
            for k, cmap in enumerate((red_cmap, green_cmap, blue_cmap)):
                plt.imshow(probs[:, k].reshape(xs.shape), cmap=cmap, origin='lower', alpha=0.6, extent=extent)

            # Boundary: take the argmax over classes
            labels = np.argmax(raw, axis=1).reshape(xs.shape)
            plt.contour(xs, ys, labels, levels=np.arange(num_classes), colors='grey')

    except Exception as e:
        print(f"Error in plotting decision boundary: {e}")
        plt.contour(xs, ys, raw.reshape(xs.shape), colors='grey')

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.title("Decision Boundary")

In [None]:
#@title Run this code block to define datasets
#@markdown - `D1(N)`: binary classification, linear decision boundary
#@markdown - `D2(N)`: binary classification, non-linear decision boundary
#@markdown - `D3(N)`: multiclass classification

class D1(Dataset):
    """Two-class dataset made of two Gaussian blobs.

    The first N//2 points are drawn around (-2, -2) with label 0 and the next
    N//2 points around (2, 2) with label 1. The generator seed is fixed to 0.
    """

    def __init__(self, N=50):
        super().__init__()
        half = N // 2
        generator = np.random.default_rng(0)
        # Draw order matters for reproducibility: class 0 first, then class 1.
        blob_neg = 2.0 * generator.standard_normal((half, 2)) - [2, 2]
        blob_pos = 2.0 * generator.standard_normal((half, 2)) + [2, 2]
        self.X = np.concatenate([blob_neg, blob_pos], axis=0)
        self.y = np.array([0] * half + [1] * half)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return (features, label) as float tensors; the label is wrapped in a length-1 tensor."""
        features = torch.from_numpy(self.X[idx]).float()
        label = torch.tensor([self.y[idx]]).float()
        return features, label


class D2(Dataset):
    """Two-class dataset with a circular boundary.

    N points are drawn from a scaled standard normal (seed 0); a point gets
    label 1 when its squared distance from the origin exceeds 10, else 0.
    """

    def __init__(self, N=50):
        super().__init__()
        generator = np.random.default_rng(0)
        features = 3.0 * generator.standard_normal((N, 2))
        # Diagonal of the Gram matrix = squared norm of each row.
        sq_norm = np.diag(features @ features.T)
        labels = np.array([0] * N)
        labels[sq_norm > 10] = 1
        self.X = features
        self.y = labels

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return (features, label) as float tensors; the label is wrapped in a length-1 tensor."""
        features = torch.from_numpy(self.X[idx]).float()
        label = torch.tensor([self.y[idx]]).float()
        return features, label


class D3(Dataset):
    """Three-class dataset.

    N points are drawn from a scaled standard normal (seed 0). Label 1 marks
    points whose squared distance from the origin is below 6; of the remaining
    points, those with x1 + 2*x2 >= 0 get label 2 and the rest keep label 0.
    """

    def __init__(self, N=50):
        super().__init__()
        generator = np.random.default_rng(0)
        features = 3.0 * generator.standard_normal((N, 2))
        # Diagonal of the Gram matrix = squared norm of each row.
        sq_norm = np.diag(features @ features.T)
        projection = features @ np.array([1, 2])
        labels = np.array([0] * N)
        labels[sq_norm < 6] = 1
        labels[(sq_norm >= 6) & (projection >= 0)] = 2
        self.X = features
        self.y = labels

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return (features, label): a float tensor and a scalar long tensor holding the class index."""
        features = torch.from_numpy(self.X[idx]).float()
        label = torch.tensor(self.y[idx]).long()
        return features, label

## Backprop demo

Linear regression with mean squared error loss

$$ L = \frac{1}{2} (w x + b - y)^2 $$
$$\frac{\partial L}{\partial w} = (w x + b - y) x$$
$$\frac{\partial L}{\partial b} = (w  x +b - y)$$

In [None]:
# A single linear layer with 2 inputs and 1 output, wrapped in Sequential so it is reachable as net[0]
net = nn.Sequential(nn.Linear(2, 1))
# Plain SGD over the layer's parameters with learning rate 0.1
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

In [None]:
# Initialize and clear gradients
nn.init.zeros_(net[0].weight)
nn.init.zeros_(net[0].bias)
# `.grad` stays None until a backward pass has run, so only zero gradients that
# already exist (e.g. when this cell is re-run) rather than swallowing every error.
for param in (net[0].weight, net[0].bias):
    if param.grad is not None:
        param.grad.zero_()

print('Initial parameters')
print('w =', net[0].weight.data)
print('b =', net[0].bias.data)

# Forward pass
x = torch.tensor([1,2]).float()
y_true = torch.tensor([1]).float()
y_hat = ...
loss = ...
print()
print('Forward pass')
print('y_hat =', y_hat.data)
print('loss. =', loss.data)

# Backward pass
...
print()
print('Gradients')
print('dw =', net[0].weight.grad)
print('db =', net[0].bias.grad)

# Update
...
print()
print('Updated parameters')
print('w =', net[0].weight.data)
print('b =', net[0].bias.data)

## Neural Net Training - Example

In [None]:
# Build the D1 dataset with N=100 and scatter its points by class
d1 = D1(100)
plt.figure(figsize=(5,5))
plot_points(d1.X, d1.y)
# Equal axis scaling, then a fixed [-8, 8] window on both axes
plt.axis('equal')
plt.xlim(-8, 8)
plt.ylim(-8, 8)
plt.show()

### Define the neural network

In [None]:
class Net1(nn.Module):
    """Model skeleton for the D1 training example; layers and forward pass are left as TODOs."""
    def __init__(self):
        """Call the nn.Module constructor; layer definitions go below."""
        super().__init__()
        # TODO: put neural net structure here
        ...

    def forward(self, x):
        """Map the input `x` to the network output (to be implemented)."""
        # TODO: define the forward pass here
        ...
        return ...

In [None]:
# Seed PyTorch's random number generator, then instantiate the model
torch.random.manual_seed(2)
net = Net1()

In [None]:
# How many float-valued parameters are there?
# count_parameters sums numel() over the parameters that have requires_grad set
print('Number of learnable float-valued parameters', count_parameters(net))
print('Neural net architecture:')
print(net)

### Before training...

In [None]:
# What are the parameter values before training?
# Print each parameter's name followed by its current values
for name, param in net.named_parameters():
    print(name, '\t', param.data)

In [None]:
# Where is the decision boundary before training?
plt.figure(figsize=(5,5))
plot_points(d1.X, d1.y)
plt.axis('equal')
plt.xlim(-8, 8)
plt.ylim(-8, 8)
# plot_boundary reads the current axis limits, so it is called after xlim/ylim are set
plot_boundary(d1.X, lambda x: predict(net, x))
plt.show()

### Train!

In [None]:
# Training set, loss function, and optimization algorithm
# TODO: replace each placeholder below; the trainer loop iterates `dataloader`
# and expects it to yield (X, y) batches.
dataloader = ...
criterion = ...
optimizer = ...

In [None]:
# Trainer loop
# 1000 passes over the dataloader, with a tqdm progress bar over epochs
for epoch in tqdm(range(1000)):
    for X, y in dataloader:
        # TODO: one optimization step on the mini-batch (X, y)
        ...

### After training...

In [None]:
# What are the parameter values after training?
for name, param in net.named_parameters():
    print(name, '\t', param.data)

In [None]:
# Where is the decision boundary after training?
plt.figure(figsize=(5,5))
plot_points(d1.X, d1.y)
plt.axis('equal')
plt.xlim(-8, 8)
plt.ylim(-8, 8)
# plot_boundary reads the current axis limits, so it is called after xlim/ylim are set
plot_boundary(d1.X, lambda x: predict(net, x))
plt.show()

## Exercise Q2: non-linear decision boundary

In [None]:
# Data
# Build the D2 dataset with N=100 and scatter its points by class
d2 = D2(100)
plt.figure(figsize=(5,5))
plot_points(d2.X, d2.y)
# Equal axis scaling, then a fixed [-8, 8] window on both axes
plt.axis('equal')
plt.xlim(-8, 8)
plt.ylim(-8, 8)
plt.show()

In [None]:
# Model
class Net2(nn.Module):
    """Model skeleton for the non-linear decision boundary exercise; layers and forward pass are left as TODOs."""
    def __init__(self):
        super().__init__()
        # TODO: put neural net structure here
        ...

    def forward(self, x):
        """Map the input `x` to the network output (to be implemented)."""
        # TODO: define the forward pass here
        ...
        # Same scaffold as Net1: without a return statement forward() yields None.
        return ...

# Seed PyTorch's random number generator before building the model, as the
# Net1 and Net3 cells do, so the Q2 run starts from the same state on every re-run.
torch.random.manual_seed(0)
net = Net2()

print('Number of learnable float-valued parameters', count_parameters(net))
print('Neural net architecture:')
print(net)

In [None]:
# What are the parameter values before training?
for name, param in net.named_parameters():
    print(name, '\t', param.data)

# Where is the decision boundary before training?
plt.figure(figsize=(5,5))
plot_points(d2.X, d2.y)
plt.axis('equal')
plt.xlim(-8, 8)
plt.ylim(-8, 8)
# plot_boundary reads the current axis limits, so it is called after xlim/ylim are set
plot_boundary(d2.X, lambda x: predict(net, x))
plt.show()

In [None]:
# Training set, loss function, and optimization algorithm
# TODO: define the dataloader, criterion and optimizer for d2 and this net
...

# Trainer loop
# TODO: loop over epochs and mini-batches, updating net
...

In [None]:
# What are the parameter values after training?
for name, param in net.named_parameters():
    print(name, '\t', param.data)

# Where is the decision boundary after training?
plt.figure(figsize=(5,5))
plot_points(d2.X, d2.y)
plt.axis('equal')
plt.xlim(-8, 8)
plt.ylim(-8, 8)
# plot_boundary reads the current axis limits, so it is called after xlim/ylim are set
plot_boundary(d2.X, lambda x: predict(net, x))
plt.show()

## Exercise Q3: three-way classification

In [None]:
# Data...
# Build the D3 dataset with N=100 and scatter its points by class
d3 = D3(100)
plt.figure(figsize=(5,5))
plot_points(d3.X, d3.y)
# Equal axis scaling, then a fixed [-8, 8] window on both axes
plt.axis('equal')
plt.xlim(-8, 8)
plt.ylim(-8, 8)
plt.show()

In [None]:
# Model...
class Net3(nn.Module):
    """Model skeleton for the three-way classification exercise; layers and forward pass are left as TODOs."""
    def __init__(self):
        super().__init__()
        # TODO: put neural net structure here
        ...

    def forward(self, x):
        """Map the input `x` to the network output (to be implemented)."""
        # TODO: define the forward pass here
        ...
        # Same scaffold as Net1: without a return statement forward() yields None.
        return ...

# Seed PyTorch's random number generator, then instantiate the model
torch.random.manual_seed(1)
net = Net3()

# count_parameters sums numel() over the parameters that have requires_grad set
print('Number of learnable float-valued parameters', count_parameters(net))
print('Neural net architecture:')
print(net)

In [None]:
# Before training...

# What are the parameter values before training?
for name, param in net.named_parameters():
    print(name, '\t', param.data)

# Where is the decision boundary before training?
plt.figure(figsize=(5,5))
plot_points(d3.X, d3.y)
plt.axis('equal')
plt.xlim(-8, 8)
plt.ylim(-8, 8)
# num_classes=3 selects plot_boundary's multi-class rendering (per-class shading plus argmax contour)
plot_boundary(d3.X, lambda x: predict(net, x), num_classes=3)
plt.show()

In [None]:
# Train...

# Training set, loss function, and optimization algorithm
# TODO: define the dataloader, criterion and optimizer for d3 and this net
...

# Trainer loop
# TODO: loop over epochs and mini-batches, updating net
...

In [None]:
# After training...

# What are the parameter values after training?
for name, param in net.named_parameters():
    print(name, '\t', param.data)

# Where is the decision boundary after training?
plt.figure(figsize=(5,5))
plot_points(d3.X, d3.y)
plt.axis('equal')
plt.xlim(-8, 8)
plt.ylim(-8, 8)
# Pass the Q3 data (d3), matching the points plotted above, so this cell does not depend on d1.
plot_boundary(d3.X, lambda x: predict(net, x), num_classes=3)
plt.show()

## Exercise Q4: Architecture Choices & Effects