# Challenges

## Installs and Imports

In [None]:
%pip install torch numpy matplotlib requests torchvision

In [2]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.functional as F

## Challenge 1

- This challenge will test your knowledge of PyTorch tensors.
<br/>
1. Create a tensor of size 10x10 with all elements equal to 0.5.
2. Create a tensor with shape (1, 3, 3) with three `3`s, three `2`s, and three `4`s


In [None]:
# Challenge 1.1: a 10x10 tensor in which every element is 0.5.
x1 = torch.full(size=(10, 10), fill_value=0.5)
# Show the tensor first, then its shape, one per line.
print(x1, x1.shape, sep="\n")

In [7]:
# Challenge 1.2: shape (1, 3, 3) -- one row of 3s, one row of 2s, one row of 4s.
x2 = torch.tensor([[[value] * 3 for value in (3, 2, 4)]])
# Show the tensor first, then its shape, one per line.
print(x2, x2.shape, sep="\n")

tensor([[[3, 3, 3],
         [2, 2, 2],
         [4, 4, 4]]])
torch.Size([1, 3, 3])


## Challenge 2

This challenge will test your understanding of Linear Algebra's connection to deep learning.
<br/>
1. You are given an input with shape (1, 3) and a model with one layer, with weights in the shape (3, 1). There is no bias.
2. The model also has the sigmoid activation function which takes the output of the weights as its input (remember what it spits out?).
3. Identify what values the weights should be if the goal of this model is to map inputs with bigger values on the left to 0, and inputs with bigger values on the right to 1.
4. Example `[1, 0, 0]` -> 0, `[1, 1, 3]` -> 1

NOTE: Applying Deep Learning to solve this problem is silly, but it's a good exercise.


In [None]:
# Random input of shape (1, 3) to try the model on.
x = torch.randn(1, 3)

# Solution: a negative weight on the left feature, zero on the middle one and a
# positive weight on the right one, so that x @ w = 10 * (x_right - x_left).
# The pre-activation is negative when the left value is larger (sigmoid -> 0)
# and positive when the right value is larger (sigmoid -> 1). The factor of 10
# only sharpens the sigmoid so the outputs sit close to 0 or 1.
w1, w2, w3 = -10.0, 0.0, 10.0

w = torch.tensor([w1, w2, w3], dtype=torch.float32)

# `torch.sigmoid` is used directly: the notebook's `F` alias is bound to
# `torch.functional`, which does not provide `sigmoid`.
act = torch.sigmoid

In [None]:
# Show the input followed by a blank line, then the sigmoid of the weighted sum.
print(x, end="\n\n")
print(f'output = {act(x @ w)}')

## Challenge 3

- Run the following code.
- Be able to describe (almost all) of what is going on.
- Play around with the model, the optimizer, etc.

In [9]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [None]:
# Build both FashionMNIST splits under the "data" root with download=True:
# train=True gives the training set, train=False the test set. Every image is
# converted with ToTensor().
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train,
        download=True,
        transform=ToTensor(),
    )
    for is_train in (True, False)
)

In [11]:
batch_size = 64

# Wrap each split in a DataLoader that serves batches of `batch_size` samples.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size)
    for split in (training_data, test_data)
)

# Inspect only the first test batch to see the tensor shapes and the label dtype.
for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [17]:
# Choose where to train: CUDA if available, otherwise MPS if available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [18]:
# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> Linear/ReLU -> Linear/ReLU -> Linear.

    The layer sizes are constructor arguments so the architecture can be
    changed without editing the class; the defaults reproduce the original
    784 -> 512 -> 512 -> 10 network.

    Args:
        in_features: number of values per sample after flattening
            (default 28*28).
        hidden_features: width of both hidden layers (default 512).
        num_classes: number of output logits (default 10).
    """

    def __init__(self, in_features=28*28, hidden_features=512, num_classes=10):
        super().__init__()
        # Collapses every dimension after the batch dimension into one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            # No activation on the last layer: forward() returns raw logits.
            nn.Linear(hidden_features, num_classes)
        )

    def forward(self, x):
        """Flatten `x` per sample and return the stack's output logits."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

# Build the network with its default sizes, then move it to the chosen device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [19]:
# SGD over all of the model's parameters with a learning rate of 0.001,
# paired with a cross-entropy loss.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

In [20]:
def train(dataloader, model, loss_fn, optimizer, device=None):
    """Run one pass over `dataloader`, updating `model` via `optimizer`.

    Args:
        dataloader: iterable of `(X, y)` batches that exposes a `.dataset`
            with a length (used only for the progress message).
        model: the network to train; called as `model(X)`.
        loss_fn: callable `loss_fn(pred, y)` returning a scalar loss tensor.
        optimizer: optimizer whose `step()` / `zero_grad()` are called once
            per batch.
        device: device each batch is moved to. When omitted, the device of
            the model's first parameter is used, so the function does not
            rely on a notebook-level `device` variable.

    Prints the loss every 100 batches and returns nothing.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    size = len(dataloader.dataset)
    # Switch the module to training mode. This does not itself enable weight
    # updates -- those come from loss.backward() + optimizer.step() below.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        pred = model(X) # make prediction
        loss = loss_fn(pred, y) # calculate error (i.e. loss)

        # Backpropagation
        loss.backward() # compute gradients of the loss w.r.t. the parameters
        optimizer.step() # update the parameters using those gradients
        optimizer.zero_grad() # reset the gradients so they don't accumulate into the next batch

        if batch % 100 == 0:
            # `current` assumes every batch so far had len(X) samples.
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [21]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval() # this will prevent the model's weights from being updated.
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
epochs = 5
# Each epoch is one training pass over the training loader followed by an
# evaluation on the test loader.
for t in range(epochs):
    banner = f"Epoch {t+1}\n-------------------------------"
    print(banner)
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")