In [None]:
import torch
from torch import nn
import matplotlib.pyplot as plt

In [None]:
# Project 1 - Linear regression
# Straight-line formula: y = m*x + b, with m as the weight (slope) and b as the bias (intercept)

# Known ("ground truth") parameters that the model will later try to recover
weight = 0.7
bias = 0.3

# Build evenly spaced inputs in [start, end) and the matching labels
start, end, step = 0, 1, 0.02
X = torch.arange(start, end, step).reshape(-1, 1)  # column of shape [N, 1]: one feature per row
Y = X * weight + bias

# Preview the first ten input/label pairs
X[:10], Y[:10]

(tensor([[0.0000],
         [0.0200],
         [0.0400],
         [0.0600],
         [0.0800],
         [0.1000],
         [0.1200],
         [0.1400],
         [0.1600],
         [0.1800]]),
 tensor([[0.3000],
         [0.3140],
         [0.3280],
         [0.3420],
         [0.3560],
         [0.3700],
         [0.3840],
         [0.3980],
         [0.4120],
         [0.4260]]))

In [None]:
# Hold out the tail of the data: the first 80% is used for training and the
# remaining 20% for testing. There is no validation set in this simple model.
train_split = int(.8 * len(X))
X_train, X_test = X[:train_split], X[train_split:]
Y_train, Y_test = Y[:train_split], Y[train_split:]

# Sizes of each split
len(X_train), len(Y_train), len(X_test), len(Y_test)

(40, 40, 10, 10)

In [None]:
# Linear regression model: learns one weight and one bias so that y = weights * x + bias
class LinearRegressionModel(nn.Module):
  """Single-feature linear regression, ``y = weights * x + bias``.

  Both parameters start from random values drawn with ``torch.randn`` and are
  registered as ``nn.Parameter`` so an optimizer can update them.
  """

  def __init__(self):
    super().__init__()
    # Random starting slope; torch.float is an alias for torch.float32.
    initial_weight = torch.randn(1, requires_grad=True, dtype=torch.float)
    self.weights = nn.Parameter(initial_weight)

    # Random starting intercept, created after the weight so the random draw order is fixed.
    initial_bias = torch.randn(1, requires_grad=True, dtype=torch.float)
    self.bias = nn.Parameter(initial_bias)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Apply the linear regression formula to the input data ``x``."""
    scaled = self.weights * x
    return scaled + self.bias

## Model Building Essentials

* torch.nn - contains all of the building blocks for computational graphs (a neural network can be considered a computational graph)
* torch.nn.Parameter - what parameters should our model try and learn; often a PyTorch layer from torch.nn will set these for us.
* torch.nn.Module - the base class for all neural network models; if you subclass it, you should override forward()
* torch.optim - this is where the optimizers in PyTorch live; they will help with gradient descent.
* def forward() - all nn.Module subclasses require you to override forward(); this method defines what happens in the forward computation.

### Checking the contents of our PyTorch model
Now that we've created a model, let's see what's inside.
We can check our model's parameters (what's inside our module) using '.parameters()'.

In [None]:
# Seed the random number generator so the randomly initialised parameters are reproducible
torch.manual_seed(39)

# Create an instance of the model (LinearRegressionModel is a subclass of nn.Module)
model_0 = LinearRegressionModel()

# Inspect the freshly initialised parameters
list(model_0.parameters())

[Parameter containing:
 tensor([0.4447], requires_grad=True),
 Parameter containing:
 tensor([0.0819], requires_grad=True)]

In [None]:
# List the parameters by name (state_dict maps each parameter name to its tensor)
model_0.state_dict()

OrderedDict([('weights', tensor([0.4447])), ('bias', tensor([0.0819]))])

In [None]:
# The known values used to generate the data, for comparison with the model's parameters
weight, bias

(0.7, 0.3)

### Making predictions using 'torch.inference_mode()'

To check our model's predictive power, let's see how well it predicts 'Y_test' based on 'X_test'.

When we pass data through our model, it's going to run it through the 'forward()' method.

In [None]:
# Make predictions with the model (its parameters are still the random initial values).
# torch.inference_mode() disables gradient tracking, which is not needed when only
# predicting; it is the same context manager the other prediction cells use.
with torch.inference_mode():
  y_preds = model_0(X_test)

y_preds

tensor([[0.4377],
        [0.4466],
        [0.4555],
        [0.4644],
        [0.4733],
        [0.4822],
        [0.4911],
        [0.5000],
        [0.5089],
        [0.5177]])

In [None]:
# Classes in Python
# Functions perform actions, whereas a class is a reusable blueprint for objects
# that bundle data (attributes) together with actions (methods).
class Pet:
  """A pet described by its name and species."""

  def __init__(self, name, species):
    self.name, self.species = name, species

  def introduce(self):
    """Print a one-line introduction built from the pet's attributes."""
    greeting = f"Hello, my name is {self.name} and I am a {self.species}."
    print(greeting)

# Build one object from the blueprint
dog = Pet("Buddy", "Dog")



In [None]:
# Call a method on the Pet instance
dog.introduce()

Hello, my name is Buddy and I am a Dog.


In [None]:
# An attribute that the class never declared can still be attached to an instance at runtime

dog.color = "Brown"
dog.color

'Brown'

### Train Model

The whole idea of training is for a model to move from some unknown parameters to some known parameters.

In other words, from a poor representation to a better representation of data.

One way to measure how poor or how wrong your models predictions are is to use a loss function.

**Loss function:** A function to measure how wrong your model's predictions are to the ideal outputs, lower is better.

**Optimizer:** Takes into account the loss of a model and adjusts the model's parameters (e.g. weight & bias) to reduce the loss.

Inside the optimizer you'll often have to set two parameters:
- params - the model parameters you'd like to optimize, for example params = model_0.parameters()
- lr (learning rate) - the learning rate is a hyperparameter that defines how big/small the optimizer changes the parameters with each step (a small lr results in small changes, a large lr results in large changes)

For Pytorch specifically, we need:
- A Training Loop
- A Testing Loop

PyTorch provides many loss functions, each with its own specific syntax.

**Mean Absolute Error (MAE)**
- the absolute difference between the predicted and actual value at each point, averaged over all points

In [None]:
# Parameters before any training has taken place
model_0.state_dict()

OrderedDict([('weights', tensor([0.4447])), ('bias', tensor([0.0819]))])

In [None]:
# Loss function: L1 loss, i.e. the mean absolute error between predictions and targets
loss_fn = nn.L1Loss()

# Optimizer: stochastic gradient descent over the model's parameters.
# lr (learning rate) sets the size of each update step and is possibly the most
# important hyperparameter you can set.
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.01)


**Q**: Which loss function and optimizer should I use?

**A**: This will be problem specific. But with experience, you'll get an idea of what works and what doesn't with your particular problem set.

For example, for a regression problem (like ours), a loss function of nn.L1Loss() and an optimizer like torch.optim.SGD() will suffice.

But for classification problems like classifying whether a photo is of a dog or cat, you'll likely want to use a loss function of nn.BCELoss() (binary cross entropy loss)


### Building a training loop (and a testing loop) in PyTorch

A couple of things we need in a training loop:

0. Loop through the data
1. Forward pass (this involves data moving through our model's 'forward()' function) to make predictions on data - also called forward propagation
2. Calculate the loss (compare forward pass predictions to ground truth labels)
3. Optimizer zero grad
4. Loss backward - move backwards through the network to calculate the gradients of each of the parameters of our model with respect to the loss (**backpropagation**)
5. Optimizer step - use the optimizer to adjust our model's parameters to try and improve the loss (**Gradient Descent**)


In [None]:
# Parameter values going into the training loop
list(model_0.parameters())

[Parameter containing:
 tensor([0.4447], requires_grad=True),
 Parameter containing:
 tensor([0.0819], requires_grad=True)]

In [None]:
torch.manual_seed(39)

# An epoch is one full pass over the training data (a hyperparameter, since we set it ourselves)
epochs = 100

# Track different values so training progress can be inspected afterwards.
# Losses are stored as plain Python floats (see the logging step below).
epoch_count = []
loss_values = []
test_loss_values = []

# 0. Loop through the data
for epoch in range(epochs):
  ### Training
  # Put the model in training mode. This only changes the behaviour of layers such as
  # dropout/batch norm; it does not alter which parameters require gradients.
  model_0.train()

  # 1. Forward pass
  y_pred = model_0(X_train)

  # 2. Calculate the loss
  loss = loss_fn(y_pred, Y_train)

  # 3. Optimizer zero grad: gradients accumulate across backward() calls by default,
  #    so clear the ones left over from the previous iteration
  optimizer.zero_grad()

  # 4. Perform backpropagation on the loss with respect to the parameters of the model
  loss.backward()

  # 5. Step the optimizer (perform gradient descent using the gradients computed in step 4)
  optimizer.step()

  ### Testing
  # Evaluation mode turns off settings not needed for evaluation/testing (dropout/batch norm)
  model_0.eval()
  with torch.inference_mode():  # turns off gradient tracking & a couple more things behind the scenes
    # 1. Do the forward pass
    test_pred = model_0(X_test)

    # 2. Calculate the loss
    test_loss = loss_fn(test_pred, Y_test)

  # Print what's happening
  if epoch % 10 == 0:
    epoch_count.append(epoch)
    # .item() extracts the scalar value, so the lists hold floats rather than tensors
    # that are still attached to the autograd graph (which would keep the graph alive
    # and cannot be converted to NumPy/plotted directly).
    loss_values.append(loss.item())
    test_loss_values.append(test_loss.item())
    print(f"Epoch: {epoch} | Loss: {loss} | Test loss: {test_loss}")

Epoch: 0 | Loss: 0.31763267517089844 | Test loss: 0.43180543184280396
Epoch: 10 | Loss: 0.20242269337177277 | Test loss: 0.29709547758102417
Epoch: 20 | Loss: 0.08721272647380829 | Test loss: 0.16238555312156677
Epoch: 30 | Loss: 0.029277771711349487 | Test loss: 0.07662559300661087
Epoch: 40 | Loss: 0.024191657081246376 | Test loss: 0.05686289072036743
Epoch: 50 | Loss: 0.02073388360440731 | Test loss: 0.04790344834327698
Epoch: 60 | Loss: 0.01730123721063137 | Test loss: 0.03963091969490051
Epoch: 70 | Loss: 0.013862432911992073 | Test loss: 0.03135838359594345
Epoch: 80 | Loss: 0.010431952774524689 | Test loss: 0.023772722110152245
Epoch: 90 | Loss: 0.006997629068791866 | Test loss: 0.015500170178711414


In [None]:
# Check whether the training and test feature tensors are identical
print(torch.equal(X_train, X_test))

False


In [None]:
# Predict on the test features with the trained model; gradient tracking is not needed here
with torch.inference_mode():
  y_pred_new = model_0(X_test)

In [None]:
# Parameters after training, to compare with the known weight and bias
model_0.state_dict()

OrderedDict([('weights', tensor([0.6823])), ('bias', tensor([0.3074]))])

### Saving a model in PyTorch

There are three main methods you should know about for saving and loading models in PyTorch.

1. 'torch.save()' - allows you to save a PyTorch object in Python's pickle format
2. 'torch.load()' - allows you to load a saved PyTorch object
3. 'torch.nn.Module.load_state_dict()' - allows you to load a model's saved state dictionary

In [None]:
# Saving our PyTorch model
from pathlib import Path

# 1. Make sure the directory that will hold saved models exists
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Build the full path of the file the model is saved to
MODEL_NAME = "01_pytorch_workflow_model_0.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# 3. Save only the state dict (the learned parameters) rather than the whole model object
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(model_0.state_dict(), MODEL_SAVE_PATH)

Saving model to: models/01_pytorch_workflow_model_0.pth


## Loading a Pytorch model

Since we saved our model's 'state_dict()' rather than the entire model, we'll create a new instance of our model class and load the saved 'state_dict()' into that.

In [None]:
# Parameters of the trained model, for comparison with the reloaded copy
model_0.state_dict()

OrderedDict([('weights', tensor([0.6823])), ('bias', tensor([0.3074]))])

In [None]:
# To load a saved state_dict we first have to instantiate a new instance of our model class
loaded_model_0 = LinearRegressionModel()

# Read the saved state_dict from disk. torch.load unpickles the file, so weights_only=True
# is passed explicitly: it restricts loading to tensors and plain containers regardless of
# the installed PyTorch version's default, so a tampered file cannot run arbitrary code.
saved_state_dict = torch.load(f=MODEL_SAVE_PATH, weights_only=True)

# Load the saved state_dict of model_0 (this updates the new instance with the trained parameters)
loaded_model_0.load_state_dict(saved_state_dict)

<All keys matched successfully>

In [None]:
# Parameters of the reloaded model
loaded_model_0.state_dict()

OrderedDict([('weights', tensor([0.6823])), ('bias', tensor([0.3074]))])

In [None]:
# Make some predictions with our loaded model
loaded_model_0.eval()  # switch to evaluation mode before predicting
with torch.inference_mode():  # no gradient tracking while predicting
  loaded_model_preds = loaded_model_0(X_test)

loaded_model_preds

tensor([[0.8533],
        [0.8669],
        [0.8806],
        [0.8942],
        [0.9078],
        [0.9215],
        [0.9351],
        [0.9488],
        [0.9624],
        [0.9761]])

In [None]:
# Make predictions on the same test features with the original (in-memory) model
model_0.eval()  # switch to evaluation mode before predicting
with torch.inference_mode():  # no gradient tracking while predicting
  y_preds = model_0(X_test)

y_preds

tensor([[0.8533],
        [0.8669],
        [0.8806],
        [0.8942],
        [0.9078],
        [0.9215],
        [0.9351],
        [0.9488],
        [0.9624],
        [0.9761]])

In [None]:
# Compare loaded model preds with original model preds (element-wise equality)
y_preds == loaded_model_preds

tensor([[True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True]])

In [None]:
# Predictions of the original model on X_test
y_preds

tensor([[0.8533],
        [0.8669],
        [0.8806],
        [0.8942],
        [0.9078],
        [0.9215],
        [0.9351],
        [0.9488],
        [0.9624],
        [0.9761]])

### Putting it all together

Let's go back through the steps above and see it all in one place.



In [None]:
import torch
from torch import nn
import matplotlib.pyplot as plt

# Check which PyTorch version is installed
torch.__version__

'2.8.0+cu126'

Create Device-agnostic code.
This means if we've got access to a GPU, our code will use it (for potentially faster computing).
If no GPU is available, the code will default to using CPU

In [None]:
# Device-agnostic setup: use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Synthetic data generated with the linear regression formula y = weight * X + bias
weight, bias = .2, .8

# Range of input values: evenly spaced points in [start, end)
start, end, step = 0, 1, .02

# Features and labels. X keeps a trailing dimension (shape [N, 1]); per the original
# note, leaving that extra dimension out leads to errors further on.
X = torch.arange(start, end, step)[:, None]
y = X * weight + bias

In [None]:
# Stopped at 7:47 - recreating process