In [1]:
# PyTorch workflow: the steps this notebook walks through, keyed by step number
what_were_covering = {
    1: "data (prepare and load)",
    2: "build model",
    3: "fitting the model to data (training)",
    4: "making predictions and evaluating a model (inference)",
    5: "saving and loading a model",
    6: "putting it all together",
}

what_were_covering

{1: 'data (prepare and load)',
 2: 'build model',
 3: 'fitting the model to data (training)',
 4: 'making predictions and evaluting a model (inference)',
 5: 'saving and loading a model',
 6: 'putting it all together'}

In [2]:
pip install torch

Collecting torch
  Downloading torch-2.7.0-cp313-cp313-win_amd64.whl.metadata (29 kB)
Collecting filelock (from torch)
  Using cached filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting typing-extensions>=4.10.0 (from torch)
  Using cached typing_extensions-4.13.2-py3-none-any.whl.metadata (3.0 kB)
Collecting sympy>=1.13.3 (from torch)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting jinja2 (from torch)
  Using cached jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec (from torch)
  Using cached fsspec-2025.3.2-py3-none-any.whl.metadata (11 kB)
Collecting setuptools (from torch)
  Downloading setuptools-80.7.1-py3-none-any.whl.metadata (6.6 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Collecting MarkupSafe>=2.0 (from jinja2->torch)
  Downloading MarkupSafe-3.0.2-


[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import torch
from torch import nn
import matplotlib.pyplot as plt
# check the pytorch version
torch.__version__

  cpu = _conversion_method_template(device=torch.device("cpu"))


ModuleNotFoundError: No module named 'matplotlib'

## 1. Data (preparing and loading)
Data can be almost anything... in machine learning.

* Excel spreadsheet
* Images of any kind
* Videos (YouTube has lots of data...)
* Audio like songs or podcasts
* DNA
* Text

Machine learning is a game of two parts:

1.   Get data into a numerical representation
2.   Build a model to learn patterns in that numerical representation.

To showcase this, let's create some known data using the linear regression formula.


We'll use a linear regression formula to make a straight line with known **parameters**.

In [None]:
# Known parameters of the straight line the model should recover
weight, bias = 0.7, 0.3

# Inputs: evenly spaced values from start (inclusive) to end (exclusive),
# with an extra trailing dimension added so each sample is its own row
start, end, step = 0, 1, 0.02
X = torch.arange(start, end, step).unsqueeze(1)

# Targets follow the linear regression formula exactly
y = weight * X + bias

X[:10], y[:10]

In [None]:
len(X),len(y)

In [None]:
# Train/test split: the first 80% of the samples train the model,
# the remaining samples are held out for testing
train_split = int(0.8 * len(X))
X_train, X_test = X[:train_split], X[train_split:]
y_train, y_test = y[:train_split], y[train_split:]
len(X_train), len(y_train), len(X_test), len(y_test)

In [None]:
def plot_predictions(train_data = X_train,
                     train_labels=y_train,
                     test_data = X_test,
                     test_labels = y_test,
                     predictions=None):
  """Scatter-plot the training data, the test data and, optionally, predictions.

  Args:
    train_data: x-values of the training samples (drawn in red).
    train_labels: y-values of the training samples.
    test_data: x-values of the test samples (drawn in green).
    test_labels: y-values of the test samples.
    predictions: optional y-values plotted against ``test_data`` in blue;
      nothing extra is drawn when this is ``None``.
  """
  plt.figure(figsize=(10, 7))

  # Each entry is (x-values, y-values, colour, legend label)
  series = [
      (train_data, train_labels, "r", "Train Data"),
      (test_data, test_labels, "g", "Test Data"),
  ]
  if predictions is not None:
    series.append((test_data, predictions, "b", "Predictions"))

  for xs, ys, colour, name in series:
    plt.scatter(xs, ys, c=colour, label=name)

  plt.legend(prop={"size": 14})



In [None]:
plot_predictions();

In [None]:
## 2 Build the Model
class LinearRegressionModel(nn.Module):
  """Linear regression model computing ``weights * x + bias``.

  Both ``weights`` and ``bias`` are one-element tensors initialised with
  ``torch.randn`` and wrapped in ``nn.Parameter``.
  """

  def __init__(self):
    super().__init__()
    # Draw order matters for seeded reproducibility: weights first, then bias.
    initial_weights = torch.randn(1, dtype=torch.float, requires_grad=True)
    self.weights = nn.Parameter(initial_weights)
    initial_bias = torch.randn(1, dtype=torch.float, requires_grad=True)
    self.bias = nn.Parameter(initial_bias)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Return the model's prediction for the input tensor ``x``."""
    slope, intercept = self.weights, self.bias
    return slope * x + intercept

## PyTorch model building essentials
* torch.nn - contains all of the building blocks for computational graphs (a neural network can be considered a computational graph)
* torch.nn.Parameter - what parameters should our model try and learn, often a PyTorch layer from torch.nn will set these for us
* torch.nn.Module - The base class for all neural network modules, if you subclass it, you should overwrite forward()
* torch.optim - this is where the optimizers in PyTorch live, they will help with gradient descent
* def forward() - All nn.Module subclasses require you to overwrite forward(), this method defines what happens in the forward computation

extraInfo: https://pytorch.org/tutorials/beginner/ptcheat.html

In [None]:
# Seed the RNG so the randomly initialised parameters are the same on every run
torch.manual_seed(42)

# Create an instance of the model defined above
model_0 = LinearRegressionModel()

# Show the model's parameters as a list
list(model_0.parameters())

In [None]:
model_0.state_dict()

In [None]:
weight,bias

## Making predictions using `torch.inference_mode()`

In [None]:
# Forward pass on the test data with gradient tracking turned off
with torch.inference_mode():
  y_pred = model_0(X_test)

# Same forward pass using torch.no_grad(), which you may see in older PyTorch code
with torch.no_grad():
  y_preds = model_0(X_test)

# Display both sets of predictions
y_pred,y_preds

In [None]:
plot_predictions(predictions=y_preds)

## 3. Train model
The whole idea of training is for a model to move from some unknown parameters (these may be random) to some known parameters.

Or in other words from a poor representation of the data to a better representation of the data.

One way to measure how poor or how wrong your model's predictions are is to use a loss function.

* Note: Loss function may also be called cost function or criterion in different areas. For our case, we're going to refer to it as a loss function.
* Things we need to train:

**Loss function:** A function to measure how wrong your model's predictions are to the ideal outputs, lower is better.

**Optimizer:** Takes into account the loss of a model and adjusts the model's parameters (e.g. weight & bias in our case) to improve the loss function - https://pytorch.org/docs/stable/optim.html#module-torch.optim


Inside the optimizer you'll often have to set two parameters:

params - the model parameters you'd like to optimize, for example params=model_0.parameters()


lr (learning rate) - the learning rate is a hyperparameter that defines how big/small the optimizer changes the parameters with each step (a small lr results in small changes, a large lr results in large changes)


And specifically for PyTorch, we need:

* A training loop
* A testing loop

In [None]:
# Check the model parameters
model_0.state_dict()

In [None]:
# Set up the loss function (L1 loss between predictions and targets)
loss_fn = nn.L1Loss()

# Set up the optimizer: stochastic gradient descent over model_0's parameters
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.01)

In [None]:
# epochs = 100
# ## training
# for epoch in range(epochs):
#   model_0.train()
#   # 1.Forward pass
#   y_pred = model_0(X_train)
#   # 2.Calculate the loss
#   loss = loss_fn(y_pred,y_train)
#   print(f"loss{loss}")
#   # 3.Optimizer zero grad
#   optimizer.zero_grad()
#   # 4.perform backpropagation on the loss
#   loss.backward()
#   # 5.step the optimizer
#   optimizer.step()

#   model_0.eval()
#   print(model_0.state_dict())

In [None]:
# with torch.inference_mode():
#   y_pred_new = model_0(X_test)

In [None]:
# plot_predictions(predictions = y_pred_new)

In [None]:

torch.manual_seed(42)

# An epoch is one loop through the data... (this is a hyperparameter because we've set it ourselves)
epochs = 200

# Track different values.
# Losses are stored as plain Python floats (via .item()) so the lists do not keep
# autograd graphs alive and can be plotted or converted to arrays directly.
epoch_count = []
loss_values = []
test_loss_values = []

### Training
# 0. Loop through the data
for epoch in range(epochs):
  # Put the model in training mode (switches on train-time behaviour of layers such as dropout/batch norm)
  model_0.train()

  # 1. Forward pass
  y_pred = model_0(X_train)

  # 2. Calculate the loss
  loss = loss_fn(y_pred, y_train)

  # 3. Optimizer zero grad: gradients accumulate across backward() calls,
  #    so clear the ones left over from the previous iteration
  optimizer.zero_grad()

  # 4. Perform backpropagation on the loss with respect to the parameters of the model (calculate gradients of each parameter)
  loss.backward()

  # 5. Step the optimizer (perform gradient descent using the gradients computed in step 4)
  optimizer.step()

  ### Testing
  model_0.eval() # turns off different settings in the model not needed for evaluation/testing (dropout/batch norm layers)
  with torch.inference_mode(): # turns off gradient tracking & a couple more things behind the scenes - https://twitter.com/PyTorch/status/1437838231505096708?s=20&t=aftDZicoiUGiklEP179x7A
  # with torch.no_grad(): # you may also see torch.no_grad() in older PyTorch code
    # 1. Do the forward pass
    test_pred = model_0(X_test)

    # 2. Calculate the loss
    test_loss = loss_fn(test_pred, y_test)

  # Print out what's happenin'
  if epoch % 10 == 0:
    epoch_count.append(epoch)
    loss_values.append(loss.item())
    test_loss_values.append(test_loss.item())
    print(f"Epoch: {epoch} | Loss: {loss} | Test loss: {test_loss}")
    # Print out model state_dict()
    print(model_0.state_dict())

In [None]:
import numpy as np
np.array(torch.tensor(loss_values).numpy()), test_loss_values

In [None]:
# Plot the train and test loss recorded during training against the epoch number
train_loss_curve = np.array(torch.tensor(loss_values).numpy())
plt.plot(epoch_count, train_loss_curve, label="Train loss")
plt.plot(epoch_count, test_loss_values, label="Test loss")
plt.title("Training and test loss curves")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend();

In [None]:
from pathlib import Path
# 1. Make sure the output directory exists
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Build the full path of the file to save to
MODEL_NAME = "01_pytorch_workflow_model_1.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# 3. Save only the model's state dict
print(f"Saving model to :{MODEL_SAVE_PATH}")
torch.save(model_0.state_dict(), MODEL_SAVE_PATH)

In [None]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Load a PyTorch model

# Create a new instance of the linear regression model
loaded_model_0 = LinearRegressionModel()

# Load the saved model_0 state_dict.
# map_location puts the loaded tensors on `device` wherever they were saved from;
# weights_only=True limits unpickling to tensors and plain containers.
loaded_model_0.load_state_dict(
    torch.load(MODEL_SAVE_PATH, map_location=device, weights_only=True))

# Put the loaded model on the target device
loaded_model_0.to(device)

In [None]:
next(loaded_model_0.parameters()).device

In [None]:
loaded_model_0.state_dict()

In [None]:
# Evaluate loaded model
# `y_preds` was computed before the training loop ran, so it holds the untrained
# model's output. Recompute the reference predictions from the trained model_0 here.
model_0.eval()
loaded_model_0.eval()
with torch.inference_mode():
  trained_model_0_preds = model_0(X_test)
  # Run the loaded model on whichever device it lives on, then bring the result
  # back to the CPU so both tensors can be compared.
  loaded_device = next(loaded_model_0.parameters()).device
  loaded_model_0_preds = loaded_model_0(X_test.to(loaded_device)).cpu()
trained_model_0_preds == loaded_model_0_preds