In [1]:
import torch
from torch import nn # building blocks for neural networks
import matplotlib.pyplot as plt

# Show the PyTorch version in the cell output. A bare `torch.__version__`
# expression is only displayed when it is the last line of a cell, so print it.
print(f"PyTorch version: {torch.__version__}")

# Device-agnostic setup: use the GPU when CUDA is available, otherwise the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

## 1. Prepare & Load Data

In [2]:
# Synthetic data on a straight line: features in [0, 1) with step 0.02,
# shaped as a single column, and targets y = 0.7 * X + 0.3
X = torch.arange(start=0, end=1, step=0.02).unsqueeze(1)
y = 0.7 * X + 0.3

# First 80% of the samples are used for training, the remainder for testing
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

## 2. Build Model

In [3]:
# Create Linear Regression Model Class
class LinearRegressionNN(nn.Module):
    """Single-feature linear regression: ``y = weights * x + bias``.

    Both coefficients are one-element learnable parameters initialised from
    ``torch.randn`` when the model is constructed.
    """

    def __init__(self):
        super().__init__()
        # nn.Parameter marks its tensor as requiring gradients by default, so
        # both coefficients can be updated via gradient descent.
        # The weights are drawn before the bias, which fixes the seeded values.
        initial_weights = torch.randn(1, dtype=torch.float32)
        self.weights = nn.Parameter(initial_weights)
        initial_bias = torch.randn(1, dtype=torch.float32)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the linear prediction ``weights * x + bias`` for ``x``."""
        scaled = self.weights * x
        return scaled + self.bias

In [4]:
# Fix the RNG seed so the randomly initialised parameters are reproducible
torch.manual_seed(42)

# Build the model and place it on the selected device (GPU if available);
# Module.to returns the module itself, so this can be done in one statement
model = LinearRegressionNN().to(device)

# Inspect the initial parameters and confirm which device they live on
print(list(model.parameters()))
next(model.parameters()).device

[Parameter containing:
tensor([0.3367], requires_grad=True), Parameter containing:
tensor([0.1288], requires_grad=True)]


device(type='cpu')

In [5]:
# Show the model's state dict: parameter names mapped to their current tensors
model.state_dict()

OrderedDict([('weights', tensor([0.3367])), ('bias', tensor([0.1288]))])

## Training Loop

In [6]:
# Instantiate loss function (L1 / mean absolute error)
loss_fn = nn.L1Loss()
# Instantiate optimizer (stochastic gradient descent over the model's parameters)
optim = torch.optim.SGD(params=model.parameters(), lr=0.01)

# The model was moved to `device`, so the data must live there too; otherwise
# the forward pass raises a device-mismatch error whenever `device` is "cuda".
# `.to` returns the same tensor when it is already on the target device.
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# Loop through data
epochs = 100
for epoch in range(epochs):
    ### Training step
    model.train()

    # Forward pass on the training data
    y_preds = model(X_train)

    # Calculate training loss
    train_loss = loss_fn(y_preds, y_train)

    # Reset the gradients accumulated during the previous step
    optim.zero_grad()

    # Backpropagate the loss to compute fresh gradients
    train_loss.backward()

    # Update the parameters from those gradients
    optim.step()

    ### Evaluation step
    # Switch to evaluation mode and disable gradient tracking while predicting
    model.eval()
    with torch.inference_mode():
        # Predict every epoch so `y_test_preds` holds the final-epoch
        # predictions once the loop finishes
        y_test_preds = model(X_test)
        # Compute and report the test loss only every 10th epoch
        if epoch % 10 == 0:
            test_loss = loss_fn(y_test_preds, y_test)
            print(f'Epoch: {epoch}, train loss: {train_loss} test loss: {test_loss}')
    

Epoch: 0, train loss: 0.31288138031959534 test loss: 0.48106518387794495
Epoch: 10, train loss: 0.1976713240146637 test loss: 0.3463551998138428
Epoch: 20, train loss: 0.08908725529909134 test loss: 0.21729660034179688
Epoch: 30, train loss: 0.053148526698350906 test loss: 0.14464017748832703
Epoch: 40, train loss: 0.04543796554207802 test loss: 0.11360953003168106
Epoch: 50, train loss: 0.04167863354086876 test loss: 0.09919948130846024
Epoch: 60, train loss: 0.03818932920694351 test loss: 0.08886633068323135
Epoch: 70, train loss: 0.03476089984178543 test loss: 0.0805937647819519
Epoch: 80, train loss: 0.03132382780313492 test loss: 0.07232122868299484
Epoch: 90, train loss: 0.02788739837706089 test loss: 0.06473556160926819


In [7]:
# Inspect the parameters after the training loop has run
list(model.parameters())

[Parameter containing:
 tensor([0.5784], requires_grad=True),
 Parameter containing:
 tensor([0.3513], requires_grad=True)]

## Saving the model

In [8]:
from pathlib import Path

# 1. Make sure the output directory exists (no error if it is already there)
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Build the full path of the checkpoint file
MODEL_NAME = "01_pytorch_workflow_model_1.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# 3. Save only the state dict, i.e. the model's learned parameters,
#    rather than the whole model object
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(model.state_dict(), MODEL_SAVE_PATH)

Saving model to: models/01_pytorch_workflow_model_1.pth


## Loading the model

In [10]:
# Instantiate a fresh instance of LinearRegressionNN (randomly initialised)
loaded_model = LinearRegressionNN()

# Load the saved state dict into the fresh instance. `map_location` remaps the
# stored tensors onto the current device, so a checkpoint written from a GPU
# model still loads on a CPU-only machine.
loaded_model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))

# Put model on the target device (if your data is on GPU, the model has to be on GPU to make predictions)
loaded_model.to(device)

print(f"Loaded model:\n{loaded_model}")
print(f"Model on device:\n{next(loaded_model.parameters()).device}")

Loaded model:
LinearRegressionNN()
Model on device:
cpu
