<a href="https://colab.research.google.com/github/slowanimals/learn-pytorch/blob/main/01_pytorch_workflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 01. PyTorch Workflow

## Example PyTorch end-to-end workflow

In [None]:
# imports
import torch
from torch import nn  # pytorch's neural network tools
import matplotlib.pyplot as plt  # for visualization
import numpy as np

print(torch.__version__)

### Linear Regression
Using a Linear Regression formula to make a straight line with known parameters

**y = mx + b**

In [None]:
# Ground-truth parameters of the line y = m*x + b
weight, bias = 0.7, 0.3  # m, b

# Features: values from 0 (inclusive) to 1 (exclusive) in steps of 0.02,
# given a second dimension of size 1 so that X is a 2D column tensor
X = torch.arange(0., 1., 0.02)[:, None]

# Labels: the known line evaluated at every feature value
y = weight * X + bias

# Preview the first ten features and their labels
print(X[:10], y[:10], sep="\n")

In [None]:
print(len(X), len(y))

## 1. Creating & Splitting Data

In [None]:
# Index of the first test sample: the first 80% of the samples are used for
# training and the remaining 20% are held out for testing
train_split = int(0.8 * len(X))

X_train, X_test = X[:train_split], X[train_split:]
y_train, y_test = y[:train_split], y[train_split:]

# Sizes of the four subsets (shown as the cell output)
len(X_train), len(X_test), len(y_train), len(y_test)

### Visualize!

1. We will train the model with the training datasets.
2. Then we will apply it to the test data to make predictions, and compare the model's predictions with the test labels' values.

In [None]:
# plotting training data, test data, & predictions
def plot_predictions(train_data = X_train,
                     train_labels = y_train,
                     test_data = X_test,
                     test_labels = y_test,
                     predictions = None):
  """Scatter-plot the training data, the test data and, optionally, predictions.

  Training points are drawn in blue and test points in green. When
  `predictions` is given, it is drawn in red against `test_data`, so it is
  expected to hold one prediction per test sample.
  """
  plt.figure(figsize=(7,5))

  # (x values, y values, colour, legend label) for every series, in draw order
  series = [
      (train_data, train_labels, "b", "Training Data"),
      (test_data, test_labels, "g", "Testing Data"),
  ]
  if predictions is not None:
    series.append((test_data, predictions, 'r', "Predictions"))

  for xs, ys, colour, name in series:
    plt.scatter(xs, ys, c=colour, s=4, label=name)

  # show the legend
  plt.legend(prop={"size":14})

In [None]:
plot_predictions()

## 2. Building a Linear Regression Model


The way this model works is:
1. Starts with random numbers representing the weight and bias
2. Looks at the training samples
3. Runs the training samples through the `forward()` method to make predictions, then adjusts the weight and bias so that the predictions better represent the pattern found in the training samples

**How does it do so?**
Gradient descent & backpropagation (which is why we set `requires_grad=True`, so that PyTorch tracks the gradients of the parameters)

In [None]:
class LinearRegressionModel(nn.Module):
  """Linear regression model that computes y = weight * x + bias.

  Both parameters are single-element float32 tensors that start out as random
  values drawn with torch.randn. They are wrapped in nn.Parameter with
  requires_grad=True so that they can be updated with gradient descent.
  """

  def __init__(self):
    super().__init__()
    # slope (m): one random starting value that training will adjust
    self.weight = nn.Parameter(torch.randn(1, dtype=torch.float),
                               requires_grad=True)
    # intercept (b): one random starting value that training will adjust
    self.bias = nn.Parameter(torch.randn(1, dtype=torch.float),
                             requires_grad=True)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Apply the linear formula y = mx + b to the input tensor `x`."""
    slope, intercept = self.weight, self.bias
    return slope * x + intercept

### Checking the Contents of Our Model

In [None]:
# create manual seed in order to have reproducible results
# NOTE: torch.manual_seed returns a torch.Generator, so `seed` holds that
# generator object rather than the number 5
seed = torch.manual_seed(5)

# create instance of model (subclass of nn.Module); its parameters are drawn
# with torch.randn, so their values depend on the seed set above
model_0 = LinearRegressionModel()

# check our parameters (shown as the cell output)
list(model_0.parameters())

In [None]:
# list named parameters
model_0.state_dict()

### Making Predictions via `torch.inference_mode()`
Let's see how well our model predicts y_test based on X_test.  
When we pass data through the model, it will be run through the `forward()` method.


In [None]:
X_test[:10], y_test[:10]

In [None]:
with torch.inference_mode():
  y_predict = model_0(X_test)
y_predict

In [None]:
plot_predictions(predictions = y_predict)

Because the parameters are randomly initialized, the predictions are essentially random: the model has not learned anything from the training data yet.

In [None]:
# re-initiating model for convenience
torch.manual_seed(5)
model_0 = LinearRegressionModel()
model_0.state_dict()

## 3. Train Model
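The idea is to move from a poor representation of the data to a better one.  
We're going to use a loss function to measure how wrong the model's predictions are, and an optimizer to adjust the parameters so that the loss goes down.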

In [None]:
# Loss function: L1 loss between the predictions and the labels
loss_fn = nn.L1Loss()

# Optimizer: stochastic gradient descent over model_0's parameters,
# with a learning rate of 0.01
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.01)

### Building the Training & Testing Loops

In [None]:
epochs = 1000  # an epoch is 1 loop through the data (hyperparameter since we set it ourselves)

# History recorded every 100 epochs; used for the plots further down
epoch_count = []
train_loss_values = []
test_loss_values = []
weight_values = []
bias_values = []

for epoch in range(epochs):  # pass data through a number of epochs

  ## TRAINING
  # Put the model in training mode. This switches on training-specific layer
  # behaviour (e.g. dropout, batch norm); it does NOT change requires_grad.
  model_0.train()

  # 1. Forward pass that calls forward() method
  y_pred = model_0(X_train)

  # 2. Calculate the loss
  loss = loss_fn(y_pred, y_train)  # order is prediction, target

  # 3. Zero the gradients: by default they accumulate across backward() calls,
  #    so they must be reset before every new backward pass
  optimizer.zero_grad()

  # 4. Perform backpropagation on loss with respect to parameters
  loss.backward()

  # 5. Optimizer step (gradient descent): update the parameters using the
  #    gradients computed in step 4
  optimizer.step()


  ## TESTING
  model_0.eval()  # switch off training-only layer behaviour
  with torch.inference_mode():  # no gradient tracking while evaluating
    # 1. Forward pass
    test_pred = model_0(X_test)

    # 2. Calculate loss
    test_loss = loss_fn(test_pred, y_test)

  # Record and print what's happening every 100 epochs
  if epoch % 100 == 0:
    epoch_count.append(epoch)
    # Store the training loss detached from the autograd graph: a tensor that
    # still requires grad cannot be converted to NumPy or plotted directly
    train_loss_values.append(loss.detach())
    test_loss_values.append(test_loss)

    current_state = model_0.state_dict()
    weight_values.append(current_state['weight'].item())
    bias_values.append(current_state['bias'].item())

    print(f'Epoch: {epoch} | Loss: {loss} | Test Loss: {test_loss}')
    print(model_0.state_dict())


In [None]:
# Predict on the test set with the trained model (no gradient tracking) ...
with torch.inference_mode():
  y_pred_new = model_0(X_test)

# ... and draw the predictions next to the training and test data
plot_predictions(predictions = y_pred_new)

## Plot Loss Curves

In [None]:
epoch_count

In [None]:
train_loss_values

In [None]:
# test_loss_values needs to be converted into a tensor and then into a numpy array
np.array(torch.tensor(test_loss_values).numpy())

In [None]:
# Loss curves: training and test loss as recorded during the training loop
plt.plot(epoch_count,
         torch.tensor(train_loss_values).detach().numpy(),
         label="Train Loss")
plt.plot(epoch_count,
         torch.tensor(test_loss_values).detach().numpy(),
         label="Test Loss")
# Both curves share the same axes: x is the epoch at which the value was
# recorded, y is the loss value (the legend tells the two curves apart)
plt.title("Training and Test Loss Curves")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend();

In [None]:
# Parameter curves: the model's weight and bias as recorded during training
plt.plot(epoch_count,
         torch.tensor(weight_values).numpy(),
         label="Weight")
plt.plot(epoch_count,
         torch.tensor(bias_values).numpy(),
         label="Bias")
# Both curves share the same axes: x is the epoch at which the value was
# recorded, y is the parameter's value (the legend tells the two curves apart)
plt.title("Weight and Bias During Training")
plt.xlabel("Epochs")
plt.ylabel("Parameter Value")
plt.legend();

## 4. Saving a Model

In [None]:
from pathlib import Path

# 1. Make sure the directory for saved models exists
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Build the full path of the file to write
#    (convention is to save pytorch models as .pt or .pth)
MODEL_NAME = '01_model_0.pt'
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# 3. Write only the model's state dict (its parameters), not the whole model
torch.save(model_0.state_dict(), MODEL_SAVE_PATH)

In [None]:
!ls -l models

## Loading a Model

In [None]:
# since we only saved the state dict, we create a new instance of the model
loaded_model_0 = LinearRegressionModel()

# load the saved state dict of model_0 into the new instance
loaded_model_0.load_state_dict(torch.load(f = MODEL_SAVE_PATH))

# Check that the loaded model holds exactly the same parameters as model_0.
# The tensors are compared one by one with torch.equal: `==` between two state
# dicts only works while every tensor has a single element and raises an
# "ambiguous boolean value" error for larger parameters.
original_state = model_0.state_dict()
loaded_state = loaded_model_0.state_dict()
(original_state.keys() == loaded_state.keys()
 and all(torch.equal(original_state[key], loaded_state[key])
         for key in original_state))

# Putting It All Together

### Imports

In [None]:
import torch
from torch import nn
import matplotlib.pyplot as plt
from pathlib import Path

### Write Device Agnostic Code

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'using device: {device}')

### Creating Data

In [None]:
# Known parameters of the line the model should learn
weight, bias = 0.3, 0.5

# Features as a 2D column tensor, and labels computed from the known line
data = torch.arange(0,1,0.02)[:, None]
labels = data * weight + bias

# The first 80% of the samples are for training, the remaining 20% for testing
train_split = int(0.8 * len(data))

train_data, test_data = data[:train_split], data[train_split:]
train_labels, test_labels = labels[:train_split], labels[train_split:]

### Create Model

In [None]:
class LinearRegressionModelV2(nn.Module):
  """Linear regression model built on a single nn.Linear layer.

  The layer maps 1 input feature to 1 output feature and creates and holds its
  own weight and bias, so no parameters are defined by hand here.
  """

  def __init__(self):
    super().__init__()
    # one linear layer with an input and an output of size 1
    self.linearLayer = nn.Linear(1, 1)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Pass `x` through the linear layer and return the result."""
    output = self.linearLayer(x)
    return output

torch.manual_seed(5)
model_1 = LinearRegressionModelV2()
model_1.state_dict()

### Set Model to Use Target Device

In [None]:
next(model_1.parameters()).device

In [None]:
model_1.to(device)
next(model_1.parameters()).device

### Create Visualization Function

In [None]:
def plot_predictions(train_data = train_data,
                     test_data = test_data,
                     train_labels = train_labels,
                     test_labels = test_labels,
                     predictions = None):
  """Scatter-plot the training data, the test data and, optionally, predictions.

  Training points are drawn in blue and test points in green. When
  `predictions` is given, it is drawn in red against `test_data`.

  The default arguments are bound when this function is defined, so they keep
  referring to the tensors that existed at that moment even if the global
  names are rebound afterwards.
  """
  plt.figure(figsize=(8,5))

  # (x values, y values, colour, legend label) for every series, in draw order
  series = [
      (train_data, train_labels, 'b', "Training Data"),
      (test_data, test_labels, 'g', "Testing Data"),
  ]
  if predictions is not None:
    series.append((test_data, predictions, 'r', "Predictions"))

  for xs, ys, colour, name in series:
    plt.scatter(xs, ys, s=4, c=colour, label=name)

  plt.legend(prop={"size":14})

### Create Training/Eval Loop with L1Loss and SGD

In [None]:
# Seed BEFORE constructing the model: the layer's starting weight and bias are
# drawn at construction time, so seeding afterwards would not make this fresh
# instance (or the training run that starts from it) reproducible
torch.manual_seed(5)
model_1 = LinearRegressionModelV2()
model_1.to(device)

# starting parameters of the fresh model (shown as the cell output)
list(model_1.parameters())

In [None]:
loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(params = model_1.parameters(), lr = 0.001)

In [None]:
torch.manual_seed(5)

epochs = 1000

# put data on the same device as the model to keep the code device agnostic
train_data, train_labels = train_data.to(device), train_labels.to(device)
test_data, test_labels = test_data.to(device), test_labels.to(device)

for epoch in range(epochs):
  # --- TRAINING: forward pass, loss, reset gradients, backpropagation, update ---
  model_1.train()
  model_1_predict = model_1(train_data)
  loss = loss_fn(model_1_predict, train_labels)
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  # --- TESTING: evaluate on the test data without gradient tracking ---
  model_1.eval()
  with torch.inference_mode():
    test_pred = model_1(test_data)
    test_loss = loss_fn(test_pred, test_labels)

    # report progress every 100 epochs
    if epoch % 100 == 0:
      print(f'Epoch: {epoch} | Loss: {loss} | Test Loss: {test_loss}')
      print(model_1.state_dict())

In [None]:
with torch.inference_mode():
  plot_predictions(predictions = test_pred.cpu())  # matplotlib requires test_pred to be moved to the cpu

### Save and Load model_1

In [None]:
model_1.state_dict()

In [None]:
from pathlib import Path

# Make sure the directory for saved models exists
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Write model_1's state dict (its parameters) to models/01_model_1.pt
MODEL_SAVE_PATH = MODEL_PATH.joinpath("01_model_1.pt")
torch.save(model_1.state_dict(), MODEL_SAVE_PATH)

In [None]:
# new instance
# new instance to receive the saved parameters
loaded_model_1 = LinearRegressionModelV2()

# map_location remaps the saved tensors onto the current device, so a state
# dict saved on a GPU runtime can still be loaded on a CPU-only runtime
loaded_model_1.load_state_dict(torch.load(f = MODEL_SAVE_PATH,
                                          map_location = device))

# move loaded model to device
loaded_model_1.to(device)

In [None]:
next(loaded_model_1.parameters()).device

### Evaluate Loaded Model

In [None]:
# Run the reloaded model on the test data in eval mode, without gradient tracking
loaded_model_1.eval()
with torch.inference_mode():
  loaded_model_1_preds = loaded_model_1(test_data)

# Element-wise comparison of model_1's last test predictions (from the training
# loop) with the loaded model's predictions; the resulting boolean tensor is
# shown as the cell output
test_pred == loaded_model_1_preds  # compare model 1 test predictions with loaded