In [None]:
import torch
from torch import nn  # nn contains all of PyTorch's building blocks for neural networks
import matplotlib.pyplot as plt
import numpy as np

# Report the CUDA device when one is present. torch.cuda.get_device_name(0)
# raises on machines without a usable GPU, so it is only called after
# torch.cuda.is_available() confirms a device exists.
if torch.cuda.is_available():
    print(f"GPU Available: {torch.cuda.get_device_name(0)}")
else:
    print("GPU Available: none (running on CPU)")

# Check PyTorch version
print("Torch Version", torch.__version__)

### Creating a simple dataset using the linear regression formula

In [None]:
# Ground-truth parameters the model should recover during training
weight = 0.7
bias = 0.3

# Range of input values: [start, end) sampled every `step`
start = 0
end = 1
step = 0.02

# Inputs as a column vector (one feature per row)
x = torch.arange(start, end, step)
x = x.unsqueeze(dim=1)

# Targets follow the linear regression formula y = weight * x + bias
y = weight * x + bias

# Uncomment to preview the first ten samples:
# x[:10], y[:10]

In [None]:
# len(x), len(y)

### Splitting data into training and test sets

In [None]:
# Split point: the first 80% of the samples are used for training, the rest for testing
train_split = int(0.8 * len(x))

# Slice inputs and targets at the same index so the pairs stay aligned
x_train, x_test = x[:train_split], x[train_split:]
y_train, y_test = y[:train_split], y[train_split:]

# Show the resulting sizes
len(x_train), len(y_train), len(x_test), len(y_test)

### Exploring data

In [None]:
def plot_predictions(
    train_data=x_train,
    train_labels=y_train,
    test_data=x_test,
    test_labels=y_test,
    predictions=None,
):
    """
    Scatter-plot the training and test points, plus predictions when supplied.

    Args:
        train_data: x-values of the training set (drawn in blue).
        train_labels: y-values of the training set.
        test_data: x-values of the test set (drawn in green).
        test_labels: y-values of the test set.
        predictions: optional y-values plotted against ``test_data`` in red;
            skipped when ``None``.
    """
    # One (x, y, colour, legend label) entry per series, in drawing order
    series = [
        (train_data, train_labels, "b", "Training data"),
        (test_data, test_labels, "g", "Testing data"),
    ]
    if predictions is not None:
        series.append((test_data, predictions, "r", "Predictions"))

    plt.figure(figsize=(10, 7))
    for xs, ys, colour, legend_label in series:
        plt.scatter(xs, ys, s=4, c=colour, label=legend_label)

    # Show the legend
    plt.legend(prop={"size": 14})

In [None]:
# Visualise the train/test split using the function's default arguments (no predictions passed)
plot_predictions()

### Building first PyTorch model!
 * start with random values (weight & bias)
 * look at the training data and adjust the random values to better represent (or get closer to) the ideal values (the weight and bias values we used to create the data)


How does it do so?
Through two main algorithms:
1. Gradient descent
2. Backpropagation

In [None]:
import torch
from torch import nn

# Create a linear regression model class
class LinearRegressionModel(nn.Module):
    """Single-feature linear regression: ``y = weight * x + bias``.

    Subclasses ``nn.Module``. ``weight`` and ``bias`` are one-element float32
    tensors initialised with ``torch.randn`` and wrapped in ``nn.Parameter``
    so they show up in ``parameters()`` / ``state_dict()`` and can be updated
    by an optimizer.
    """

    def __init__(self):
        super().__init__()

        # Initialize model parameters with random starting values.
        # The weight is drawn before the bias, so a fixed seed always yields
        # the same (weight, bias) pair.
        initial_weight = torch.randn(1, requires_grad=True, dtype=torch.float32)
        self.weight = nn.Parameter(initial_weight)

        initial_bias = torch.randn(1, requires_grad=True, dtype=torch.float32)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear regression formula to the input data ``x``."""
        scaled = self.weight * x
        return scaled + self.bias

### PyTorch model building essentials

1. **torch.nn** - contains all of the building blocks for computational graphs (a neural network can be considered a computational graph)

2. **torch.nn.Parameter** - what parameters should our model try and learn, often a PyTorch layer from torch.nn will set these for us.

3. **torch.nn.Module** - The base class for all neural network modules, if you subclass it, you should overwrite forward()

4. **torch.optim** - this is where the optimizers in PyTorch live; they help with gradient descent

5. **def forward()** - All nn.Module subclasses require you to overwrite forward(), this method defines what happens in the forward computation.

In [None]:
# Seed PyTorch's random number generator. This must run before the model is
# created, because the model's parameters are drawn with torch.randn.
torch.manual_seed(42)

# Create an instance of the model (the model is a subclass of nn.Module)
model_0 = LinearRegressionModel()

# Check out the parameters (displayed as the cell output)
list(model_0.parameters())

In [None]:
# List the model's parameters by name (name -> tensor mapping)
model_0.state_dict()

### Making predictions using `torch.inference_mode()`

To check our model's predictive power, let's see how well it predicts `y_test` based on `x_test`.

When we pass data through our model, it runs through the model's `forward()` method.

In [None]:
# Display the test inputs that will be fed to the model
x_test

In [None]:
# Make predictions on the test inputs with the model.
# The forward pass runs inside torch.inference_mode(); the result is kept in
# y_preds and displayed as the cell output.
with torch.inference_mode():
    y_preds = model_0(x_test)
y_preds

In [None]:
# Plot the model's predictions alongside the default training and test data
plot_predictions(predictions=y_preds)

### 3. Train model

The whole idea of training is for a model to move from some *unknown* parameters (these may be random) to some *known* parameters.

or in other words from a poor representation of the data to a better representation of the data.

One way to measure how poor or how wrong your model's predictions are is to use a loss function.

* Note: Loss function may also be called cost function or criterion in different areas. For our case, we're going to refer to it as a loss function.

Things we need to train:

* **Loss function:** A function to measure how wrong your model's predictions are compared to the ideal outputs; lower is better.

* **Optimizer:** Takes into account the loss of a model and adjusts the model's parameters (e.g. weight & bias)

And specifically for PyTorch, we need:
* A training loop
* A testing loop

In [None]:
# Display the model's current parameters
list(model_0.parameters())

In [None]:
# Display the model's current parameters by name
model_0.state_dict()

In [None]:
# Loss function: L1 loss between predictions and targets
loss_fn = nn.L1Loss()

# Optimizer: stochastic gradient descent over model_0's parameters
# (lr is the learning rate, i.e. the step size of each update)
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.01)

### Building a training loop (and a testing loop) in PyTorch

A couple of things we need in a training loop:
0. Loop through the data and do...
1. forward pass (this involves data moving through our model's `forward()` functions) - also called forward propagation
2. Calculate the loss (compare forward pass predictions to ground truth labels)
3. Optimizer zero grad
4. Loss backward - move backwards through the network to calculate the gradients of each of the parameters of our model with respect to the loss (**backpropagation**)
5. Optimizer step - use the optimizer to adjust our model's parameters to try and improve the loss (**gradient descent**)

In [None]:
# An epoch is one loop through the data (this is a hyperparameter because we've set it ourselves)
epochs = 170

# Values recorded every 10th epoch so the loss curves can be plotted afterwards
epoch_count = []
train_loss = []  # NOTE(review): not used anywhere in the visible notebook; candidate for removal
train_loss_values = []
test_loss_values = []

# 0. Loop through the data
for epoch in range(epochs):
    ### Training
    # Put the module in training mode. This toggles layer behaviour (e.g.
    # dropout / batch norm); it does not change which parameters require gradients.
    model_0.train()

    # 1. Forward pass on the training data
    y_preds = model_0(x_train)

    # 2. Calculate the loss (predictions vs. ground-truth labels)
    loss = loss_fn(y_preds, y_train)

    # 3. Optimizer zero grad: gradients accumulate across backward() calls,
    #    so clear the ones left over from the previous iteration first
    optimizer.zero_grad()

    # 4. Perform backpropagation on the loss with respect to the parameters of the model
    loss.backward()

    # 5. Step the optimizer (perform gradient descent) using the gradients from step 4
    optimizer.step()

    ### Testing
    # Evaluation mode: turns off training-only layer behaviour (dropout / batch norm)
    model_0.eval()

    # inference_mode() disables gradient tracking, which is not needed for evaluation
    with torch.inference_mode():
        # 1. Do the forward pass on the test data
        test_pred = model_0(x_test)

        # 2. Calculate the test loss
        test_loss = loss_fn(test_pred, y_test)

    # Record and print progress every 10 epochs
    if epoch % 10 == 0:
        epoch_count.append(epoch)
        # Store plain Python floats rather than the tensors themselves: the
        # training loss tensor is still attached to the autograd graph, and
        # floats can be plotted directly without any conversion.
        train_loss_values.append(loss.item())
        test_loss_values.append(test_loss.item())
        print(f"Epoch: {epoch} | Loss: {loss} | Test loss: {test_loss}")

In [None]:
# Plot the loss curves
# float() accepts both plain numbers and single-element tensors, so both
# histories are converted the same way regardless of how they were recorded.
plt.plot(epoch_count, [float(value) for value in train_loss_values], label="Train loss")
plt.plot(epoch_count, [float(value) for value in test_loss_values], label="Test loss")
plt.title('Training and test loss curves')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

### Saving a model in PyTorch

There are three main methods you should know about for saving and loading models in PyTorch.

1. `torch.save()` - allows you to save a PyTorch object in Python's pickle format.
2. `torch.load()` - allows you to load a saved PyTorch object.
3. `torch.nn.Module.load_state_dict()` - allows you to load a model's saved state dictionary.

In [None]:
# Persist the trained model's state_dict to disk
from pathlib import Path

# 1. Make sure the target directory exists (no error if it is already there)
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Build the full path of the checkpoint file
MODEL_NAME = "01_pytorch_workflow_model_0.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# 3. Write only the state_dict (the learned parameters), not the whole model object
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(model_0.state_dict(), MODEL_SAVE_PATH)

### Loading a PyTorch model
Since we saved our model's `state_dict()` rather than the entire model, we'll create a new instance of our model class and load the saved `state_dict()` into that.

In [None]:
# Display model_0's parameters for comparison with the loaded model
model_0.state_dict()

In [None]:
# To load a saved state_dict we first create a new instance of our model class
# (it starts out with freshly drawn random parameters)
loaded_model_0 = LinearRegressionModel()

# Read the state_dict saved at MODEL_SAVE_PATH and copy its values into the new instance.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.

loaded_model_0.load_state_dict(torch.load(f=MODEL_SAVE_PATH))

In [None]:
# Display the loaded model's parameters
loaded_model_0.state_dict()

In [None]:
# Make some predictions with our loaded model:
# switch to evaluation mode, then run the forward pass on the test inputs
# inside torch.inference_mode()
loaded_model_0.eval()
with torch.inference_mode():
    loaded_model_preds = loaded_model_0(x_test)
    
loaded_model_preds

In [None]:
# Recompute the original model's test predictions in evaluation mode
model_0.eval()
with torch.inference_mode():
    y_preds = model_0(x_test)
    
# Compare loaded model preds with original model preds
# (== compares element by element and yields a boolean tensor)
y_preds == loaded_model_preds
