### Workflow of a Linear Regression model

In [None]:
# Import PyTorch and matplotlib
import torch
from torch import nn # nn contains all of PyTorch's building blocks for neural networks
import matplotlib.pyplot as plt

# Check PyTorch version
torch.__version__

In [None]:
# Device-agnostic setup: pick the best available backend once, in priority
# order NVIDIA GPU ("cuda") -> Apple GPU ("mps") -> CPU fallback.
# The conditional expression short-circuits, so the MPS check only runs when
# CUDA is unavailable.
# NOTE(review): nothing in the cells visible here moves the model or tensors
# onto `device` — confirm whether `.to(device)` calls were intended.
device = (
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

print(f"Using device: {device}")

## 1.1 DATA

In [None]:
# Synthetic dataset for a known straight line, y = weight * X + bias.
# Because the true parameters are known, the fitted ones can be checked later.
weight, bias = 0.7, 0.3

# Sampling grid for the feature: values from `start` up to (not including)
# `end`, spaced `step` apart
start, end, step = 0, 1, 0.02

# Features as a column of shape (N, 1): nn.Linear(in_features=1) reads the
# features from the last dimension, so the 1-D range gets a trailing axis.
X = torch.arange(start, end, step)[:, None]
# Labels computed from the features with the ground-truth line
y = weight * X + bias

print(f'X:{X[:10]}, y: {y[:10]}')

In [None]:
# Hold-out split: the leading 80% of the samples train the model and the
# remaining 20% are kept aside for evaluation. The slice is positional
# (no shuffling), so the test set is the tail of the range.
train_split = int(0.8 * len(X))
X_train, X_test = X[:train_split], X[train_split:]
y_train, y_test = y[:train_split], y[train_split:]

# Sizes of the four resulting tensors (shown as the cell output)
len(X_train), len(y_train), len(X_test), len(y_test)

## 1.2 BUILD MODEL

In [None]:
# Building a PyTorch linear model
class LinearRegressionModel_V2(nn.Module):
    """Single-feature linear regression expressed as one ``nn.Linear`` layer.

    The layer maps 1 input feature to 1 output feature, so it holds exactly
    one weight and one bias.
    """

    def __init__(self):
        super().__init__()
        # nn.Linear creates and registers the weight/bias parameters for us.
        # Such a layer is also called a linear transform, probing layer,
        # fully connected layer or dense layer.
        self.linear_layer = nn.Linear(1, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the linear layer and return its output."""
        predictions = self.linear_layer(x)
        return predictions

# class LinearRegressionModel(nn.Module): # <- almost everything in PyTorch is a nn.Module (think of this as neural network lego blocks)
#     def __init__(self):
#         super().__init__() 
#         self.weights = nn.Parameter(torch.randn(1, # <- start with random weights (this will get adjusted as the model learns)
#                                                 dtype=torch.float), # <- PyTorch loves float32 by default
#                                    requires_grad=True) # <- can we update this value with gradient descent?)

#         self.bias = nn.Parameter(torch.randn(1, # <- start with random bias (this will get adjusted as the model learns)
#                                             dtype=torch.float), # <- PyTorch loves float32 by default
#                                 requires_grad=True) # <- can we update this value with gradient descent?))

#     # Forward defines the computation in the model
#     def forward(self, x: torch.Tensor) -> torch.Tensor: # <- "x" is the input data (e.g. training/testing features)
#         return self.weights * x + self.bias # <- this is the linear regression formula (y = m*x + b)



In [None]:
# Set manual seed
# Seeding happens before the model is built so that nn.Linear's random initial
# weight and bias come out the same on every run of this cell.
torch.manual_seed(42)
model_1 = LinearRegressionModel_V2()
# Show the module structure together with its freshly initialised parameters
model_1, model_1.state_dict()

In [None]:
# Loss: mean absolute error between predictions and targets (nn.L1Loss is MAE)
loss_fn = nn.L1Loss()

# Optimizer: stochastic gradient descent over every parameter of model_1.
# The learning rate scales each update step — larger values move faster but
# are less stable, smaller values are steadier but may take a long time.
optimizer = torch.optim.SGD(model_1.parameters(), lr=0.001)

## 1.3 TRAINING LOOP

In [None]:
# An epoch is one full pass over the training data; the count is a
# hyperparameter because we choose it ourselves.
epochs = 200

for epoch in range(epochs):
    ### Training
    # Training mode: layers with training-specific behaviour (dropout,
    # batch norm) act accordingly. This is the default mode of a module.
    model_1.train()

    # 1. Forward pass on the training features (runs model_1.forward)
    y_pred = model_1(X_train)

    # 2. Loss: how far the predictions are from the ground truth
    loss = loss_fn(y_pred, y_train)

    # 3. Clear the gradients left over from the previous iteration;
    #    backward() accumulates into .grad, so they are reset every step.
    optimizer.zero_grad()

    # 4. Backpropagation: compute the gradient of the loss with respect to
    #    every model parameter that has requires_grad=True
    loss.backward()

    # 5. Gradient-descent update of the parameters
    optimizer.step()

    ### Testing
    # Evaluation mode: switches off training-only layer behaviour
    # (dropout / batch norm) that is not wanted while evaluating
    model_1.eval()

    # inference_mode disables gradient tracking, which evaluation doesn't need
    with torch.inference_mode():
        # 1. Forward pass on the held-out features
        test_pred = model_1(X_test)

        # 2. Loss on the held-out data; the cast keeps the targets in
        #    torch.float, the same dtype the predictions come in
        test_loss = loss_fn(test_pred, y_test.type(torch.float))

    # Report progress every 10th epoch
    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | MAE Train Loss: {loss} | MAE Test Loss: {test_loss} ")

In [None]:
# Inspect model_1's current parameters (the weight and bias of its linear layer)
model_1.state_dict()

In [None]:
# Ground-truth values used to generate y, for comparison with the model's parameters
weight, bias

In [None]:
# nn.Linear shape demo: the size of the input's last dimension has to equal
# in_features, otherwise the layer cannot be applied.
tensor_A = torch.tensor(
    [
        [1, 2],
        [3, 4],
        [5, 6],
    ],
    dtype=torch.float32,
)
x = tensor_A

# The layer's weights are randomly initialised; seeding right before creating
# it makes the printed output reproducible.
torch.manual_seed(42)
# The layer applies a matrix multiplication with its weights plus a bias
linear = torch.nn.Linear(
    in_features=2,   # matches the size of the input's last dimension (3x2 -> 2)
    out_features=6,  # size of the output's last dimension (3x2 -> 3x6)
)
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

In [None]:
# Build a rank-3 integer tensor holding 1..9, laid out as a single 3x3 matrix
import torch
x = torch.arange(1, 10).view(1, 3, 3)
# Show the tensor together with its shape
x, x.shape

In [None]:
# Number of dimensions (rank) of x
x.ndim

In [None]:
# Every entry along dim 0, index 0 along dim 1 (dim 2 is kept whole)
x[:, 0]

In [None]:
# All entries along dim 0, but only index 1 of dim 1 and index 1 of dim 2
x[:, 1, 1]

In [None]:
# Index 0 of dim 0 and index 0 of dim 1, keeping every value along dim 2
x[0, 0, :] # same as x[0][0]