# SIGNA Chapter 2 Implementation
This notebook basically goes through backpropagation 101. We will try to optimize the same architecture we used in the previous notebook.

In [1]:
import torch
import torch.nn as nn ## Gives us nn.Module()
import torch.nn.functional as F # Gives us relu()
from torch.optim import SGD # Note here we'll use SGD like traditional GD

import lightning as L ## New thing!
from torch.utils.data import TensorDataset, DataLoader ## We'll store our data in DataLoaders

## Designing Our Neural Net
The architecture should do the following things.
1. Start with random initial weights and biases.
2. Generate an output by "propagating forward" (i.e. a forward pass), and calculate the gradient of our Empirical Loss (MSE in our case) w.r.t. each weight and bias.
3. Use this gradient and the learning rate parameter to calculate a step size via SGD.
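4. Use the step size to calculate new weights and biases. (The "backward" in backpropagation refers to how the gradients in step 2 are computed: the chain rule is applied starting at the output and working back toward the input.)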
5. Repeat this process until the weights and biases are optimized.

In [2]:
class myNN(L.LightningModule):
    """Tiny two-branch ReLU network whose six scalar parameters are all trained.

    Each branch scales and shifts the input, applies ReLU, and is then scaled
    again; the two branch results are added to give the output. Training uses
    plain SGD on a sum-of-squared-residuals loss.
    """

    def __init__(self):
        """Create the trainable parameters and the loss function."""
        # Start by calling the superclass initializer
        super().__init__()

        # Initial weights and biases. These are fixed starting values taken
        # from the book (not drawn at random); biases start at zero.
        self.w1 = nn.Parameter(torch.tensor(0.06))
        self.b1 = nn.Parameter(torch.tensor(0.0))

        self.w2 = nn.Parameter(torch.tensor(3.49))
        self.b2 = nn.Parameter(torch.tensor(0.0))

        self.w3 = nn.Parameter(torch.tensor(-4.11))
        self.w4 = nn.Parameter(torch.tensor(2.74))

        # Empirical loss: reduction="sum" adds up the squared residuals of a
        # batch instead of averaging them.
        self.loss = nn.MSELoss(reduction="sum")

    def forward(self, input_values):
        """Run inputs through the network and return the predictions.

        Args:
            input_values: Tensor of inputs. Every operation is elementwise,
                so the output has the same shape as the input.

        Returns:
            Tensor of predicted values.
        """
        # Top branch: scale + shift, then ReLU
        top_x_axis_values = input_values * self.w1 + self.b1
        top_y_axis_values = F.relu(top_x_axis_values)

        # Bottom branch: scale + shift, then ReLU
        bottom_x_axis_values = input_values * self.w2 + self.b2
        bottom_y_axis_values = F.relu(bottom_x_axis_values)

        # Weighted sum of the two branches (there is no final bias term)
        output_values = top_y_axis_values * self.w3 + bottom_y_axis_values * self.w4
        return output_values

    def configure_optimizers(self):
        """Return the optimizer: SGD over all parameters with learning rate 0.01."""
        return SGD(self.parameters(), lr=0.01)

    def training_step(self, batch, batch_idx):
        """Compute the loss for a single batch.

        Lightning calls this once per batch (not once per epoch); it then
        backpropagates the returned loss and steps the optimizer.

        Args:
            batch: An ``(inputs, labels)`` pair as yielded by the DataLoader.
            batch_idx: Index of the batch within the current epoch (unused).

        Returns:
            The summed squared error between predictions and labels.
        """
        inputs, labels = batch
        # Call the module itself rather than .forward() so that any hooks
        # registered on the module are honored.
        outputs = self(inputs)
        loss = self.loss(outputs, labels)
        return loss

## Creating Training Data

In [3]:
# Three training examples: each input paired with its label (true outcome).
training_inputs = torch.tensor([0.0, 0.5, 1.0])
training_labels = torch.tensor([0.0, 1.0, 0.0])

# Pair inputs with labels, then wrap them in a DataLoader. The DataLoader
# defaults are spelled out: one example per batch, served in the given order.
training_dataset = TensorDataset(training_inputs, training_labels)
dataloader = DataLoader(dataset=training_dataset, batch_size=1, shuffle=False)

## Training the Model

In [4]:
# Instantiate the network with its starting parameter values
model = myNN()

# Sanity check: list every parameter, rounded to two decimals, to confirm
# the starting values are what we expect
for param_name, param_value in model.named_parameters():
    rounded_value = torch.round(param_value.data, decimals=2)
    print(param_name, rounded_value)

w1 tensor(0.0600)
b1 tensor(0.)
w2 tensor(3.4900)
b2 tensor(0.)
w3 tensor(-4.1100)
w4 tensor(2.7400)


In [5]:
# Build the trainer. max_epochs is the number of full passes over the
# dataloader; training_step() runs once per batch within each pass.
# Logging, checkpointing, and the progress bar are all switched off.
trainer = L.Trainer(
    max_epochs=500,
    logger=False,
    enable_checkpointing=False,
    enable_progress_bar=False,
)

# Optimize the model's parameters on the training data
trainer.fit(model, train_dataloaders=dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type    | Params | Mode 
-------------------------------------------------
0 | loss         | MSELoss | 0      | train
  | other params | n/a     | 6      | n/a  
-------------------------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)
1         Modules in train mode
0         Modules in eval mode
/usr/local/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
`Trainer.fit` stopped: `max_epochs=500` reached.


## Plug and Chug

In [6]:
# Create the different doses we want to run through the neural network.
# torch.linspace() includes both endpoints, so this is 0.0, 0.1, ..., 1.0
input_doses = torch.linspace(start=0, end=1, steps=11)

# This is inference only, so turn off gradient tracking: no autograd graph is
# built and the displayed tensor carries no grad_fn.
with torch.no_grad():
    predicted_values = model(input_doses)

# Optionally, we can round the output values to match the scale we want
torch.round(predicted_values, decimals=2)

tensor([0.0000, 0.0200, 0.4500, 0.8700, 1.2000, 1.0000, 0.8000, 0.6000, 0.4000,
        0.2000, 0.0000], grad_fn=<RoundBackward1>)

In [7]:
# Training has moved the parameters away from their starting values;
# list them again, rounded to two decimals, to compare.
for param_name, param_value in model.named_parameters():
    rounded_value = torch.round(param_value.data, decimals=2)
    print(param_name, rounded_value)

w1 tensor(1.4800)
b1 tensor(-0.5700)
w2 tensor(2.7000)
b2 tensor(-0.2600)
w3 tensor(-4.2400)
w4 tensor(1.5800)
