# SIGNA Chapter 3 Implementation
This is the first time we'll be implementing fully-connected layers, also known as "dense" layers. We'll be using `nn.Linear()` for this functionality. We'll also use the Adam optimizer, and include some `scikit-learn` train-test splits.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

import lightning as L
from torch.utils.data import TensorDataset, DataLoader

## Designing our Neural Net

In [2]:
class myNN(L.LightningModule):
    def __init__(self):
        super().__init__()

        # For convenience, use the same seed as the book
        L.seed_everything(seed=42)

        # Initialize the layers
        self.input_to_hidden = nn.Linear(in_features=2, out_features=2, bias=True)
        self.hidden_to_output = nn.Linear(in_features=2, out_features=3, bias=True)

        # Determine the loss function we want to use
        self.loss = nn.MSELoss(reduction='sum')

    def forward(self, input_values):
        # Here we specify how the architecture actually computes outputs
        output_values = self.input_to_hidden(input_values)
        output_values = F.relu(output_values)
        output_values = self.hidden_to_output(output_values)
        return output_values

    def configure_optimizers(self):
        # We try out the Adam optimizer
        return Adam(self.parameters(), lr=0.001)

    def training_step(self, batch):
        # This method basically runs one epoch
        # Pass training data to forward() and calculate loss
        inputs, labels = batch
        outputs = self.forward(inputs)
        loss = self.loss(outputs, labels)
        return loss

## Some Data Wrangling

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [4]:
# Read the data
url = "https://raw.githubusercontent.com/StatQuest/signa/main/chapter_03/iris.txt"
df = pd.read_table(url, sep=",", header=None)

# Rename the columns
df.columns = ["sepal_length",
              "sepal_width",
              "petal_length",
              "petal_width",
              "class"]

In [5]:
# Check the number of classes
print(df['class'].nunique(), "\n")

# See the classes
print(df['class'].unique(), "\n")

# See the frequencies per class, to see if the dataset is balanced
print(df['class'].value_counts())

3 

['Iris-setosa' 'Iris-versicolor' 'Iris-virginica'] 

class
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: count, dtype: int64


In [6]:
# Separate the actual input values
input_values = df[['petal_width', 'sepal_width']]

# Factorize the class labels
classes_as_numbers = df['class'].factorize()[0]

# Make a 75-25 train-test split
# Without stratify, you might get uneven class distributions between train/test sets. 
# With stratify, each set maintains the same proportion of each class as the original dataset.
input_train, input_test, label_train, label_test = train_test_split(input_values,
                                                                    classes_as_numbers,
                                                                    test_size=0.25,
                                                                    stratify=classes_as_numbers)

In [7]:
# Check out how the labels are structured
label_train

array([2, 1, 2, 0, 2, 2, 1, 0, 1, 2, 0, 2, 2, 1, 2, 2, 0, 1, 0, 1, 2, 2,
       2, 0, 1, 0, 0, 2, 0, 0, 0, 2, 1, 0, 1, 2, 1, 0, 2, 2, 1, 2, 2, 2,
       1, 1, 0, 0, 1, 1, 2, 2, 2, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 2, 2, 1,
       2, 0, 1, 2, 0, 1, 1, 1, 2, 1, 1, 2, 0, 0, 0, 2, 0, 2, 1, 2, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 1, 0, 1, 2, 1, 0, 1, 1, 0, 0, 2, 0,
       0, 2])

In [8]:
# Instead of classes 0-2, we want this to be three-element arrays
one_hot_label_train = F.one_hot(torch.tensor(label_train), num_classes=3).type(torch.float32)
one_hot_label_train[:10]

tensor([[0., 0., 1.],
        [0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [9]:
# Perform Min-Max Scaling with TRAINING min and max. 
# Use this on both training and testing to avoid data leakage
# Note: Min-Max Scaling = (input - min) / (max - min)

# CORRECT way with scikit-learn
scaler = MinMaxScaler()

# Fit only on training data
scaler.fit(input_train)

# Transform both datasets
input_train_normalized = scaler.transform(input_train)
input_test_normalized = scaler.transform(input_test)  # Uses training stats

In [10]:
# Convert the DataFrame input_train into tensors
input_train_tensors = torch.tensor(input_train.values).type(torch.float32)

# Load training data to dataloaders
train_dataset = TensorDataset(input_train_tensors, one_hot_label_train)
train_dataloader = DataLoader(train_dataset)

# Convert the DataFrame input_test into tensors
input_test_tensors = torch.tensor(input_test.values).type(torch.float32)

## Training Our Neural Net: Part 1

In [11]:
# Create a model
model = myNN()

# Print the parameters just to show they're initialized properly
for name, param in model.named_parameters():
    print(name, torch.round(param.data, decimals=2))

Seed set to 42


input_to_hidden.weight tensor([[ 0.5400,  0.5900],
        [-0.1700,  0.6500]])
input_to_hidden.bias tensor([-0.1500,  0.1400])
hidden_to_output.weight tensor([[-0.3400,  0.4200],
        [ 0.6200, -0.5200],
        [ 0.6100,  0.1300]])
hidden_to_output.bias tensor([0.5200, 0.1000, 0.3400])


In [12]:
# Create a trainer object
trainer = L.Trainer(max_epochs=10)
trainer.fit(model, train_dataloaders=train_dataloader)

ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/usr/local/lib/python3.13/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default

  | Name             | Type    | Params | Mode 
-----------------------------------------------------
0 | input_to_hidden  | Linear  | 6      | train
1 | hi

Epoch 9: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 112/112 [00:00<00:00, 548.67it/s, v_num=9]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 112/112 [00:00<00:00, 538.27it/s, v_num=9]


## Evaluating Our Classification Results

In [13]:
from sklearn.metrics import accuracy_score

# Run the input_test_tensors through the neural network
predictions = model(input_test_tensors)

# Select the output with highest value.
# If the prediction is [0.05, 1.5, 0.0] then the output is 1
# dim=0 applies argmax to rows, dim=1 applies argmax to columns
predicted_labels = torch.argmax(predictions, dim=1)

# Calculate how many predictions are correct, i.e. accuracy
accuracy_score(torch.tensor(label_test), predicted_labels)

0.631578947368421

## Training Our Neural Net: Part 2
**Lightning** creates _checkpoint_ files that keep track of the Weights and Biases as they change. As a result, all we have to do to pick up where we left off is tell the `Trainer` where the checkpoint files are. This will save us time since **we don't have to retrain the first 10 epochs**.

In [14]:
path_to_checkpoint = trainer.checkpoint_callback.best_model_path ## By default, "best" = "most recent"

# First, create a new Lightning Trainer
trainer = L.Trainer(max_epochs=250) # Before, max_epochs=10, so, by setting it to 200, we're adding 240 more.

# Then call trainer.fit() using the path to the most recent checkpoint files
# so that we can pick up where we left off.
trainer.fit(model, train_dataloaders=train_dataloader, ckpt_path=path_to_checkpoint)

ðŸ’¡ Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at /notebooks/lightning_logs/version_9/checkpoints/epoch=9-step=1120.ckpt
/usr/local/lib/python3.13/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:366: The dirpath has changed from '/notebooks/lightning_logs/version_9/checkpoints' to '/notebooks/lightning_logs/version_10/checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.

  | Name             | Type    | Params | Mode 
-----------------------------------------------------
0 | input_to_hidden  | Linear  | 6      | train
1 | hidden_to_output | 

Epoch 23:  18%|â–ˆâ–Š        | 20/112 [00:00<00:00, 454.30it/s, v_num=10] 

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Epoch 249: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 112/112 [00:00<00:00, 135.34it/s, v_num=10]

`Trainer.fit` stopped: `max_epochs=250` reached.


Epoch 249: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 112/112 [00:00<00:00, 134.21it/s, v_num=10]


## Performance Increased!

In [15]:
from sklearn.metrics import accuracy_score

# Run the input_test_tensors through the neural network
predictions = model(input_test_tensors)

# Select the output with highest value.
# If the prediction is [0.05, 1.5, 0.0] then the output is 1
# dim=0 applies argmax to rows, dim=1 applies argmax to columns
predicted_labels = torch.argmax(predictions, dim=1) 

# Calculate how many predictions are correct, i.e. accuracy
accuracy_score(torch.tensor(label_test), predicted_labels)

0.8421052631578947

## Plug and Chug New Data

In [16]:
# Hypothetical new data
out_of_sample_data = pd.DataFrame({
    'petal_width': [0.2],
    'sepal_width': [3.0]
})

# Use the scaler that we fit with the training data
normalized_out_of_sample_data = scaler.transform(out_of_sample_data)

# Generate a prediction
model(torch.tensor(normalized_out_of_sample_data).type(torch.float32))

tensor([[ 0.0849,  1.0555, -0.4015]], grad_fn=<AddmmBackward0>)