# Building a Neural Network
In this demo we are going to demonstrate how to build and train a model using PyTorch.

This model will be a neural network.

# Create a Class
REVIEW: Building a neural network is made simple in PyTorch.

This is because of `nn.Module`, the base class we inherit from when we create our class. It simplifies building, managing, and organizing our model.

This is used to lay the blueprint for our model.

### Structure of our Class

`__init__()`: This is where we define the layers of our network.


`forward()`: This is where we define how data is processed through our layers to get a prediction.


In [1]:
# Import the nn module
import torch.nn as nn 

In [2]:
# Create a simple class
class SimpleNeuralNetwork(nn.Module):
    """Small fully connected network: 10 features in -> 20 -> 15 -> 1 out.

    ReLU is applied after the first two linear layers; the last layer
    returns its raw output.
    """

    def __init__(self):
        # Initialise nn.Module first so the layers assigned below are
        # registered and their parameters are tracked automatically.
        super().__init__()

        # Three linear layers; each one's input size equals the previous
        # layer's output size.
        self.input_layer = nn.Linear(in_features=10, out_features=20)
        self.hidden_layer = nn.Linear(in_features=20, out_features=15)
        self.output_layer = nn.Linear(in_features=15, out_features=1)

        # Activation function (introduces non-linearity into the model)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Send ``x`` through the three layers and return the final output."""
        after_input = self.activation(self.input_layer(x))
        after_hidden = self.activation(self.hidden_layer(after_input))
        # The output layer is deliberately left without an activation
        return self.output_layer(after_hidden)

In [3]:
# Build a small random batch to push through the layers by hand
import torch

# Shape (5, 10): 5 samples in the batch, 10 features per sample
example_tensor = torch.randn((5, 10))
print(example_tensor.shape)


torch.Size([5, 10])


In [4]:
# Stand-alone copy of the class's input layer (10 features in, 20 out)
input_layer = nn.Linear(in_features=10, out_features=20)

# Feed the example batch through it and show the resulting shape
input_linear_example = input_layer(example_tensor)
print(input_linear_example.shape)

torch.Size([5, 20])


In [5]:
# Stand-alone copy of the class's hidden layer (20 features in, 15 out)
hidden_layer = nn.Linear(in_features=20, out_features=15)

# Feed the input layer's result through it and show the resulting shape
hidden_linear_example = hidden_layer(input_linear_example)
print(hidden_linear_example.shape)

torch.Size([5, 15])


In [6]:
# Stand-alone copy of the class's output layer (15 features in, 1 out)
output_layer = nn.Linear(in_features=15, out_features=1)

# Feed the hidden layer's result through it and show the resulting shape
ouput_linear_example = output_layer(hidden_linear_example)
print(ouput_linear_example.shape)

torch.Size([5, 1])


In [7]:
# Now look at the output example before and after the ReLU activation
print(f"Before ReLU: {ouput_linear_example}\n\n")

Before ReLU: tensor([[-0.2273],
        [-0.1336],
        [-0.1640],
        [-0.3476],
        [ 0.0086]], grad_fn=<AddmmBackward0>)




In [8]:
# Apply ReLU to the output example: negative values become 0,
# positive values pass through unchanged
activation_relu_example = nn.functional.relu(ouput_linear_example)
print(f"After ReLU: {activation_relu_example}")

After ReLU: tensor([[0.0000],
        [0.0000],
        [0.0000],
        [0.0000],
        [0.0086]], grad_fn=<ReluBackward0>)


In [9]:
# Reproduce the model's forward() by hand with the stand-alone layers

# Fresh ReLU instance to apply between the layers
activation = nn.ReLU()

# Input layer, then ReLU
x = input_layer(example_tensor)
x = activation(x)
# Hidden layer, then ReLU
x = hidden_layer(x)
x = activation(x)
# Output layer (no activation)
output = output_layer(x)

# One item per line: heading, input batch, heading, final output
print("Example Tensor:", example_tensor, "\nOutput Tensor:", output, sep="\n")

Example Tensor:
tensor([[ 6.4633e-01,  1.4995e+00, -4.7450e-04, -2.1584e+00,  1.6394e+00,
          7.2253e-01,  1.1799e-01, -2.1203e-01, -1.6965e-01,  4.4847e-01],
        [ 3.6641e-01, -5.8538e-02,  1.2369e+00,  3.8564e-01,  4.8086e-01,
          4.0321e-01,  1.9775e-01,  1.9555e-01,  1.0579e+00,  1.2314e+00],
        [ 8.3325e-02, -9.7946e-02,  1.0330e-02, -1.4780e-01, -2.1924e-01,
         -1.4236e-01, -6.5612e-01,  1.7550e+00, -4.6690e-01,  6.7153e-01],
        [ 8.8750e-01,  9.9059e-01,  4.5990e-01, -6.6787e-01, -9.3856e-02,
         -6.3551e-01, -1.7154e+00, -6.2987e-01,  1.7602e+00,  9.7986e-01],
        [-3.7544e-01, -1.4572e+00,  1.3938e+00, -8.7970e-01, -1.8274e+00,
          3.0554e-01, -1.1361e-01,  1.5224e+00, -5.8151e-01, -1.8045e-01]])

Output Tensor:
tensor([[-0.2184],
        [-0.1969],
        [-0.1859],
        [-0.2171],
        [-0.1646]], grad_fn=<AddmmBackward0>)


# Create the Model


In [10]:
# Initialize the model
model = SimpleNeuralNetwork()


In [11]:
# Show the layers
print(model)

SimpleNeuralNetwork(
  (input_layer): Linear(in_features=10, out_features=20, bias=True)
  (hidden_layer): Linear(in_features=20, out_features=15, bias=True)
  (output_layer): Linear(in_features=15, out_features=1, bias=True)
  (activation): ReLU()
)


# Model Parameters
Layers have associated weights and biases.

These weights and biases get adjusted during model training.

Lucky for us, the adjustments are tracked automatically by PyTorch!

In [12]:
# Loop through the parameters and print them in a human-readable format
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Layer: input_layer.weight | Size: torch.Size([20, 10]) | Values : tensor([[ 0.0392,  0.1515, -0.2488, -0.0223,  0.1219,  0.2216,  0.1022, -0.1129,
          0.2168, -0.1572],
        [-0.2833, -0.2092, -0.1960,  0.1560,  0.2170, -0.3085, -0.0034,  0.1474,
          0.0305,  0.2033]], grad_fn=<SliceBackward0>) 

Layer: input_layer.bias | Size: torch.Size([20]) | Values : tensor([-0.2154, -0.1001], grad_fn=<SliceBackward0>) 

Layer: hidden_layer.weight | Size: torch.Size([15, 20]) | Values : tensor([[-5.2847e-02,  2.0027e-04,  1.5755e-01,  1.4706e-01,  1.6625e-01,
          1.0633e-01, -1.3583e-01, -5.3231e-02, -8.4812e-02, -3.3965e-02,
          8.1506e-02,  6.7148e-02,  6.9493e-02, -1.6088e-01, -1.3256e-01,
         -3.9679e-02, -1.8114e-01,  3.3114e-02,  1.0906e-01,  3.9972e-02],
        [ 1.7438e-02,  2.0866e-01,  9.6094e-02,  1.7150e-01,  4.7751e-02,
         -4.6895e-02,  6.5745e-02, -1.3358e-01, -1.2218e-01,  4.5894e-02,
         -1.9258e-01,  1.5990e-01,  1.6079e-01,  7.7859e-02,

In [13]:
# Here we see the current shape and values of each layer's weights and biases

In [14]:
# Another way to display parameters
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[ 3.9218e-02,  1.5150e-01, -2.4881e-01, -2.2315e-02,  1.2193e-01,
          2.2158e-01,  1.0221e-01, -1.1285e-01,  2.1681e-01, -1.5720e-01],
        [-2.8330e-01, -2.0917e-01, -1.9603e-01,  1.5597e-01,  2.1704e-01,
         -3.0851e-01, -3.3981e-03,  1.4735e-01,  3.0495e-02,  2.0333e-01],
        [ 2.7765e-01,  6.6041e-02,  7.0729e-02,  1.6976e-01,  2.2319e-01,
          1.0814e-01,  2.4366e-02, -2.5108e-01, -1.0507e-01, -1.7668e-01],
        [ 2.6568e-01, -3.9034e-03,  2.6493e-01,  1.7411e-01,  2.7335e-01,
         -1.8790e-01, -2.4024e-01, -1.5408e-01, -1.5831e-02, -2.2285e-01],
        [-3.0120e-01, -2.1881e-01,  9.2978e-02,  2.8449e-01, -4.1395e-02,
         -2.0326e-01,  2.3159e-01, -5.9635e-02,  7.7009e-02, -2.8341e-02],
        [ 8.4580e-02, -2.7772e-01, -7.8919e-02, -1.9953e-01, -7.7976e-02,
         -1.5422e-01, -3.0460e-01, -1.1211e-02,  8.8838e-02,  1.2237e-01],
        [-1.5768e-01, -2.1675e-01, -2.2207e-02, -2.0532e-01, -2.0846e-01,
         -

#### Review Autograd
In PyTorch, autograd automatically computes gradients, which is essential for training a neural network by adjusting its weights to improve predictions.

`model.parameters()` provides access to the model’s weights and biases, which are PyTorch tensors that have `requires_grad=True`. 

This means they automatically participate in PyTorch's autograd system, which tracks operations on these tensors to build a computation graph.

Very Powerful!

# Define a Loss Function and Optimizer

REVIEW:

Loss Function: Measures how well the model's predictions match the actual data, guiding the model on how much to adjust to improve.


Optimizer: Updates the model's parameters based on the loss, using methods like gradient descent to minimize errors and improve performance over time.

In [15]:
# Create a common loss function for an Image Classifier

# Part of the nn module
import torch.nn as nn

In [16]:
# Create an instance of the loss function
criterion = nn.CrossEntropyLoss() # commonly used for classification 

In [17]:
# Import optimizer modules
import torch.optim as optim

In [18]:
# Create an optimizer instance and give it the model's parameters to update
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) # SGD is commonly used in classification

# Go through the steps of Training a Model

But first we must create our data and transformations.

We are going to use the preloaded FashionMNIST dataset.

In [19]:
import torchvision.datasets
import torch
import torchvision
from torchvision.transforms import v2

# Preprocessing applied to every image (the images are already resized):
# convert to an image tensor, cast to float32 with scaled pixel values,
# then normalize the single grayscale channel with mean 0.5 and std 0.5.
transform = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=(0.5,), std=(0.5,)),
])

# Training split: batches of 32, reshuffled on every pass
train_dataset = torchvision.datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=32, shuffle=True, num_workers=1)

# Validation split (train=False): batches of 32, kept in a fixed order
val_dataset = torchvision.datasets.FashionMNIST(
    root='./data', train=False, download=True, transform=transform)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=32, shuffle=False, num_workers=1)


100%|██████████| 26.4M/26.4M [00:00<00:00, 27.9MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 1.64MB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 24.0MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 29.5MB/s]


# Create a NN for an Image Classifier
Here we are going to create a neural network to train an image classification model.



In [20]:
# Create the Class
import torch.nn as nn
# This module simplifies a way to import Operations (Activation Functions)
import torch.nn.functional as F 


class ImageClassificationNet(nn.Module):
    """Convolutional classifier for single-channel 28x28 images.

    Two conv + ReLU + max-pool stages are followed by three fully connected
    layers. Shapes for one 1x28x28 input image:

        conv1 (5x5) -> 6x24x24  -> pool -> 6x12x12
        conv2 (5x5) -> 16x8x8   -> pool -> 16x4x4 -> flatten -> 256

    Args:
        num_classes: Number of output scores (logits) per image.
            Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Takes an input with 1 channel, outputs 6 feature maps, uses a 5x5 kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        # Takes the 6 feature maps from conv1, outputs 16 feature maps, uses a 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Max pooling layer that downsamples the feature maps by a factor of 2
        self.pool = nn.MaxPool2d(2, 2)
        # Takes the flattened conv output (16 feature maps of size 4x4 = 256 values)
        # and outputs 120 units
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        # Second fully connected layer, which maps 120 units to 84 units
        self.fc2 = nn.Linear(120, 84)
        # Final layer: one raw score per class
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images ``x``."""
        # First conv layer -> ReLU -> max pooling
        x = self.pool(F.relu(self.conv1(x)))
        # Second conv layer -> ReLU -> max pooling
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the feature maps into a 1D vector, keeping the batch dimension
        x = torch.flatten(x, 1)
        # Two fully connected layers, each followed by ReLU
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Last layer has no activation: it returns the logits for each class
        return self.fc3(x)


# Create the Model

In [21]:
# Pick the device the model will run on
import torch

# Use the GPU ("cuda") when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [22]:
# Creates an instance of our model
model = ImageClassificationNet().to(device)

# Print it
print(model)

ImageClassificationNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


# Loss Function and Optimizer
Use the same loss function and optimizer as above.

In [23]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Create a Training Loop
A training loop in PyTorch is the process of iteratively feeding data through a model, calculating the loss, and updating the model’s parameters to minimize that loss. 

This loop continues for a set number of epochs or until the model reaches satisfactory performance on the training data.

In [24]:
# Define our number of epochs (full passes over the training data)
N_EPOCHS = 3

In [25]:
for epoch in range(N_EPOCHS):  # Loop over the dataset N_EPOCHS times

    model.train()  # Make sure the model is in training mode
    running_loss = 0.0  # Sum of the per-batch losses for the current epoch

    # Loop over the training data in batches; inputs are the images, labels are the classes
    for inputs, labels in train_loader:
        # The model was moved to `device`, so every batch must be moved there too;
        # otherwise the forward pass fails with a device mismatch on a GPU machine
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # Clear the gradients to avoid accumulation from previous steps

        outputs = model(inputs)  # Forward pass: compute the model's predictions on the inputs
        loss = criterion(outputs, labels)  # Calculate the loss by comparing predictions to true labels
        loss.backward()  # Backward pass: compute gradients of the loss with respect to model parameters
        optimizer.step()  # Update model parameters based on the computed gradients

        running_loss += loss.item()  # Accumulate the loss for the current epoch

    # Print the average loss for this epoch by dividing the accumulated loss by the number of batches
    print(f"Epoch: {epoch} Loss: {running_loss/len(train_loader)}")


Epoch: 0 Loss: 1.4718006820042928
Epoch: 1 Loss: 0.5883237002849578
Epoch: 2 Loss: 0.4931527696251869


# Create a Training Loop with Validation

In [26]:
for epoch in range(N_EPOCHS):  # Loop over the dataset N_EPOCHS times

    ####### TRAINING
    training_loss = 0.0  # Sum of the per-batch training losses for the current epoch
    # Set the model to training mode
    model.train()
    # Loop over the training data in batches; inputs are the images, labels are the classes
    for inputs, labels in train_loader:
        # The model was moved to `device`, so every batch must be moved there too;
        # otherwise the forward pass fails with a device mismatch on a GPU machine
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # Clear the gradients to avoid accumulation from previous steps

        outputs = model(inputs)  # Forward pass: compute the model's predictions on the inputs
        loss = criterion(outputs, labels)  # Calculate the loss by comparing predictions to true labels
        loss.backward()  # Backward pass: compute gradients of the loss with respect to model parameters
        optimizer.step()  # Update model parameters based on the computed gradients

        training_loss += loss.item()  # Accumulate the training loss for the current epoch

    ######## VALIDATION
    val_loss = 0.0  # Sum of the per-batch validation losses for the current epoch
    # Set the model to evaluation mode
    model.eval()

    #### NOTICE we do not compute gradients and/or adjust weights ####
    # torch.no_grad() switches off autograd tracking, so no computation graph
    # is built while we only measure the loss.
    with torch.no_grad():
        # Loop over the validation data in batches
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Same device as the model

            outputs = model(inputs)  # Compute predictions
            loss = criterion(outputs, labels)  # Calculate the loss by comparing predictions to true labels

            val_loss += loss.item()  # Accumulate the validation loss for the current epoch

    # Print the average training loss and the average validation loss for this epoch
    print(f"Epoch: {epoch} Train Loss: {training_loss/len(train_loader)} Val Loss: {val_loss/len(val_loader)}")

Epoch: 0 Train Loss: 0.43733544745047886 Val Loss: 0.4498921820340446
Epoch: 1 Train Loss: 0.4028322628815969 Val Loss: 0.4282173962591174
Epoch: 2 Train Loss: 0.377310985426108 Val Loss: 0.3835013898940513


# About Loss
If the validation loss continues to decrease, the model is performing well.

If the training loss continues to decrease but the validation loss does not, the model is likely overfitting.

Likely we would need many more epochs to train an accurate model.