# Additional Training Methods Review
These additional training methods can help your models become more accurate and perform better as your data grows and/or your model becomes more complex.

In this demo we will use Transfer Learning to train a model, introduce Learning Rate Schedulers, and show how to share your models with the community.

# Transfer Learning

In [1]:
# Import PyTorch vision models
from torchvision import models

In [None]:
# Fetch the list of available models once and reuse it for both prints
available_models = models.list_models()

# Print available models
print(available_models)
# Print number of available models
print(f"Number available: {len(available_models)}")

In [None]:
# Load a pre-trained VGG19 model from torchvision.
# NOTE: this uses the older `pretrained=True` flag; the next cell loads the
# same architecture through the newer `weights` argument.
model = models.vgg19(pretrained=True)

In [4]:
# Load with weights argument (new API)
# The weights are chosen via the `DEFAULT` member of `models.VGG19_Weights`.
model = models.vgg19(weights=models.VGG19_Weights.DEFAULT)

In [None]:
# Show the parameters by printing the model's state_dict
print(model.state_dict())

In [None]:
# Show the classifier layers (the model's `classifier` attribute)
print(model.classifier)

In [38]:
import torch.nn as nn

# Modify the output layer: replace classifier[6] with a fresh Linear layer.
# The input width is read from the layer being replaced instead of being
# hard-coded, so it always matches what the previous layer produces.
model.classifier[6] = nn.Linear(model.classifier[6].in_features, 20) # Setting number of classes to 20

In [None]:
# Show the updates: print the classifier again to see the replaced output layer
print(model.classifier)

# PyTorch Hub
Not literally a Hub hosted by PyTorch, but a way of sharing models with the community through GitHub.

PyTorch provides an easy-to-use module, `torch.hub`, for interacting with it.

In [28]:
# Listing Available Models
from torch import hub

In [None]:
# List using a GitHub repo, given as 'owner/repo'
hub.list('pytorch/vision:v0.10.0') # the part after ':' pins the version

In [None]:
# List available yolov5 models (no version given after the repo name here)
hub.list('ultralytics/yolov5')

Visit https://pytorch.org/hub/ for other models.

# Loading Models

In [31]:
# Import hub
from torch import hub 

In [None]:
# Load one of the yolov5 models: 'yolov5s' from the 'ultralytics/yolov5' repo
model = hub.load('ultralytics/yolov5', 'yolov5s') 

In [None]:
# From the Yolo documentation
# Images (passed to the model as a list of file names)
imgs = ['cat-1.jpg', 'zidane.jpg']  # batch of images

# Inference: call the model directly on the batch
results = model(imgs)

# Results: print the results object's summary
results.print()

In [None]:
# Load weights: call the repo's "get_model_weights" entry for resnet50.
# The returned object exposes a `.DEFAULT` member, used in the next cell.
weights = hub.load("pytorch/vision", "get_model_weights", name="resnet50")

In [None]:
# Load the model with specific weights (here the `DEFAULT` entry of `weights`)
model = hub.load("pytorch/vision", "resnet50", weights=weights.DEFAULT)

# Deploying to PyTorch Hub
Have a look at the `hubconf.py` file.

In [None]:
# Load our model together with its parameters
import torch

model = torch.hub.load('kodekloudhub/PyTorch', 'fake_model', pretrained=True) # pretrained set to True

In [None]:
# Print the state_dict of the model loaded from the hub
print(model.state_dict())

In [None]:
# List our model: show what the 'kodekloudhub/PyTorch' repo exposes via torch.hub
torch.hub.list('kodekloudhub/PyTorch')

In [None]:
# Get help about our model: show the help text for the 'fake_model' entry
torch.hub.help('kodekloudhub/PyTorch', 'fake_model')

In [None]:
# Load model without parameters (same entry point as before, pretrained=False)
import torch

model = torch.hub.load('kodekloudhub/PyTorch', 'fake_model', pretrained=False) # Set to False

In [None]:
# Show the model's layers
print(model)

In [None]:
# Modify the outputs: replace the `fc2` layer with a new Linear(50, 2) layer.
# `nn` is the `torch.nn` alias imported in an earlier cell.
model.fc2 = nn.Linear(50, 2)

# Print the model again to confirm the replacement
print(model)

# Learning Rate Schedulers
A learning rate scheduler dynamically adjusts the learning rate during training to improve how a model learns and converges. 

It helps prevent overshooting or inefficient updates by reducing the learning rate at specific intervals or based on performance. 

In [13]:
# Import the modules
import torch.optim as optim

In [14]:
# Define the optimizer: SGD over all of the model's parameters, lr=0.01
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [15]:
# Finally create the scheduler, attached to the optimizer defined above
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1) 
# reduces the lr by a factor of 0.1 (gamma) every 5 epochs (step_size)


In [16]:
# Another scheduler (rebinds `scheduler`, replacing the StepLR reference)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1, last_epoch=-1) 
# lr decays by multiplying with 0.1 (gamma) every epoch.

In [17]:
# Another scheduler (rebinds `scheduler` once more)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2) 
# monitors a metric and reduces the lr by a factor of 0.1 if no improvement is seen for 2 epochs

In [None]:
# The scheduler has a state_dict too, so it can be stored in a checkpoint
print(scheduler.state_dict())

# Training a Model using a pre-trained model
Let's go through the process of creating a simple training loop using a pre-trained model, and include our learning rate scheduler.

In [None]:
# Define our model: import torchvision's models module and list what is
# available to choose from
from torchvision import models

print(models.list_models())


In [26]:
# Model: VGG19 loaded with the `DEFAULT` entry of `models.VGG19_Weights`
model = models.vgg19(weights=models.VGG19_Weights.DEFAULT)

In [None]:
# Define the dataset, transformations and dataloaders
import torch
import torchvision.transforms.v2 as v2
from torchvision import datasets

# Preprocessing pipeline applied to every image loaded from the dataset
transform = v2.Compose([
    # Resize takes the target size as ONE argument: (height, width).
    # Writing `v2.Resize(224, 224)` would pass the second 224 as the
    # `interpolation` parameter instead of as part of the size.
    v2.Resize((224, 224)),
    v2.RandomHorizontalFlip(),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    # Important Normalization: per-channel mean and std
    # (presumably the statistics the pre-trained weights expect - confirm)
    v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [None]:
# CIFAR Dataset consists of 10 classes
# plane, car, bird, cat, deer, dog, frog, horse, ship, truck
# train=True selects the training split; this dataset feeds the training loop.
trainset = datasets.CIFAR10(root='./data', train=True,
                            download=True, transform=transform)
# Batches of 4 images, reshuffled every epoch, loaded by one worker process
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=1)

In [None]:
import torch.nn as nn
# Now update the final layer: 10 outputs, one per CIFAR-10 class.
# The input width is read from the layer being replaced instead of being
# hard-coded, so it always matches what the previous layer produces.
model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, 10)

In [8]:
# For this model training we are only going to train the final layer (Feature Extraction)
# Step 1: freeze every parameter of the model
for param in model.parameters():
    param.requires_grad = False

# setting requires_grad = False so that the gradients are not computed in backward()

In [9]:
# Unfreeze only the last layer: re-enable gradients for classifier[-1]
for param in model.classifier[-1].parameters():
    param.requires_grad = True

In [None]:
# Print layers: each parameter's name and whether it is trainable
for name, param in model.named_parameters():
    print(f"Layer: {name}, requires_grad: {param.requires_grad}")

In [19]:
# Define our Loss function, optimizer and scheduler
criterion = nn.CrossEntropyLoss()

# Only the final layer's parameters are handed to the optimizer
optimizer = optim.SGD(model.classifier[-1].parameters(), lr=0.001, momentum=0.9) # notice the optimizer

# NOTE(review): step_size=1 with gamma=0.1 cuts the lr by 10x after every
# scheduler.step(); over 10 epochs that is a very steep decay - confirm intended.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)


In [None]:
# Run a training loop
N_EPOCHS = 10


def _save_checkpoint(epoch, last_loss, path):
    """Save model, optimizer and scheduler state plus bookkeeping to `path`.

    Args:
        epoch: value stored under the 'epoch' key.
        last_loss: value stored under the 'loss' key (the most recent batch loss).
        path: file name the checkpoint is written to.
    """
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),  # Save scheduler state
        'loss': last_loss,
    }, path)


# Put the model in training mode explicitly, in case an earlier cell
# switched it to evaluation mode
model.train()

for epoch in range(N_EPOCHS):  # Loop over the dataset N_EPOCHS times

    running_loss = 0.0

    # Loop over the training data in batches
    for inputs, labels in trainloader:
        # Clear gradients left over from the previous batch
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print the average loss for this epoch
    print(f"Epoch: {epoch} Loss: {running_loss/len(trainloader)}")

    # Step the scheduler at the end of the epoch
    scheduler.step()

    ######### Save a checkpoint every 2 epochs
    if epoch % 2 == 0:
        _save_checkpoint(epoch, loss, f'training_checkpoint_{epoch}.tar')

# Save the final checkpoint after the last epoch
_save_checkpoint(N_EPOCHS, loss, 'training_checkpoint_final.tar')