In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

import matplotlib.pyplot as plt
import numpy as np

In [None]:
import torch

# Pick the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [None]:
### Load CIFAR-100 and wrap the train/test splits in DataLoaders.

# The torchvision models loaded with `pretrained=True` later in this notebook were
# trained on inputs normalized with the ImageNet per-channel statistics, so the same
# statistics are used here instead of a generic (0.5, 0.5, 0.5) scaling.
imagenet_mean = (0.485, 0.456, 0.406)
imagenet_std = (0.229, 0.224, 0.225)
normalize = transforms.Normalize(imagenet_mean, imagenet_std)

# Training pipeline: random crop (with rescale) to 224x224 as augmentation.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: deterministic resize + centre crop to the same 224x224 size.
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

batch_size = 64

trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)
# No shuffling for the test split so evaluation order is reproducible.
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Report how many samples each split holds, read from the dataset behind each loader.
total_train_data, total_test_data = (
    len(loader.dataset) for loader in (trainloader, testloader)
)

print(
    f"Total training data: {total_train_data}",
    f"Total testing data: {total_test_data}",
    sep="\n",
)

Total training data: 50000
Total testing data: 10000


In [None]:
### AlexNet architecture

class AlexNet(nn.Module):
    """AlexNet-style CNN for 3-channel images.

    Layout: five convolutions (with ReLU and three max-pools) -> adaptive average
    pooling to 6x6 -> a three-layer fully connected head with dropout.

    Args:
        num_classes: Number of output units of the final linear layer.
    """

    def __init__(self, num_classes=100):
        super().__init__()

        # Convolutional feature extractor. The layer order is kept fixed so the
        # `features.<idx>` keys in the state_dict stay stable.
        conv_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_layers)

        # Forces a 6x6 spatial map regardless of the input resolution, so the
        # first linear layer always sees 256 * 6 * 6 features.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        flat_features = 256 * 6 * 6
        head_layers = [
            nn.Dropout(),
            nn.Linear(flat_features, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the classifier output for a batch `x` (one row of `num_classes` scores per sample)."""
        pooled = self.avgpool(self.features(x))
        # Keep the batch dimension and flatten everything else.
        flat = pooled.flatten(start_dim=1)
        return self.classifier(flat)
# Place the model on the notebook-wide `device` rather than calling .cuda() directly,
# so this line also works when no GPU is available.
alexnet = AlexNet().to(device)

In [None]:
# Pretrained VGG16 with its last classifier layer replaced by a 100-way linear head.
vgg16 = torchvision.models.vgg16(pretrained=True)
# Read the input width from the layer being replaced instead of hard-coding 4096
# (same pattern as the ResNet/GoogLeNet cells).
vgg16.classifier[6] = nn.Linear(vgg16.classifier[6].in_features, 100)
# Use the notebook-wide `device` so the cell also runs on CPU-only machines.
vgg16.to(device)



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
# Pretrained ResNet-50 with its final fully connected layer replaced by a 100-way head.
resnet50 = torchvision.models.resnet50(pretrained=True)
resnet50.fc = nn.Linear(resnet50.fc.in_features, 100)
# Use the notebook-wide `device` so the cell also runs on CPU-only machines.
resnet50.to(device)




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
# Pretrained GoogLeNet with its final fully connected layer replaced by a 100-way head.
# NOTE(review): with pretrained weights torchvision's GoogLeNet presumably defaults to
# transform_input=True, which assumes ImageNet-normalized inputs — confirm this matches
# the Normalize statistics used by the data loaders.
googlenet = torchvision.models.googlenet(pretrained=True)
googlenet.fc = nn.Linear(googlenet.fc.in_features, 100)
# Use the notebook-wide `device` so the cell also runs on CPU-only machines.
googlenet.to(device)



GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

In [None]:
def train_model(model, criterion, optimizer, trainloader, epochs=10):
    """Train `model` in place with a plain supervised loop.

    Each batch is moved to the device the model's parameters already live on, so the
    function works for both CPU and GPU models (it no longer requires CUDA).

    Args:
        model: Module to optimize; it is left in training mode.
        criterion: Callable `criterion(outputs, labels)` returning a scalar loss tensor.
        optimizer: Optimizer whose `zero_grad()` / `step()` are called once per batch.
        trainloader: Iterable yielding `(inputs, labels)` tensor pairs.
        epochs: Number of full passes over `trainloader`.

    Returns:
        None. Progress is printed: the mean loss of every complete window of 100
        batches. Batches after the last complete window of an epoch are not reported.
    """
    # Follow the model's own placement instead of assuming a GPU is present.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 100 == 99:
                print(f'Epoch {epoch+1}, Batch {i+1}, Loss: {running_loss / 100:.3f}')
                running_loss = 0.0
    print('Finished Training')

# Shared loss function for every classifier trained in this notebook.
criterion = nn.CrossEntropyLoss()

In [None]:
# AlexNet (defined in this notebook, no pretrained weights): SGD with momentum, lr=0.01.
optimizer_alex = optim.SGD(
    params=alexnet.parameters(),
    lr=0.01,
    momentum=0.9,
)
train_model(
    model=alexnet,
    criterion=criterion,
    optimizer=optimizer_alex,
    trainloader=trainloader,
)

Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.005
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.005
Gradient norm of first layer: 0.005
Gradient norm of first layer: 0.005
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.005
Gradient norm of first layer: 0.005
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.007
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.005
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.006
Gradient norm of first layer: 0.005
Gradient norm of first layer: 0.005
Gradient norm of first layer: 0.005
Gradient norm of first layer: 0.006
Gradient norm of first layer

KeyboardInterrupt: 

In [None]:
# VGG16 (pretrained backbone, new 100-way head): SGD with momentum, lr=0.001.
optimizer_vgg = optim.SGD(
    params=vgg16.parameters(),
    lr=0.001,
    momentum=0.9,
)
train_model(
    model=vgg16,
    criterion=criterion,
    optimizer=optimizer_vgg,
    trainloader=trainloader,
)

Epoch 1, Batch 100, Loss: 4.432
Epoch 1, Batch 200, Loss: 3.454
Epoch 1, Batch 300, Loss: 2.932
Epoch 1, Batch 400, Loss: 2.678
Epoch 1, Batch 500, Loss: 2.575
Epoch 1, Batch 600, Loss: 2.485
Epoch 1, Batch 700, Loss: 2.406
Epoch 2, Batch 100, Loss: 2.255
Epoch 2, Batch 200, Loss: 2.219
Epoch 2, Batch 300, Loss: 2.208
Epoch 2, Batch 400, Loss: 2.171
Epoch 2, Batch 500, Loss: 2.121
Epoch 2, Batch 600, Loss: 2.130
Epoch 2, Batch 700, Loss: 2.051
Epoch 3, Batch 100, Loss: 1.973
Epoch 3, Batch 200, Loss: 1.998
Epoch 3, Batch 300, Loss: 1.936
Epoch 3, Batch 400, Loss: 1.969
Epoch 3, Batch 500, Loss: 1.979
Epoch 3, Batch 600, Loss: 1.906
Epoch 3, Batch 700, Loss: 1.954
Epoch 4, Batch 100, Loss: 1.880
Epoch 4, Batch 200, Loss: 1.867
Epoch 4, Batch 300, Loss: 1.840
Epoch 4, Batch 400, Loss: 1.837
Epoch 4, Batch 500, Loss: 1.808
Epoch 4, Batch 600, Loss: 1.806
Epoch 4, Batch 700, Loss: 1.845
Epoch 5, Batch 100, Loss: 1.793
Epoch 5, Batch 200, Loss: 1.757
Epoch 5, Batch 300, Loss: 1.745
Epoch 5,

KeyboardInterrupt: 

In [None]:
# ResNet-50 (pretrained backbone, new 100-way head): SGD with momentum, lr=0.001.
optimizer_resnet = optim.SGD(
    params=resnet50.parameters(),
    lr=0.001,
    momentum=0.9,
)
train_model(
    model=resnet50,
    criterion=criterion,
    optimizer=optimizer_resnet,
    trainloader=trainloader,
)

Epoch 1, Batch 100, Loss: 4.502
Epoch 1, Batch 200, Loss: 4.058


KeyboardInterrupt: 

In [None]:
# GoogLeNet (pretrained backbone, new 100-way head): SGD with momentum, lr=0.001.
optimizer_google = optim.SGD(
    params=googlenet.parameters(),
    lr=0.001,
    momentum=0.9,
)
train_model(
    model=googlenet,
    criterion=criterion,
    optimizer=optimizer_google,
    trainloader=trainloader,
)

In [None]:
def evaluate_model(model, testloader):
    """Compute and print the top-1 accuracy of `model` over `testloader`.

    Each batch is moved to the device the model's parameters already live on, so the
    function works for both CPU and GPU models (it no longer requires CUDA).

    Args:
        model: Module to evaluate; it is switched to eval mode and left there.
        testloader: Iterable yielding `(images, labels)` tensor pairs.

    Returns:
        Accuracy as a percentage in [0, 100].

    Raises:
        ZeroDivisionError: If `testloader` yields no samples.
    """
    # Follow the model's own placement instead of assuming a GPU is present.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest score along the class dimension.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy of the network on the test images: {accuracy} %')
    return accuracy

# Evaluate every network on the test split. The accuracies are kept keyed by model
# name (previously three of the four return values were discarded) and a label is
# printed before each result so the otherwise identical output lines can be told apart.
test_accuracy = {}
for model_name, net in (
    ("alexnet", alexnet),
    ("vgg16", vgg16),
    ("resnet50", resnet50),
    ("googlenet", googlenet),
):
    print(f"--- {model_name} ---")
    test_accuracy[model_name] = evaluate_model(net, testloader)

test_accuracy

In [None]:
# Write each network's state_dict to its own file, relative to the working directory.
for net, checkpoint_path in (
    (alexnet, 'alexnet_cifar100.pth'),
    (vgg16, 'vgg16_cifar100.pth'),
    (resnet50, 'resnet50_cifar100.pth'),
    (googlenet, 'googlenet_cifar100.pth'),
):
    torch.save(net.state_dict(), checkpoint_path)

In [None]:
### Transfer learning with a pretrained ResNet-50 (new part, following the book)

from torchvision import models

# Separate pretrained ResNet-50 instance used for the transfer-learning cells below.
transfer_model = torchvision.models.resnet50(pretrained=True)



In [None]:
# One line per parameter tensor: name, shape, requires_grad flag and element count.
named_params = transfer_model.named_parameters()
for name, param in named_params:
    print(
        f'Parameter Name: {name:30} | Shape: {str(param.shape):30} | Trainable: {param.requires_grad} | Number of Elements: {param.numel()}'
    )

Parameter Name: conv1.weight                   | Shape: torch.Size([64, 3, 7, 7])      | Trainable: True | Number of Elements: 9408
Parameter Name: bn1.weight                     | Shape: torch.Size([64])               | Trainable: True | Number of Elements: 64
Parameter Name: bn1.bias                       | Shape: torch.Size([64])               | Trainable: True | Number of Elements: 64
Parameter Name: layer1.0.conv1.weight          | Shape: torch.Size([64, 64, 1, 1])     | Trainable: True | Number of Elements: 4096
Parameter Name: layer1.0.bn1.weight            | Shape: torch.Size([64])               | Trainable: True | Number of Elements: 64
Parameter Name: layer1.0.bn1.bias              | Shape: torch.Size([64])               | Trainable: True | Number of Elements: 64
Parameter Name: layer1.0.conv2.weight          | Shape: torch.Size([64, 64, 3, 3])     | Trainable: True | Number of Elements: 36864
Parameter Name: layer1.0.bn2.weight            | Shape: torch.Size([64])           

In [None]:
# Freeze every parameter whose name does not contain "bn"; the others keep their
# current requires_grad setting.
# NOTE(review): this is a substring match on parameter names — normalization layers
# registered under other names (presumably e.g. `downsample.1`) are frozen too; confirm
# that is intended.
for name, param in transfer_model.named_parameters():
    if "bn" in name:
        continue
    param.requires_grad_(False)

In [None]:
# List the parameters again to check which ones are still flagged as trainable.
parameter_listing = transfer_model.named_parameters()
for name, param in parameter_listing:
    print(
        f'Parameter Name: {name:30} | Shape: {str(param.shape):30} | Trainable: {param.requires_grad} | Number of Elements: {param.numel()}'
    )

Parameter Name: conv1.weight                   | Shape: torch.Size([64, 3, 7, 7])      | Trainable: False | Number of Elements: 9408
Parameter Name: bn1.weight                     | Shape: torch.Size([64])               | Trainable: True | Number of Elements: 64
Parameter Name: bn1.bias                       | Shape: torch.Size([64])               | Trainable: True | Number of Elements: 64
Parameter Name: layer1.0.conv1.weight          | Shape: torch.Size([64, 64, 1, 1])     | Trainable: False | Number of Elements: 4096
Parameter Name: layer1.0.bn1.weight            | Shape: torch.Size([64])               | Trainable: True | Number of Elements: 64
Parameter Name: layer1.0.bn1.bias              | Shape: torch.Size([64])               | Trainable: True | Number of Elements: 64
Parameter Name: layer1.0.conv2.weight          | Shape: torch.Size([64, 64, 3, 3])     | Trainable: False | Number of Elements: 36864
Parameter Name: layer1.0.bn2.weight            | Shape: torch.Size([64])        

In [None]:
# Swap the final layer for a small MLP head: Linear -> ReLU -> Dropout -> Linear.
# The head's input width is read from the layer being replaced, so this cell can only
# be run once per freshly loaded model.
# NOTE(review): the head has 2 outputs, whereas the loaders in this notebook serve
# CIFAR-100 — confirm which dataset this head is meant for.
backbone_width = transfer_model.fc.in_features
head_layers = [
    nn.Linear(backbone_width, 500),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(500, 2),
]
transfer_model.fc = nn.Sequential(*head_layers)

In [None]:
import torch.optim as optimizer

# `found_lr` was never defined, so this cell raised NameError. 1e-3 (Adam's own
# default learning rate) is used as a stand-in.
# TODO: replace with the value obtained from a learning-rate range test.
found_lr = 1e-3

# Discriminative learning rates: deeper layers get larger steps than earlier ones.
# Built from the `optim` alias imported at the top of the notebook, so the
# `torch.optim` module is not the object being overwritten by the optimizer instance.
# NOTE(review): the freezing loop earlier leaves only "bn" parameters of layer3/layer4
# with requires_grad=True; the remaining weights in these groups receive no gradient
# unless they are unfrozen first — confirm intent.
optimizer = optim.Adam([
    {'params': transfer_model.layer4.parameters(), 'lr': found_lr / 3},
    {'params': transfer_model.layer3.parameters(), 'lr': found_lr / 9},
    # The replacement head was missing from the optimizer and would never have been
    # updated. It has no explicit 'lr', so it uses the default `lr=found_lr` below.
    {'params': transfer_model.fc.parameters()},
], lr=found_lr)

NameError: name 'found_lr' is not defined