In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU is not available")
dataset_root = "sample_data"
model_path = "part1-model.pth"
model = None

GPU is available



# Part 1 - Classification

In [2]:
def make_model(n_cls=100):
    model = models.vgg16(weights=models.VGG16_Weights.DEFAULT)
    num_in_ftrs = model.classifier[6].in_features
    model.classifier[6] = nn.Linear(num_in_ftrs, n_cls) # type: ignore
    model = model.to(device)

    # freeze all the layers
    for param in model.parameters():  
        param.requires_grad = False
    # unfreeze the last linear layer.
    for param in model.classifier[6].parameters():
        param.requires_grad = True

    return model


# setup transformers
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])


def train_dataset(batch_size=64):
    trainset = datasets.CIFAR100(
        dataset_root, train=True, transform=data_transforms, download=True)

    train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True,
                                               num_workers=2, pin_memory=True, persistent_workers=True)

    return trainset, train_loader


def test_dataset(batch_size=64):
    testset = datasets.CIFAR100(
        dataset_root, train=False, transform=data_transforms, download=True)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True,
                                              num_workers=2, pin_memory=True, persistent_workers=True)

    return testset, test_loader

In [3]:
trainset, train_loader = train_dataset()
testset, test_loader = test_dataset()

# build the model
model = make_model()

# training time
num_epochs = 30
criterion = nn.CrossEntropyLoss()
# higher learning rate to train in less epochs
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# save the best model
best_epoch = 0
best_acc = 0.0
best_weights = None


def test():
    test_loss = 0.0
    test_correct = 0.0
    model.eval()
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # score the model
            _, preds = torch.max(outputs, 1)
            test_loss += loss.item() * images.size(0)
            test_correct += torch.sum(preds == labels.data)

    return test_loss / len(testset), test_correct / len(testset)


for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    train_loss = 0.0
    train_correct = 0.0
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        # reset gradients
        optimizer.zero_grad()
        # calculate loss and backprop
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 20)

        optimizer.step()

        # score the model
        _, preds = torch.max(outputs, 1)
        train_loss += loss.item() * images.size(0)
        train_correct += torch.sum(preds == labels)

    # finished with this epoch
    scheduler.step()

    print(
        f"    train loss: {train_loss/len(trainset)} acc: {train_correct/len(trainset)}")
    epoch_loss, epoch_acc = test()
    print(f"    test loss: {epoch_loss} acc: {epoch_acc}")

    # store if this is the best iteration
    if epoch_acc > best_acc or not best_weights:
        best_epoch = epoch
        best_acc = epoch_acc
        best_weights = model.state_dict()

# force types
assert best_weights is not None, "no iterations were successful???"

print(f"Best iteration: {best_epoch + 1}, acc: {best_acc}")

# reset to the best iteration
model.load_state_dict(best_weights)

torch.save(best_weights, model_path)
print(f"Saved model to {model_path}")

Epoch 1/30
    train loss: 2.206428056488037 acc: 0.4371799826622009
    test loss: 1.6167892873764038 acc: 0.5478999614715576
Epoch 2/30
    train loss: 1.9413582613372802 acc: 0.5047000050544739
    test loss: 1.560542865753174 acc: 0.5722000002861023
Epoch 3/30
    train loss: 1.8950558100128174 acc: 0.5178599953651428
    test loss: 1.5373385904312133 acc: 0.5799999833106995
Epoch 4/30
    train loss: 1.865313833465576 acc: 0.5300799608230591
    test loss: 1.531566939163208 acc: 0.588699996471405
Epoch 5/30
    train loss: 1.834232890586853 acc: 0.5407800078392029
    test loss: 1.4964708995819092 acc: 0.6010000109672546
Epoch 6/30
    train loss: 1.8273655110549927 acc: 0.5453799962997437
    test loss: 1.4886995418548583 acc: 0.6006999611854553
Epoch 7/30
    train loss: 1.8019631964874268 acc: 0.5526399612426758
    test loss: 1.454771367073059 acc: 0.6085000038146973
Epoch 8/30
    train loss: 1.525068715286255 acc: 0.5928999781608582
    test loss: 1.2920723869323731 acc: 0.6

In [3]:
# if the model wasn't just trained, create a new one
if not model:
    model = make_model()

model.load_state_dict(torch.load(model_path))

<All keys matched successfully>

In [None]:
# load the test set
testset, test_loader = test_dataset()

correct = 0
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)

        # score the model
        _, preds = torch.max(outputs, 1)
        correct += torch.sum(preds == labels.data)


print(f"Test accuracy: {correct / len(testset)}")

Test accuracy: 0.548799991607666


In [5]:
from torchsummary import summary
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,