In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU is not available")
dataset_root = "sample_data"
model_path = "part1-model.pth"
model = None

GPU is available



# Part 1 - Classification

In [2]:
def make_model(n_cls=100):
    model = models.vgg16(weights=models.VGG16_Weights.DEFAULT)
    num_in_ftrs = model.classifier[6].in_features
    model.classifier[6] = nn.Linear(num_in_ftrs, n_cls)
    model = model.to(device)

    # freeze all the layers
    for param in model.parameters():  
        param.requires_grad = False
    # unfreeze the last linear layer.
    for param in model.classifier[6].parameters():
        param.requires_grad = True

    return model


# setup transformers
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

In [None]:
# load the training set
batch_size = 64
trainset = datasets.CIFAR100(dataset_root, train=True, transform = data_transforms, download=True)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True,
                                           num_workers=2, pin_memory=True, persistent_workers=True)

# build the model
model = make_model()

# training time
num_epochs = 10
criterion = nn.CrossEntropyLoss()
# higher learning rate to train in less epochs
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# save the best model
best_epoch = 0
best_acc = 0.0
best_weights = None

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    
    curr_loss = 0.0
    curr_correct = 0.0
    for images, labels in train_loader:            
        images, labels = images.to(device), labels.to(device)
        # reset gradients
        model.train()
        optimizer.zero_grad()
        # calculate loss and backprop
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        
        optimizer.step()

        # score the model
        _, preds = torch.max(outputs, 1)
        curr_loss += loss.item() * images.size(0)
        curr_correct += torch.sum(preds == labels)

    # finished with this epoch
    scheduler.step()
    epoch_loss = curr_loss / len(trainset)
    epoch_acc = curr_correct / len(trainset)
    print(f"    train loss: {epoch_loss} acc: {epoch_acc}")

    # store if this is the best iteration
    if epoch_acc > best_acc or not best_weights:
        best_epoch = epoch
        best_acc = epoch_acc
        best_weights = model.state_dict()

# force types
assert best_weights is not None, "no iterations were successful???"

print(f"Best iteration: {best_epoch + 1}, acc: {best_acc}")

# reset to the best iteration
model.load_state_dict(best_weights)

torch.save(best_weights, model_path)
print(f"Saved model to {model_path}")

Epoch 1/10
    train loss: 2.182555526885986 acc: 0.44132000000000005
Epoch 2/10
    train loss: 1.9365713777923583 acc: 0.50402
Epoch 3/10
    train loss: 1.895754975128174 acc: 0.5211600000000001
Epoch 4/10
    train loss: 1.859160849761963 acc: 0.53258
Epoch 5/10
    train loss: 1.8430161903381348 acc: 0.53642
Epoch 6/10
    train loss: 1.8173012637329102 acc: 0.54674
Epoch 7/10
    train loss: 1.8057684832000733 acc: 0.5520200000000001
Epoch 8/10
    train loss: 1.5332362850761414 acc: 0.5896600000000001
Epoch 9/10
    train loss: 1.45825863822937 acc: 0.6057600000000001
Epoch 10/10
    train loss: 1.4107701216506958 acc: 0.6107
Best iteration: 10, acc: 0.6107
Saved model to part1-model.pth


In [3]:
# if the model wasn't just trained, create a new one
if not model:
    model = make_model()

model.load_state_dict(torch.load(model_path))

<All keys matched successfully>

In [4]:
# load the test set
batch_size = 100
testset = datasets.CIFAR100(dataset_root, train=False, transform = data_transforms, download=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True)

correct = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)

        # score the model
        _, preds = torch.max(outputs, 1)
        correct += torch.sum(preds == labels.data)



print(f"Test accuracy: {correct / len(testset)}")

Test accuracy: 0.548799991607666


In [5]:
from torchsummary import summary
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,