In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU is not available")
dataset_root = "sample_data"
model_path = "part2-model.pth"
model = None

GPU is available



# Part 2 - Custom Classification

In [15]:
# Adapted from https://github.com/mcmerdith/cisc484/blob/hw4/HW%204_CNN-1.ipynb

class VGG16_Block(nn.Module):
    def __init__(self, input_channels, out_channels, rate=0.3, drop=True):
        super().__init__()
        # This line creates a 2D convolutional layer using PyTorch's nn.Conv2d module.
        self.conv = nn.Conv2d(input_channels, out_channels, 3, 1, 1)
        # self.bn = nn.BatchNorm2d(out_channels) # normalize the activations of the convolutional layer in the neural network.
        # introduces non-linearity (-ve --> zeros)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(rate)
        self.drop = drop

    # This method defines the forward pass of the convolutional block.
    def forward(self, x):
        # x = self.relu(self.bn(self.conv(x)))
        x = self.relu(self.conv(x))
        if self.drop:
            x = self.dropout(x)
        return x


def vgg16_layer(input_channels, out_channels, num, dropout=0.3, pool=True):
    layers = []
    for i in range(num):
        layers.append(VGG16_Block(input_channels,
                      out_channels, dropout, drop=False))
        input_channels = out_channels
    if pool:
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)


class ModifiedVGG16(nn.Module):
    def __init__(self, n_classes: int):
        super(ModifiedVGG16, self).__init__()
        self.features = nn.Sequential(
            vgg16_layer(3, 64, 1),  # 224x224 -> 112x112
            vgg16_layer(64, 128, 1),  # 112x112 -> 56x56
            vgg16_layer(128, 256, 1),  # 56x56 -> 28x28
            vgg16_layer(256, 256, 1),  # 28x28 -> 14x14
            vgg16_layer(256, 256, 1),  # 14x14 -> 7x7
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256*7*7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, n_classes),
            nn.Flatten(),
        )

    # defines how data flows through the network during the forward pass.
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x


def make_model(n_classes=100):
    return ModifiedVGG16(n_classes).to(device)


# setup transformers
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    # transforms.Resize((112,112)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])


def train_dataset(batch_size=64):
    trainset = datasets.CIFAR100(
        dataset_root, train=True, transform=data_transforms, download=True)

    train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True,
                                               num_workers=2, pin_memory=True, persistent_workers=True)

    return trainset, train_loader


def test_dataset(batch_size=64):
    testset = datasets.CIFAR100(
        dataset_root, train=False, transform=data_transforms, download=True)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True,
                                              num_workers=2, pin_memory=True, persistent_workers=True)

    return testset, test_loader

# Training

In [18]:
trainset, train_loader = train_dataset()
testset, test_loader = test_dataset()

# build the model
model = make_model()

# training time
num_epochs = 30
criterion = nn.CrossEntropyLoss()
# higher learning rate to train in less epochs
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# save the best model
best_epoch = 0
best_acc = 0.0
best_weights = None

def test():
    test_loss = 0.0
    test_correct = 0.0
    model.eval()
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # score the model
            _, preds = torch.max(outputs, 1)
            test_loss += loss.item() * images.size(0)
            test_correct += torch.sum(preds == labels.data)

    return test_loss / len(testset), test_correct / len(testset)
        

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    
    train_loss = 0.0
    train_correct = 0.0
    model.train()
    for images, labels in train_loader:            
        images, labels = images.to(device), labels.to(device)
        # reset gradients
        optimizer.zero_grad()
        # calculate loss and backprop
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 20)
        
        optimizer.step()

        # score the model
        _, preds = torch.max(outputs, 1)
        train_loss += loss.item() * images.size(0)
        train_correct += torch.sum(preds == labels)

    # finished with this epoch
    scheduler.step()
    
    print(f"    train loss: {train_loss/len(trainset)} acc: {train_correct/len(trainset)}")
    epoch_loss, epoch_acc = test()
    print(f"    test loss: {epoch_loss} acc: {epoch_acc}")

    # store if this is the best iteration
    if epoch_acc > best_acc or not best_weights:
        best_epoch = epoch
        best_acc = epoch_acc
        best_weights = model.state_dict()

# force types
assert best_weights is not None, "no iterations were successful???"

print(f"Best iteration: {best_epoch + 1}, acc: {best_acc}")

# reset to the best iteration
model.load_state_dict(best_weights)

torch.save(best_weights, model_path)
print(f"Saved model to {model_path}")

Epoch 1/30
    train loss: 4.04836769569397 acc: 0.0847800001502037
    test loss: 3.4664439685821535 acc: 0.17970000207424164
Epoch 2/30
    train loss: 3.2431523509216307 acc: 0.21427999436855316
    test loss: 2.963332625198364 acc: 0.27950000762939453
Epoch 3/30
    train loss: 2.751947156982422 acc: 0.3138200044631958
    test loss: 2.495548673248291 acc: 0.3660999834537506
Epoch 4/30
    train loss: 2.357692910346985 acc: 0.39552000164985657
    test loss: 2.268086186981201 acc: 0.41519999504089355
Epoch 5/30
    train loss: 2.0242832818603516 acc: 0.46998000144958496
    test loss: 2.0925655405044554 acc: 0.45389997959136963
Epoch 6/30
    train loss: 1.7323317543029786 acc: 0.536579966545105
    test loss: 2.106599512863159 acc: 0.4640999734401703
Epoch 7/30
    train loss: 1.453368254108429 acc: 0.603119969367981
    test loss: 2.0431099254608154 acc: 0.47519999742507935
Epoch 8/30
    train loss: 0.8272034720611572 acc: 0.7730599641799927
    test loss: 1.9405837532043457 acc

# Testing

In [19]:
# if the model wasn't just trained, create a new one
if not model:
    model = make_model()

model.load_state_dict(torch.load(model_path))

<All keys matched successfully>

In [20]:
# load the test set
testset, test_loader = test_dataset()

correct = 0
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)

        # score the model
        _, preds = torch.max(outputs, 1)
        correct += torch.sum(preds == labels.data)



print(f"Test accuracy: {correct / len(testset)}")

Test accuracy: 0.539900004863739


# Debugging

In [50]:
from torchsummary import summary
model = make_model()
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
       VGG16_Block-3         [-1, 64, 224, 224]               0
         MaxPool2d-4         [-1, 64, 112, 112]               0
            Conv2d-5        [-1, 128, 112, 112]          73,856
              ReLU-6        [-1, 128, 112, 112]               0
       VGG16_Block-7        [-1, 128, 112, 112]               0
         MaxPool2d-8          [-1, 128, 56, 56]               0
            Conv2d-9          [-1, 256, 56, 56]         295,168
             ReLU-10          [-1, 256, 56, 56]               0
      VGG16_Block-11          [-1, 256, 56, 56]               0
        MaxPool2d-12          [-1, 256, 28, 28]               0
           Conv2d-13          [-1, 256, 28, 28]         590,080
             ReLU-14          [-1, 256,