<a href="https://colab.research.google.com/github/middlebury-csci-0451/CSCI-0451/blob/main/lecture-notes/convnets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Image Classification

### Data Access

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

In [2]:
# Seed PyTorch's global RNG so that a fresh Restart & Run All draws the same
# shuffled batch order (and the same initial model weights in later cells).
torch.manual_seed(0)

# ToTensor converts each image to a float tensor scaled to [0, 1]; Normalize
# with mean 0.5 and std 0.5 per channel then rescales the values to [-1, 1].
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4

# Training split: downloaded into ./data on first use, reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

# Test split: same preprocessing, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

# Human-readable names for the ten integer class labels.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 50487805.74it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


### Visualizing The Data

### First Model: Logistic Regression

In [3]:
import torch.nn as nn
import torch.nn.functional as F

class Logistic(nn.Module):
    """Multinomial logistic regression: one linear layer on the flattened input.

    The module returns raw class scores (logits); no softmax is applied here.

    Args:
        in_features: number of values per example once flattened. The default
            of 3072 equals 3 * 32 * 32.
        num_classes: number of scores produced per example (default 10).
    """

    def __init__(self, in_features=3072, num_classes=10):
        super().__init__()

        self.linear1 = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Flatten every dimension except the first and return the logits."""
        x = torch.flatten(x, 1)
        x = self.linear1(x)
        return x

model = Logistic()

In [4]:
import torch.optim as optim

def train(model, k_epochs = 1, print_every = 2000, loader = None, lr = 0.001, momentum = 0.9):
    """Train `model` in place with SGD on a cross-entropy loss.

    Args:
        model: an nn.Module mapping a batch of inputs to per-class scores.
        k_epochs: number of full passes over the batches.
        print_every: report the mean loss once per this many mini-batches.
        loader: iterable yielding (inputs, labels) batches. When None, the
            notebook-level `trainloader` is used.
        lr: learning rate passed to SGD.
        momentum: momentum passed to SGD.

    Prints one progress line per `print_every` mini-batches followed by
    'Finished Training'. Returns None.
    """
    if loader is None:
        loader = trainloader

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    # Make sure mode-dependent layers (dropout, batch norm) behave as in training,
    # even if the model was previously switched to eval mode.
    model.train()

    for epoch in range(k_epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, (inputs, labels) in enumerate(loader):
            # gradients accumulate by default, so clear them before each step
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # report the mean loss over the last `print_every` mini-batches
            if i % print_every == print_every - 1:
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / print_every:.3f}')
                running_loss = 0.0

    print('Finished Training')

# Fit the current `model` (the Logistic instance when cells run in order) for two epochs.
train(model, k_epochs = 2)

[1,  2000] loss: 2.187
[1,  4000] loss: 2.187
[1,  6000] loss: 2.142
[1,  8000] loss: 2.149
[1, 10000] loss: 2.152
[1, 12000] loss: 2.180
[2,  2000] loss: 2.073
[2,  4000] loss: 2.117
[2,  6000] loss: 2.101
[2,  8000] loss: 2.091
[2, 10000] loss: 2.129
[2, 12000] loss: 2.116
Finished Training


In [5]:
def test(model):

    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

# Report test-set accuracy for the current `model` (the trained Logistic instance when cells run in order).
test(model)

Accuracy of the network on the 10000 test images: 34 %


### Second Model: 1 Hidden Layer

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Complete(nn.Module):
    """Fully connected network with a single hidden ReLU layer.

    The module returns raw class scores (logits); no softmax is applied here.

    Args:
        in_features: number of values per example once flattened. The default
            of 3072 equals 3 * 32 * 32.
        hidden_size: width of the hidden layer (default 10).
        num_classes: number of scores produced per example (default 10).
    """

    def __init__(self, in_features=3072, hidden_size=10, num_classes=10):
        super().__init__()

        self.linear1 = nn.Linear(in_features, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Flatten all but the first dimension, then apply linear -> ReLU -> linear."""
        x = torch.flatten(x, 1)
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        return x

model = Complete()

In [None]:
# Fit the current `model` (the Complete instance when cells run in order) for two epochs.
train(model, k_epochs=2)

In [None]:
# Report test-set accuracy for the current `model` (the trained Complete instance when cells run in order).
test(model)

Accuracy of the network on the 10000 test images: 17 %


### Third Model: Convolutional Neural Net

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class ConvNet(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages, then three linear layers.

    fc1 expects 16 * 5 * 5 features per example, which is what the two
    5x5-convolution / 2x2-pool stages produce from a 3 x 32 x 32 input.
    The module returns raw class scores (logits) for 10 classes.
    """

    def __init__(self):
        super().__init__()
        # convolutional feature extractor (the one pooling layer is reused after each conv)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # fully connected classifier head
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run conv -> ReLU -> pool twice, flatten, then apply the three linear layers."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        # keep the batch dimension, collapse channels and spatial dims
        flat = features.flatten(start_dim=1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


model = ConvNet()

In [None]:
# Fit the current `model` (the ConvNet instance when cells run in order) for two epochs.
train(model, k_epochs=2)

[1,  2000] loss: 1.825
[1,  4000] loss: 1.743
[1,  6000] loss: 1.725
[1,  8000] loss: 1.682
[1, 10000] loss: 1.650
[1, 12000] loss: 1.634
[2,  2000] loss: 1.585
[2,  4000] loss: 1.585
[2,  6000] loss: 1.555
[2,  8000] loss: 1.553
[2, 10000] loss: 1.534
[2, 12000] loss: 1.528
Finished Training


In [None]:
# Report test-set accuracy for the current `model` (the trained ConvNet instance when cells run in order).
test(model)

Accuracy of the network on the 10000 test images: 43 %
