In [23]:
import torch
import torchvision
from torchvision.transforms import v2
from torchvision.datasets import FGVCAircraft
from torch.utils.data import DataLoader
from pathlib import Path


In [24]:
transform = v2.Compose(
            [
                v2.CenterCrop(size=(224, 224)),  # Or Resize(antialias=True)
                v2.PILToTensor(),
                v2.ToDtype(torch.float32),  # Normalize expects float input
                v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        )

In [25]:
data_path = Path("../../data/input/")
train_set = FGVCAircraft(Path(data_path), split="train", annotation_level="variant", transform=transform, download=True)
test_set = FGVCAircraft(Path(data_path), split="test", annotation_level="variant", transform=transform, download=True)

train_loader = DataLoader(train_set, batch_size=16, shuffle=True, num_workers=2)
test_loader = DataLoader(test_set, batch_size=16, shuffle=False, num_workers=2)

In [32]:
file = open(Path(data_path, "fgvc-aircraft-2013b", "data", "variants.txt"), "r")
variants = file.readlines()
variants = [variant[:-1] for variant in variants]
print(variants)
file.close()

['707-320', '727-200', '737-200', '737-300', '737-400', '737-500', '737-600', '737-700', '737-800', '737-900', '747-100', '747-200', '747-300', '747-400', '757-200', '757-300', '767-200', '767-300', '767-400', '777-200', '777-300', 'A300B4', 'A310', 'A318', 'A319', 'A320', 'A321', 'A330-200', 'A330-300', 'A340-200', 'A340-300', 'A340-500', 'A340-600', 'A380', 'ATR-42', 'ATR-72', 'An-12', 'BAE 146-200', 'BAE 146-300', 'BAE-125', 'Beechcraft 1900', 'Boeing 717', 'C-130', 'C-47', 'CRJ-200', 'CRJ-700', 'CRJ-900', 'Cessna 172', 'Cessna 208', 'Cessna 525', 'Cessna 560', 'Challenger 600', 'DC-10', 'DC-3', 'DC-6', 'DC-8', 'DC-9-30', 'DH-82', 'DHC-1', 'DHC-6', 'DHC-8-100', 'DHC-8-300', 'DR-400', 'Dornier 328', 'E-170', 'E-190', 'E-195', 'EMB-120', 'ERJ 135', 'ERJ 145', 'Embraer Legacy 600', 'Eurofighter Typhoon', 'F-16A/B', 'F/A-18', 'Falcon 2000', 'Falcon 900', 'Fokker 100', 'Fokker 50', 'Fokker 70', 'Global Express', 'Gulfstream IV', 'Gulfstream V', 'Hawk T1', 'Il-76', 'L-1011', 'MD-11', 'MD-

In [33]:
next(iter(train_loader))

[tensor([[[[ 351.5939,  351.5939,  347.2271,  ...,  163.8210,  168.1878,
             181.2882],
           [ 355.9607,  351.5939,  347.2271,  ...,  181.2882,  194.3886,
             207.4891],
           [ 360.3275,  355.9607,  351.5939,  ...,  207.4891,  220.5895,
             233.6900],
           ...,
           [ 456.3974,  447.6638,  456.3974,  ...,   67.7511,   63.3843,
              63.3843],
           [ 421.4629,  417.0961,  417.0961,  ...,  207.4891,  198.7554,
             190.0218],
           [ 386.5284,  382.1616,  377.7947,  ...,  316.6594,  334.1266,
             347.2271]],
 
          [[ 404.2143,  404.2143,  399.7500,  ...,  234.5714,  239.0357,
             252.4286],
           [ 408.6786,  404.2143,  399.7500,  ...,  239.0357,  252.4286,
             265.8214],
           [ 413.1429,  408.6786,  404.2143,  ...,  265.8214,  279.2143,
             292.6071],
           ...,
           [ 466.7143,  471.1786,  471.1786,  ...,   73.8571,   82.7857,
              82.78

In [34]:
images, labels = next(iter(test_loader))
print(images.shape)
print(labels)

torch.Size([16, 3, 224, 224])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [35]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 53 * 53, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 100)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [36]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [37]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

Finished Training


In [39]:
outputs = net(images)
_, predicted = torch.max(outputs, 1)

print('Predicted: ', ' '.join(f'{variants[predicted[j]]:5s}'
                              for j in range(4)))

Predicted:  DC-3  DC-3  DC-3  727-200


In [41]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

Accuracy of the network on the 10000 test images: 1 %
