# This notebook trains the model for the aircraft classification task.

In [24]:
import numpy as np
import onnx
import onnxscript
import os
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import torch.nn as nn
import torch


In [2]:
# Use hardware acceleration if available
if torch.cuda.is_available(): 
    device = torch.device("cuda") 
elif torch.mps.is_available(): 
    device = torch.device("mps")
else: 
    device = torch.device("cpu")

In [None]:
# Download the file
!wget https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/archives/fgvc-aircraft-2013b.tar.gz
# Extract the tar.gz file
!tar -xvzf fgvc-aircraft-2013b.tar.gz

In [3]:
# Load an image as test
img = Image.open('fgvc-aircraft-2013b/data/images/2251118.jpg')

# Resize to target size
img = img.resize((300, 200))

# Convert to numpy array
x = np.array(img)
print(x.shape)  # (200, 300, 3)

(200, 300, 3)


In [4]:
x

array([[[150, 154, 174],
        [151, 155, 174],
        [151, 155, 175],
        ...,
        [123, 146, 178],
        [125, 145, 178],
        [124, 146, 178]],

       [[151, 155, 174],
        [153, 155, 175],
        [153, 156, 175],
        ...,
        [124, 146, 180],
        [126, 145, 178],
        [126, 146, 179]],

       [[152, 156, 175],
        [154, 155, 175],
        [154, 155, 175],
        ...,
        [123, 147, 179],
        [124, 147, 179],
        [125, 147, 179]],

       ...,

       [[ 85,  87,  92],
        [ 68,  71,  75],
        [ 79,  82,  87],
        ...,
        [ 89, 110, 122],
        [ 43,  49,  54],
        [ 28,  31,  35]],

       [[ 75,  82,  87],
        [  2,   3,   4],
        [ 18,  20,  22],
        ...,
        [ 47,  60,  68],
        [  0,   0,   0],
        [  0,   0,   0]],

       [[ 46,  52,  59],
        [ 41,  47,  55],
        [ 46,  54,  61],
        ...,
        [ 12,  14,  16],
        [  0,   0,   0],
        [  1,   0,   0]]

In [5]:
# Let's create a data loader class

class AircraftDataset(Dataset):
    def __init__(self, data_dir, labels, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = [os.path.join(data_dir, f'{img_name}.jpg') for img_name in labels.keys()]
        self.classes = sorted(list(set(labels.values())))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}
        self.labels = [self.class_to_idx[label] for label in labels.values()]

        #for label_name in self.classes:
        #   label_dir = os.path.join(data_dir, label_name)
        #    for img_name in self.labels[label_name].keys():
        #        self.image_paths.append(os.path.join(data_dir, img_name))
        #        self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label


In [6]:
# We use ImageNet-like preprocessing

input_size = (300, 200)

# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.Resize((input_size[0], input_size[1])),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.Resize((input_size[0], input_size[1])),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])


In [7]:
labels = {}

for type in ['train', 'test', 'val']:
    with open(f'fgvc-aircraft-2013b/data/images_family_{type}.txt') as f:
        lines = f.readlines()

    lines = [line.strip() for line in lines]

    labels[type] = {line[:7]: line[8:] for line in lines}

# labels

In [19]:
# Using our DataLoader to load the dataset 

train_dataset = AircraftDataset(
    data_dir='./fgvc-aircraft-2013b/data/images',
    labels=labels['train'],
    transform=train_transforms
)

val_dataset = AircraftDataset(
    data_dir='./fgvc-aircraft-2013b/data/images',
    labels=labels['val'],
    transform=val_transforms
)

test_dataset = AircraftDataset(
    data_dir='./fgvc-aircraft-2013b/data/images',
    labels=labels['test'],
    transform=val_transforms
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [9]:
# Check if images have been loaded
N = 1  
dataiter = iter(train_loader)

image_list = []
label_list = []

for i in range(0, N): 
  image, label = next(dataiter)
  image_list.append(image)
  label_list.append(label)

label, image.shape

(tensor([ 8, 21, 40, 36, 37, 35,  3, 49, 58, 43,  4, 56, 53, 15, 24, 54, 45, 19,
         62,  7, 26, 37, 16, 15,  0, 48, 49, 62, 23, 15,  9, 17]),
 torch.Size([32, 3, 300, 200]))

In [10]:
len(sorted(list(set(labels['train'].values()))))

70

In [11]:
# Neural network
# We played with different versions of the model, which led to the addition
# of transforms.RandomHorizontalFlip() and transforms.RandomRotation(5) above:
num_classes = len(sorted(list(set(labels['train'].values()))))

model = nn.Sequential(
    # Block 1: Initial feature extraction
    # We start with 32 filters to catch basic edges/textures
    nn.Conv2d(3, 32, kernel_size=3, padding=1),
    nn.BatchNorm2d(32), # Normalizes activations
    nn.ReLU(),
    nn.MaxPool2d(2, 2), # Output: 32 x 150 x 100
    
    # Block 2: Catching shapes
    nn.Conv2d(32, 64, kernel_size=3, padding=1),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(2, 2), # Output: 64 x 75 x 50
    
    # Block 3: Complex features (wing shapes, engines)
    nn.Conv2d(64, 128, kernel_size=3, padding=1),
    nn.BatchNorm2d(128),
    nn.ReLU(),
    nn.MaxPool2d(2, 2), # Output: 128 x 37 x 25
    
    # Feature Reduction
    # Adaptive pooling allows the model to work regardless of input size
    # and reduces the 37x25 dimensions to 1x1
    nn.AdaptiveAvgPool2d((1, 1)), 
    nn.Flatten(),
    
    # Classification Head
    nn.Linear(128, 256),
    nn.ReLU(),
    nn.Dropout(0.5), # Prevents neurons from co-depending too much
    nn.Linear(256, num_classes)
    # Note: No Softmax here if using nn.CrossEntropyLoss
    ).to(device)

In [12]:
def train_one_epoch(model, loader, optimizer, criterion, device):
    model.train() # Set to training mode (enables Dropout/BatchNorm)
    running_loss = 0.0
    correct = 0
    total = 0
    
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        
        # 1. Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # 2. Backward pass
        optimizer.zero_grad() # Reset gradients
        loss.backward()       # Compute gradients
        optimizer.step()      # Update weights
        
        # Metrics
        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
    return running_loss / total, correct / total

def validate(model, loader, criterion, device):
    model.eval() # Set to evaluation mode (disables Dropout/BatchNorm)
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad(): # No gradient calculation for validation
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            
    return running_loss / total, correct / total

In [15]:
criterion = nn.CrossEntropyLoss()

def make_model(learning_rate=0.001):
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer

In [None]:
# Let's try different learning rates

for learning_rate in [0.001, 0.01, 0.1]:
    
    print(f'---- Learning rate: {learning_rate} ----')

    model, optimizer = make_model(
        learning_rate=learning_rate
    )

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    num_epochs = 3

    for epoch in range(num_epochs):
        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
        val_loss, val_acc = validate(model, val_loader, criterion, device)
        
        # Step the scheduler
        scheduler.step()
        
        print(f"Epoch {epoch+1}/{num_epochs}:")
        print(f"  Train Loss: {train_loss:.4f} | Acc: {train_acc:.2%}")
        print(f"  Val Loss:   {val_loss:.4f} | Acc: {val_acc:.2%}")


---- Learning rate: 0.001 ----
Epoch 1/3:
  Train Loss: 4.0910 | Acc: 6.72%
  Val Loss:   3.9944 | Acc: 8.01%
Epoch 2/3:
  Train Loss: 3.9980 | Acc: 7.59%
  Val Loss:   3.9572 | Acc: 7.86%
Epoch 3/3:
  Train Loss: 3.9467 | Acc: 8.31%
  Val Loss:   3.9128 | Acc: 8.67%
---- Learning rate: 0.01 ----
Epoch 1/3:
  Train Loss: 4.1056 | Acc: 7.20%
  Val Loss:   3.9975 | Acc: 8.04%
Epoch 2/3:
  Train Loss: 4.0456 | Acc: 7.62%
  Val Loss:   3.9935 | Acc: 8.22%
Epoch 3/3:
  Train Loss: 4.0217 | Acc: 8.28%
  Val Loss:   3.9789 | Acc: 8.25%
---- Learning rate: 0.1 ----
Epoch 1/3:
  Train Loss: 4.1559 | Acc: 7.68%
  Val Loss:   4.0910 | Acc: 8.01%
Epoch 2/3:
  Train Loss: 4.1122 | Acc: 7.80%
  Val Loss:   4.0874 | Acc: 8.01%
Epoch 3/3:
  Train Loss: 4.1066 | Acc: 8.01%
  Val Loss:   4.0970 | Acc: 8.01%


In [16]:
# Based on our testing above, we settle on a learning rate of 0.001


model, optimizer = make_model(
    learning_rate=0.001
)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

num_epochs = 10

for epoch in range(num_epochs):
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    
    # Step the scheduler
    # scheduler.step()
    
    print(f"Epoch {epoch+1}/{num_epochs}:")
    print(f"  Train Loss: {train_loss:.4f} | Acc: {train_acc:.2%}")
    print(f"  Val Loss:   {val_loss:.4f} | Acc: {val_acc:.2%}")


Epoch 1/10:
  Train Loss: 4.0942 | Acc: 7.38%
  Val Loss:   3.9852 | Acc: 8.01%
Epoch 2/10:
  Train Loss: 3.9801 | Acc: 8.10%
  Val Loss:   3.9499 | Acc: 8.52%
Epoch 3/10:
  Train Loss: 3.9518 | Acc: 7.98%
  Val Loss:   3.9252 | Acc: 8.52%
Epoch 4/10:
  Train Loss: 3.9083 | Acc: 8.55%
  Val Loss:   3.8923 | Acc: 9.24%
Epoch 5/10:
  Train Loss: 3.8817 | Acc: 8.88%
  Val Loss:   3.8637 | Acc: 9.12%
Epoch 6/10:
  Train Loss: 3.8456 | Acc: 8.97%
  Val Loss:   3.8514 | Acc: 9.21%
Epoch 7/10:
  Train Loss: 3.8215 | Acc: 9.51%
  Val Loss:   3.8540 | Acc: 9.09%
Epoch 8/10:
  Train Loss: 3.7944 | Acc: 9.30%
  Val Loss:   3.8269 | Acc: 9.36%
Epoch 9/10:
  Train Loss: 3.7410 | Acc: 10.41%
  Val Loss:   3.7951 | Acc: 9.60%
Epoch 10/10:
  Train Loss: 3.7133 | Acc: 10.38%
  Val Loss:   3.7578 | Acc: 10.56%


In [20]:
# Let's apply the model to the test set

test_loss, test_acc = validate(model, test_loader, criterion, device)
print(f"  Test Loss: {test_loss:.4f} | Acc: {test_acc:.2%}")

  Test Loss: 3.7578 | Acc: 10.56%


In [27]:
# Saving the model to ONNX format
import json

model.to("cpu")
model.eval()

example_inputs = torch.randn(1, 3, 300, 200).to("cpu")
torch.onnx.export(model, example_inputs, "model.onnx", export_params=True, opset_version=14)

onnx_model = onnx.load("model.onnx")
class_names = sorted(list(set(labels['train'].values()))) # Our list of 70 names
meta = onnx_model.metadata_props.add()
meta.key = "class_names"
meta.value = json.dumps(class_names)
onnx.save(onnx_model, "model.onnx")


W0121 21:29:53.377000 66454 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 14 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


[torch.onnx] Obtain model graph for `Sequential([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `Sequential([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...


The model version conversion is not supported by the onnxscript version converter and fallback is enabled. The model will be converted using the onnx C API (target version: 14).
Failed to convert the model to the target version 14 using the ONNX C API. The model was not modified
Traceback (most recent call last):
  File "/Users/matthiastraut/Courses/Zoomcamp_ML/Homeworks/project2/.venv/lib/python3.12/site-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/matthiastraut/Courses/Zoomcamp_ML/Homeworks/project2/.venv/lib/python3.12/site-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/Users/matthiastraut/Courses/Zoomcamp_ML/Homeworks/project2/.venv/lib/python3.12/site-packages/onnxscript/version_converter/__init__.py", line 122, in _partial_convert_version
    return 

[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 3 of general pattern rewrite rules.
