In [5]:
import os
from glob import glob
from warnings import simplefilter

In [6]:
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=UserWarning)

#### Configure path parameters and read data files

In [7]:
ROOT_DIR = os.path.abspath(os.path.join(os.getcwd(), *['..'] * 1)) 
IMAGES_DIR = os.path.join(ROOT_DIR, "data", "datasets", "aircraft", "images")

RADOM_SEED = 2020
TRAIN_FRAC = 0.8
VAL_FRAC = 0.1

In [8]:
ROOT_DIR = '/home/mids/m250420/Capstone/lc-14'

In [9]:
IMAGES_DIR

'/home/mids/m250420/Capstone/lc-14/data/datasets/aircraft/images'

In [10]:
image_dir = IMAGES_DIR
labels_fp = os.path.join(ROOT_DIR, "data", "datasets", "aircraft", "annotations.csv")

In [11]:
labels_fp

'/home/mids/m250420/Capstone/lc-14/data/datasets/aircraft/annotations.csv'

#### Load utility functions

In [12]:
os.chdir(ROOT_DIR)

In [13]:
import torch
import torch.nn as nn
import albumentations as A
from albumentations.pytorch import ToTensorV2

from src.utilities.data.aircraft_dataloader import get_dataloader

#### Define a minimal transformation pipeline

In [31]:
transformations = A.Compose([
    A.Resize(256, 256),  # This transform resizes each image before subsequent processing
    A.HorizontalFlip(p=0.5),  # This transform flips images horizontally with a 50% probability
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # This transformation normalizes from standard RGB to grayscale
    A.RandomRotate90(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
  
    ToTensorV2(),  # This transform maps our image to a torch.Tensor object
])
## play around with size -- 256 

#### Building a `torch.Dataloader`.

In [34]:
# Create the DataLoader
dataloader = get_dataloader(
  image_dir=image_dir,
  labels_fp=labels_fp,
  transformations=transformations,
  mode='train',
  train_frac=TRAIN_FRAC,
  val_frac=VAL_FRAC,
  seed=RADOM_SEED,
  batch_size=4,
  shuffle=True,
  num_workers=1,
)

#### Building a baseline MLP using `torch.nn.Module`

In [35]:
class BaselineMLP(nn.Module):
    def __init__(self):
        super(BaselineMLP, self).__init__()
        # First convolution layer
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Second convolution layer
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Third convolution layer with more filters
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fourth convolution layer (new addition)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.relu4 = nn.ReLU()
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected layers
        self.fc1 = nn.Linear(128 * 16 * 16, 256)  # Adjust for the feature map size after pooling
        self.relu5 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 1)  # Final output layer

    def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.bn3(self.conv3(x)))  # Apply batch normalization after 3rd convolution
        x = self.pool4(self.relu4(self.conv4(x)))  # Apply 4th convolution layer and pooling

        x = x.view(x.size(0), -1)  # Flatten dynamically for batch size
        x = self.relu5(self.fc1(x))  # Fully connected layer
        x = self.fc2(x)  # Second fully connected layer
        x = self.fc3(x)  # Final prediction
        return x

#### Training the model

In [36]:
# Basic hyperparameters
learning_rate = 1e-5
num_epochs = 10

In [37]:
baseline_mlp_model = BaselineMLP()
criterion = nn.SmoothL1Loss(beta=1.0)  
#criterion = nn.MSELoss()  Mean Squared Error for regression
optimizer = torch.optim.Adam(baseline_mlp_model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
  for i, (images, targets) in enumerate(dataloader):
    if i == len(dataloader) - 1: continue  # save the last batch for demonstration
    # Forward pass
    outputs = baseline_mlp_model(images)
    loss = criterion(outputs.squeeze(), targets)  # Ensure outputs are squeezed to match counts shape

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")


Epoch [1/10], Loss: 14.6875
Epoch [2/10], Loss: 13.6323
Epoch [3/10], Loss: 11.7460
Epoch [4/10], Loss: 21.0803
Epoch [5/10], Loss: 19.6403
Epoch [6/10], Loss: 5.2284
Epoch [7/10], Loss: 8.0623
Epoch [8/10], Loss: 13.3601
Epoch [9/10], Loss: 9.0751
Epoch [10/10], Loss: 7.3661


Check the model's prediction on the validation set for a `torch.Dataset` with the same `train_frac`, `val_frac`, and `seed`.

In [38]:
# Create the DataLoader
val_dataloader = get_dataloader(
  image_dir,
  labels_fp,
  transformations=transformations,
  mode='val',
  train_frac=TRAIN_FRAC,
  val_frac=VAL_FRAC,
  seed=RADOM_SEED,
  batch_size=1,
  shuffle=False,
  num_workers=1,
)

In [39]:
losses = []

for i, (val_images, targets) in enumerate(val_dataloader):
  predicted_counts = baseline_mlp_model(val_images)
  # We validate based on the mean absolute error
  losses.append(torch.abs(predicted_counts - targets).item())
  print(f"val image {i+1}, predicted count: {predicted_counts.item():.4f}, true count: {targets.item():.4f}")

mean_loss = sum(losses) / len(losses)
print(f"Mean absolute error: {mean_loss:.4f}")

val image 1, predicted count: 31.1367, true count: 31.0000
val image 2, predicted count: 31.6776, true count: 24.0000
val image 3, predicted count: 29.9612, true count: 19.0000
val image 4, predicted count: 31.3931, true count: 49.0000
val image 5, predicted count: 29.4936, true count: 27.0000
val image 6, predicted count: 29.3425, true count: 15.0000
val image 7, predicted count: 30.2501, true count: 30.0000
val image 8, predicted count: 30.4587, true count: 52.0000
val image 9, predicted count: 31.7971, true count: 39.0000
val image 10, predicted count: 31.8876, true count: 26.0000
Mean absolute error: 8.8100


In [None]:
# push to github -- TestModel
# pull dataloader to TestModel notebook -- use that to test
# alpha