# SSD Lite model on the cows dataset from ICAERUS/France
Most of this is adapted from the basic cookiecutter model-training code in PyTorch.

The dataset covers three areas (Jalogny, Derval and Mauron), which correspond directly to the train/test/val sets.
I removed all images that contain no annotations (e.g. just a picture of a field, without a cow).
* train: Jalogny: x imgs, x annotations
* test: Derval: x imgs, x annotations
* val: Mauron: x imgs, x annotations


In [None]:
!pip install tensorboard

Collecting tensorboard
  Using cached tensorboard-2.15.1-py3-none-any.whl (5.5 MB)
Collecting absl-py>=0.4 (from tensorboard)
  Using cached absl_py-2.0.0-py3-none-any.whl (130 kB)
Collecting grpcio>=1.48.2 (from tensorboard)
  Using cached grpcio-1.60.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)
Collecting google-auth<3,>=1.6.3 (from tensorboard)
  Using cached google_auth-2.25.2-py2.py3-none-any.whl (184 kB)
Collecting google-auth-oauthlib<2,>=0.5 (from tensorboard)
  Using cached google_auth_oauthlib-1.2.0-py2.py3-none-any.whl (24 kB)
Collecting markdown>=2.6.8 (from tensorboard)
  Using cached Markdown-3.5.1-py3-none-any.whl (102 kB)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard)
  Using cached tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)
Collecting werkzeug>=1.0.1 (from tensorboard)
  Using cached werkzeug-3.0.1-py3-none-any.whl (226 kB)
Collecting cachetools<6.0,>=2.0.0 (from google-auth<3,>=1.6.3->tensorb

In [4]:
import torch
import torchvision
from torchvision.models.detection import ssdlite320_mobilenet_v3_large
from torchvision.datasets import VOCDetection
from torchvision.transforms.v2 import functional as F
from torchvision.transforms import v2
from torchvision import ops
import torchvision.transforms.v2 as transforms
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import datapoints
from torchvision.ops import generalized_box_iou_loss
from torch.utils import tensorboard
from torchvision.datasets import VisionDataset
from IPython.core.debugger import set_trace

In [5]:

class CustomDataset(VisionDataset):
    """In-memory detection dataset pairing each image with its boxes and labels.

    ``images``, ``labels`` and ``boxes`` are parallel, index-aligned sequences:
    entry ``i`` of each one belongs to the same sample.
    """

    def __init__(self, images, labels, boxes, transform=None):
        # NOTE(review): VisionDataset.__init__ is not invoked, so an instance
        # only carries the four attributes assigned here.
        self.images, self.labels, self.boxes = images, labels, boxes
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per stored image)."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``target`` is a dict holding ``'boxes'`` (float32 tensor) and
        ``'labels'`` (int64 tensor). The optional transform is applied to the
        image only; the target is returned untouched.
        """
        sample = self.images[idx]
        label_tensor = torch.tensor(self.labels[idx], dtype=torch.long)
        box_tensor = torch.tensor(self.boxes[idx], dtype=torch.float32)
        target = dict(boxes=box_tensor, labels=label_tensor)

        if self.transform:
            sample = self.transform(sample)

        return sample, target

In [6]:
# In image_slicing.ipynb the images are loaded, tiled (in a 320x320 grid) and placed into a CustomDataset
# with normalized values, all as tensors; these datasets are then pickled and stored.

def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    A sequence of ``(image, target)`` pairs becomes ``(images, targets)``.
    Each field stays a plain tuple instead of being stacked into one tensor.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Load the pre-built train/val datasets (pickled dataset objects, not VOC).
# NOTE(security): these files are full pickles, so loading them can execute
# arbitrary code -- only load files you produced yourself. weights_only=False
# is spelled out because a pickled dataset object cannot be restored by the
# restricted weights-only unpickler.
train_dataset = torch.load("data/train_set.pkl", weights_only=False)
val_dataset = torch.load("data/val_set.pkl", weights_only=False)

# DataLoader settings
batch_size = 8
shuffle = True  # used for the training loader only

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=shuffle,
    collate_fn=collate_fn,
)
# Validation keeps a fixed order: combined with drop_last=True, shuffling would
# drop a different random tail on every pass, so successive validation losses
# would be averaged over different subsets and not be comparable.
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
    collate_fn=collate_fn,
)

In [None]:

import os

from torchvision.ops import box_convert

# Training configuration
num_epochs = 500
warmup_epochs = 50
evaluate_every = 10

# SSD Lite model initialised from the default pretrained weights
model = ssdlite320_mobilenet_v3_large(weights= "SSDLite320_MobileNet_V3_Large_Weights.DEFAULT")

# NOTE(review): `criterion` is not used in this cell -- the model returns its
# own classification / bbox-regression losses when called with targets.
criterion = generalized_box_iou_loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.015, momentum=0.9)
# Cosine annealing over the epochs that follow the warm-up phase
scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs - warmup_epochs)

writer = tensorboard.SummaryWriter()

# Checkpoints are written below models/; make sure the directory exists so the
# first save does not fail.
os.makedirs("models", exist_ok=True)

# Define the device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
for epoch in range(num_epochs):
    model.train()
    for images, targets in train_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()

        # Forward pass: with targets supplied the model returns a dict of losses
        loss_dict = model(images, targets)
        losses = loss_dict["classification"] + loss_dict["bbox_regression"]

        # Backward pass and optimization
        losses.backward()
        optimizer.step()
    # Only the loss of the last batch of the epoch is reported and logged.
    print(f"Epoch {epoch + 1}, Batch Loss: {losses}")

    # Update the learning rate. The scheduler's T_max is
    # num_epochs - warmup_epochs, so it is stepped only once the warm-up epochs
    # are over; stepping it on every epoch would run past T_max and make the
    # learning rate climb again during the final warmup_epochs epochs.
    # NOTE(review): the learning rate is held at its base value during the
    # warm-up epochs; no ramp-up is applied.
    if epoch >= warmup_epochs:
        scheduler.step()
    # Log the training loss to Tensorboard
    writer.add_scalar('Loss/train', losses.item(), epoch)

    # Evaluate on the validation set every 'evaluate_every' epochs
    if epoch % evaluate_every == 0:
        # The model is left in train mode on purpose: torchvision detection
        # models return the loss dict only in training mode.
        # NOTE(review): train-mode layers such as BatchNorm therefore presumably
        # still update their running statistics on validation batches -- confirm
        # whether that is acceptable.
        with torch.no_grad():
            val_losses = 0.0
            for images, targets in val_loader:
                images = [img.to(device) for img in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

                val_loss_dict = model(images, targets)

                # Both terms must come from the validation forward pass.
                val_batch_loss = val_loss_dict["classification"] + val_loss_dict["bbox_regression"]
                val_losses += val_batch_loss.item()

            num_val_batches = len(val_loader)
            if num_val_batches:
                avg_val_loss = val_losses / num_val_batches

                # Log the validation loss to Tensorboard
                writer.add_scalar('Loss/val', avg_val_loss, epoch)
                print(f"Epoch {epoch + 1}, Validation Loss: {avg_val_loss}")
            else:
                # Happens when the validation set is smaller than one batch and
                # the loader drops incomplete batches.
                print(f"Epoch {epoch + 1}, Validation skipped: val_loader produced no batches")
            torch.save(model.state_dict(), f"models/ssdlite_cows_model_v4_e{epoch}.pth")

# Flush pending Tensorboard events to disk
writer.close()

# Save the trained model
torch.save(model.state_dict(), f'models/ssdlite_cows_model_v4_{num_epochs}e.pth')