# Deep Learning - Underwater object recognition - Residual Networks

This notebook focuses on using Residual Netowrks to detect objects in the underwater realm.

In [None]:
# Google COLAB init
if 'google.colab' in str(get_ipython()):
    from google.colab import drive
    drive.mount('/content/drive')


In [5]:
# Imports

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
import os
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchmetrics
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from tqdm import tqdm

## Dataset loading and preparation

In [2]:
class UODDDataset(Dataset):
    def __init__(self, img_dir, label_dir, transform=None):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transform = transform
        self.img_files = [f for f in os.listdir(img_dir) if f.endswith('.jpg')]

    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_files[idx])
        img = Image.open(img_path).convert("RGB")

        label_path = os.path.join(self.label_dir, self.img_files[idx].replace('.jpg', '.txt'))
        with open(label_path, 'r') as f:
            labels = f.readlines()

        if self.transform:
            img = self.transform(img)

        label = int(labels[0].split()[0]) if labels else 0

        return img, label

# Define transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create datasets
train_dataset = UODDDataset(img_dir='./Datasets/YOLO_UODD/images/train', label_dir='./Datasets/YOLO_UODD/labels/train', transform=transform)
val_dataset = UODDDataset(img_dir='./Datasets/YOLO_UODD/images/val', label_dir='./Datasets/YOLO_UODD/labels/val', transform=transform)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

## PyTorch's ResNet50

This section focuses on training and evaluation of the ResNet50 model provided by PyTorch.

In [3]:
from torchvision.models import ResNet50_Weights

# Load pre-trained ResNet model
model = models.resnet50(weights=ResNet50_Weights.DEFAULT)

# Modify the final layer to match the number of classes in UODD
num_classes = 4  # Update this to the actual number of classes in your dataset
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [6]:
from tqdm import tqdm

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

# Save the trained model
torch.save(model.state_dict(), 'resnet50_uodd.pth')

100%|██████████| 80/80 [00:19<00:00,  4.04it/s]


Epoch 1/10, Loss: 0.6852


100%|██████████| 80/80 [00:19<00:00,  4.09it/s]


Epoch 2/10, Loss: 0.5399


100%|██████████| 80/80 [00:19<00:00,  4.04it/s]


Epoch 3/10, Loss: 0.4775


100%|██████████| 80/80 [00:20<00:00,  3.95it/s]


Epoch 4/10, Loss: 0.4031


100%|██████████| 80/80 [00:20<00:00,  3.98it/s]


Epoch 5/10, Loss: 0.3577


100%|██████████| 80/80 [00:19<00:00,  4.01it/s]


Epoch 6/10, Loss: 0.2625


100%|██████████| 80/80 [00:20<00:00,  3.93it/s]


Epoch 7/10, Loss: 0.2187


100%|██████████| 80/80 [00:20<00:00,  3.98it/s]


Epoch 8/10, Loss: 0.1452


100%|██████████| 80/80 [00:20<00:00,  3.99it/s]


Epoch 9/10, Loss: 0.1103


100%|██████████| 80/80 [00:20<00:00,  4.00it/s]

Epoch 10/10, Loss: 0.0931





In [10]:
model.load_state_dict(torch.load('resnet50_uodd.pth'))

# Evaluation function with additional metrics
def evaluate_model(model, dataloader, criterion):
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    all_labels = []
    all_preds = []

    precision = torchmetrics.Precision(task="multiclass", num_classes=num_classes, average='macro').to(device)
    recall = torchmetrics.Recall(task="multiclass", num_classes=num_classes, average='macro').to(device)
    map_50 = torchmetrics.detection.MeanAveragePrecision(iou_thresholds=[0.5]).to(device)
    map_50_95 = torchmetrics.detection.MeanAveragePrecision(iou_thresholds=[0.5, 0.75, 0.95]).to(device)

    with torch.no_grad():
        for inputs, labels in tqdm(dataloader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct_predictions += torch.sum((preds == labels))

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

            precision.update(preds, labels)
            recall.update(preds, labels)
            map_50.update(preds, labels)
            map_50_95.update(preds, labels)

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_acc = correct_predictions.double() / len(dataloader.dataset)
    epoch_precision = precision.compute().item()
    epoch_recall = recall.compute().item()
    epoch_map_50 = map_50.compute().item()
    epoch_map_50_95 = map_50_95.compute().item()

    return epoch_loss, epoch_acc, epoch_precision, epoch_recall, epoch_map_50, epoch_map_50_95, all_labels, all_preds

# Evaluate on validation set
val_loss, val_acc, val_precision, val_recall, val_map_50, val_map_50_95, val_labels, val_preds = evaluate_model(model, val_loader, criterion)
print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")
print(f"Precision: {val_precision:.4f}, Recall: {val_recall:.4f}")
print(f"mAP@50: {val_map_50:.4f}, mAP@50-95: {val_map_50_95:.4f}")

# Confusion Matrix
cm = confusion_matrix(val_labels, val_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=range(num_classes))
disp.plot(cmap=plt.cm.Blues)
plt.show()


  0%|          | 0/4 [00:00<?, ?it/s]


ValueError: Expected argument `preds` to be of type Sequence, but got tensor([0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
        0, 1, 0, 1, 0, 1, 0, 0], device='cuda:0')