In [None]:
# This solution is implemented using only PyTorch; here are the necessary imports.
import ast
import os
import random
import time

import albumentations as A
import cv2
import numpy as np
import pandas as pd
import torch
import torchvision
from albumentations.pytorch.transforms import ToTensorV2
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor


# Fix randomness, see https://pytorch.org/docs/stable/notes/randomness.html
# We try to limit the number of sources of nondeterministic behaviour so that most operations, given the same inputs, produce the same result.

def fix_all_seeds(seed):
    """Seed every random number generator used in this notebook.

    Seeds NumPy, Python's ``random`` module and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED``, and asks cuDNN for deterministic
    behaviour.

    Args:
        seed: Integer seed applied to every generator.
    """
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Seeding the generators is not sufficient on GPU: cuDNN may pick different
    # (and non-deterministic) kernels from run to run unless autotuning is
    # disabled and deterministic algorithms are requested.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


fix_all_seeds(42)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Root directory that the per-row image paths are resolved against.
BASE_DIR = "../input/tensorflow-great-barrier-reef/train_images/"

# SGD optimizer hyper-parameters
LEARNING_RATE = 2.5e-3
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4

# Training schedule: number of passes over the training set and images per batch
NUM_EPOCHS = 12
BATCH_SIZE = 8

In [None]:
df = pd.read_csv("../input/reef-cv-strategy-subsequences-dataframes/train-validation-split/train-0.1.csv")

# The annotations column is stored as text; turn each entry into a list of dictionaries.
# ast.literal_eval only accepts Python literals, whereas eval would execute any
# expression that happens to be stored in the CSV.
df['annotations'] = df['annotations'].apply(ast.literal_eval)

# Relative image path of each row: video_<video_id>/<video_frame>.jpg
df['image_path'] = "video_" + df['video_id'].astype(str) + "/" + df['video_frame'].astype(str) + ".jpg"

df.head()

In [None]:
# Count the images with and without annotations. Most images (about 80% of those
# provided in the competition dataset) have no annotation at all.
df['annotations'].str.len().gt(0).value_counts()

In [None]:
# Same breakdown, expressed as fractions of all rows and rounded to two decimals.
df['annotations'].str.len().gt(0).value_counts(normalize=True).round(2)

In [None]:
# Keep only the images that carry at least one annotation; dropping the rest
# speeds up the training process.
annotated_mask = df['annotations'].str.len().gt(0)
df = df.loc[annotated_mask].reset_index(drop=True)

In [None]:
# Split the rows into training and validation sets using the `is_train` column,
# then show the number of images in each portion.
train_mask = df['is_train']
df_train = df.loc[train_mask].reset_index(drop=True)
df_val = df.loc[~train_mask].reset_index(drop=True)

len(df_train), len(df_val)

In [None]:
class ReefDataset:
    """Map-style dataset yielding ``(image, target)`` pairs for a detection model.

    Args:
        df: DataFrame with one row per image. Each row must provide an
            ``annotations`` list of dicts with ``x``, ``y``, ``width`` and
            ``height`` keys, and an ``image_path`` relative to ``BASE_DIR``.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            dict with at least ``image`` and ``bboxes``. When ``None`` the
            image and boxes are returned untransformed.
    """

    # Bounds used to clip the right and bottom edges of every box.
    IMAGE_WIDTH = 1280
    IMAGE_HEIGHT = 720

    def __init__(self, df, transforms=None):
        self.df = df
        self.transforms = transforms

    def get_boxes(self, row):
        """Return the row's boxes as an ``(n_boxes, 4)`` float array.

        Each box is ``[x_min, y_min, x_max, y_max]``; a row without
        annotations yields an array of shape ``(0, 4)``.
        """
        # Explicit copy: the array is modified in place below, and the frame's
        # own buffer is not guaranteed to be writable.
        boxes = (
            pd.DataFrame(row['annotations'], columns=['x', 'y', 'width', 'height'])
            .astype(float)
            .to_numpy(copy=True)
        )

        # Transformation from [x_min, y_min, w, h] to [x_min, y_min, x_max, y_max]
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

        # Boxes may extend past the right/bottom border; clip them to the frame.
        boxes[:, 2] = np.clip(boxes[:, 2], 0, self.IMAGE_WIDTH)
        boxes[:, 3] = np.clip(boxes[:, 3], 0, self.IMAGE_HEIGHT)

        return boxes

    def get_image(self, row):
        """Load the row's image as an RGB ``float32`` array scaled to [0, 1].

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        path = os.path.join(BASE_DIR, row["image_path"])
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread returns None instead of raising when the read fails.
            raise FileNotFoundError(f"Could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        return image

    def __getitem__(self, i):
        """Return ``(image, target)`` for the ``i``-th row.

        ``target`` is a dict with ``boxes`` (float32, ``(n, 4)``), ``area``
        (float32, ``(n,)``), ``image_id`` (``[i]``), ``labels`` (int64 ones)
        and ``iscrowd`` (int64 zeros). All per-box entries are derived from
        the boxes *after* the transform, so they always have matching lengths.
        """
        row = self.df.iloc[i]
        image = self.get_image(row)
        boxes = self.get_boxes(row)

        if self.transforms is not None:
            # Plain Python lists are handed to the transform so that it does
            # not have to deal with tensors; there is only one class, label 1.
            sample = self.transforms(
                image=image,
                bboxes=boxes.tolist(),
                labels=[1] * len(boxes),
            )
            image = sample['image']
            boxes = np.asarray(sample['bboxes'], dtype=np.float64).reshape(-1, 4)

        n_boxes = boxes.shape[0]

        # Calculate the area
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            'boxes': torch.as_tensor(boxes, dtype=torch.float32),
            'area': torch.as_tensor(area, dtype=torch.float32),
            'image_id': torch.tensor([i]),
            # There is only one class
            'labels': torch.ones((n_boxes,), dtype=torch.int64),
            'iscrowd': torch.zeros((n_boxes,), dtype=torch.int64),
        }

        return image, target

    def __len__(self):
        return len(self.df)

In [None]:
# Here we experiment with image augmentation using simple transformations.

def get_train_transform():
    """Build the training pipeline: a random flip followed by tensor conversion.

    Boxes are expected in ``pascal_voc`` format and their labels are read from
    the ``labels`` field.
    """
    return A.Compose([
        # The probability must be passed by keyword. In albumentations releases
        # whose first positional parameter is `always_apply`, `A.Flip(0.5)`
        # sets that flag to a truthy value and flips every single image.
        A.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})


def get_valid_transform():
    """Build the validation pipeline: tensor conversion only, no augmentation.

    Boxes are expected in ``pascal_voc`` format and their labels are read from
    the ``labels`` field.
    """
    steps = [ToTensorV2(p=1.0)]
    box_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return A.Compose(steps, bbox_params=box_settings)

In [None]:
# Wrap each split in a ReefDataset, pairing it with its own transform pipeline.
train_transform = get_train_transform()
valid_transform = get_valid_transform()

ds_train = ReefDataset(df=df_train, transforms=train_transform)
ds_val = ReefDataset(df=df_val, transforms=valid_transform)

In [None]:
# Check that the augmentations work by drawing the boxes of one training image.
# Prefer the first image with more than 12 annotations; fall back to the first
# image when there is none (explicit check instead of a bare `except`).
crowded_positions = np.flatnonzero((df_train['annotations'].str.len() > 12).to_numpy())
idx = int(crowded_positions[0]) if len(crowded_positions) > 0 else 0

image, targets = ds_train[idx]

boxes = targets['boxes'].cpu().numpy().astype(np.int32)
# Channels-last, contiguous copy so that OpenCV can draw on it in place.
img = np.ascontiguousarray(image.permute(1, 2, 0).cpu().numpy())
fig, ax = plt.subplots(1, 1, figsize=(16, 8))

# The image holds floats in [0, 1], so the box colour is given in that range too.
# `tolist()` yields plain Python ints for the corner coordinates.
for x_min, y_min, x_max, y_max in boxes.tolist():
    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (1.0, 0.0, 0.0), 3)

ax.set_axis_off()
ax.imshow(img);

In [None]:
#load the data for the model
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    For ``(image, target)`` samples this returns ``(images, targets)``, each a
    tuple with one entry per sample, instead of stacking them into tensors.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Shuffle the training samples every epoch so that SGD does not see them in the
# same fixed order each time (the rows are presumably ordered by video and
# frame, TODO confirm). Validation keeps a fixed order.
dl_train = DataLoader(ds_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, collate_fn=collate_fn)
dl_val = DataLoader(ds_val, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, collate_fn=collate_fn)

In [None]:
def get_model(num_classes=2):
    """Create a pre-trained Faster R-CNN whose box head predicts ``num_classes`` classes.

    Args:
        num_classes: Number of output classes including the background.
            The default of 2 covers background (index 0) and starfish
            (index 1, the label the dataset assigns to every box).

    Returns:
        The model, already moved to ``DEVICE``.
    """
    # load a model that has been pre-trained from pytorch libraries
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    model.to(DEVICE)
    return model

model = get_model()

In [None]:
def _batch_to_device(images, targets):
    """Move one collated batch to DEVICE.

    Images are cast to float; target entries whose key contains "box" are cast
    to float32, every other entry keeps its dtype.
    """
    moved_images = [image.float().to(DEVICE) for image in images]
    moved_targets = []
    for target in targets:
        moved = {}
        for key, value in target.items():
            if "box" in key:
                moved[key] = value.to(torch.float32).to(DEVICE)
            else:
                moved[key] = value.to(DEVICE)
        moved_targets.append(moved)
    return moved_images, moved_targets


# Optimize only the parameters that require gradients.
params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(params, lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

n_batches = len(dl_train)
n_batches_val = len(dl_val)

# Per-epoch validation losses, used afterwards to pick the best checkpoint.
val_losses, val_box_losses = [], []

for epoch in range(NUM_EPOCHS):

    model.train()
    time_start = time.time()

    # ---- Training pass ----
    loss_accum, loss_box_accum = 0, 0

    for images, targets in dl_train:
        images, targets = _batch_to_device(images, targets)

        # The returned dict has the following keys:
        #    loss_classifier, loss_box_reg, loss_objectness, loss_rpn_box_reg
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        loss_accum += losses.item()
        loss_box_accum += loss_dict['loss_box_reg'].item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    # ---- Validation pass ----
    # model.eval() is never called: the model stays in train mode and its
    # output is consumed as a loss dict; no_grad disables gradient tracking.
    val_loss_accum, val_loss_box_accum = 0, 0

    with torch.no_grad():
        for images, targets in dl_val:
            images, targets = _batch_to_device(images, targets)

            val_loss_dict = model(images, targets)
            val_batch_loss = sum(val_loss_dict.values())

            val_loss_accum += val_batch_loss.item()
            val_loss_box_accum += val_loss_dict['loss_box_reg'].item()

    # ---- Epoch averages (per batch) ----
    train_loss = loss_accum / n_batches
    train_loss_box = loss_box_accum / n_batches

    val_loss = val_loss_accum / n_batches_val
    val_loss_box = val_loss_box_accum / n_batches_val

    val_losses.append(val_loss)
    val_box_losses.append(val_loss_box)

    # ---- Checkpoint (file name uses the 0-based epoch index) ----
    chk_name = f'pytorch_model-e{epoch}.bin'
    torch.save(model.state_dict(), chk_name)

    # ---- Logging ----
    elapsed = time.time() - time_start
    prefix = f"[Epoch {epoch+1:2d} / {NUM_EPOCHS:2d}]"

    print()
    print(f"{prefix} Train loss: {train_loss:.3f}.  Train loss (bbox only): {train_loss_box:.3f}.  Val loss (bbox only): {val_loss_box:.3f}")
    print(prefix)
    print(f"{prefix} Saved to  : {chk_name}  [{elapsed:.0f} secs]")
    print(f"{prefix} Val loss  : {val_loss:.3f}")

In [None]:
# Best model based on lowest validation loss: the 0-based epoch index, which
# matches the number in the saved checkpoint name (pytorch_model-e<index>.bin).
np.asarray(val_losses).argmin()

In [None]:
# Best model based on lowest bbox loss: the 0-based epoch index, which matches
# the number in the saved checkpoint name (pytorch_model-e<index>.bin).
np.asarray(val_box_losses).argmin()