## RetinaNet in PyTorch - TensorFlow Great Barrier Reef

I wrote this notebook based on the following two notebooks:

1. https://www.kaggle.com/jainamshah17/gwd-retinanet-pytorch-train
2. https://www.kaggle.com/julian3833/reef-starter-torch-fasterrcnn-train-lb-0-416/notebook


In [None]:
# Fetch the pytorch-retinanet repository; the `retinanet` package imported later in this notebook comes from it.
!git clone https://github.com/yhenon/pytorch-retinanet.git
# Copy the `retinanet` package directory into the current directory so `from retinanet import ...` resolves.
# NOTE(review): assumes the clone above landed in /kaggle/working — confirm when running outside Kaggle.
!cp -r /kaggle/working/pytorch-retinanet/retinanet ./
# Quietly install pycocotools (presumably needed by retinanet.dataloader — TODO confirm).
!pip install -q pycocotools

In [None]:
import os
import re
import cv2
import time
import numpy as np
import pandas as pd

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torch.optim as optim
import torchvision.transforms as T
from torchvision.utils import make_grid 
from torch.utils.data import DataLoader, Dataset

from retinanet import model
from retinanet.dataloader import collater, Resizer, Augmenter, Normalizer, UnNormalizer

import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

# Root directory of the training frames; per-row image paths are appended to it.
BASE_DIR = "../input/tensorflow-great-barrier-reef/train_images"
# Use the GPU when one is available, otherwise fall back to the CPU.
# Both branches are plain device strings handed to a single torch.device() call.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Number of train + validation passes run by the training loop.
NUM_EPOCHS = 5

In [None]:
import ast

df = pd.read_csv(r'../input/reef-a-cv-strategy-subsequences/train-validation-split/train-0.1.csv')

display(df)

# The annotations column stores the string repr of a list of dictionaries.
# Parse it with ast.literal_eval instead of eval: literal_eval only accepts
# Python literals, so a malformed or malicious CSV cell cannot run code.
df['annotations'] = df['annotations'].apply(ast.literal_eval)
# Build the image path for the row (relative to BASE_DIR): video_<video_id>/<video_frame>.jpg
df['image_path'] = "video_" + df['video_id'].astype(str) + "/" + df['video_frame'].astype(str) + ".jpg"

df.head()

In [None]:
# Split the rows into training and validation subsets using the boolean `is_train` column
is_train_mask = df['is_train']
df_train = df[is_train_mask]
df_val = df[~is_train_mask]

In [None]:
# Drop the rows whose annotation list is empty, then renumber the remaining rows from 0
train_has_boxes = df_train['annotations'].str.len() > 0
val_has_boxes = df_val['annotations'].str.len() > 0
df_train = df_train[train_has_boxes].reset_index(drop=True)
df_val = df_val[val_has_boxes].reset_index(drop=True)

In [None]:
# Sanity check: number of rows kept in each split (temporary cell, remove later)
len(df_train), len(df_val)

In [None]:
class ReefDataset:
    """Map-style dataset that yields one frame and its bounding boxes per index.

    Each item is a dict ``{'img': image, 'annot': boxes}``, optionally passed
    through ``transforms``.

    Args:
        df: DataFrame with an ``annotations`` column (per row, a list of dicts
            with ``x``, ``y``, ``width`` and ``height`` keys) and an
            ``image_path`` column holding the path relative to ``BASE_DIR``.
        transforms: Optional callable applied to the sample dict.
        image_width: Upper bound used to clip ``x_max`` (default 1280).
        image_height: Upper bound used to clip ``y_max`` (default 720).
    """

    def __init__(self, df, transforms=None, image_width=1280, image_height=720):
        self.df = df
        self.transforms = transforms
        self.image_width = image_width
        self.image_height = image_height

    def get_boxes(self, row):
        """Return the boxes of ``row`` as a 2D array of shape ``(n_boxes, 5)``.

        Columns 0-3 are ``[x_min, y_min, x_max, y_max]``. Column 4 is left at
        0 for every box (presumably the class label consumed by the retinanet
        collater/loss — TODO confirm). A row without annotations yields an
        empty ``(0, 5)`` array instead of raising.
        """
        annotations = row['annotations']
        boxes = np.zeros((len(annotations), 5))
        if len(annotations) == 0:
            # An empty DataFrame has no 'x'/'y'/... columns to select from.
            return boxes

        records = pd.DataFrame(annotations)
        boxes[:, 0:4] = records[['x', 'y', 'width', 'height']].values
        # Change from [x_min, y_min, w, h] to [x_min, y_min, x_max, y_max]
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

        # Correct annotations that extend outside the image
        boxes[:, 0] = np.maximum(0, boxes[:, 0])
        boxes[:, 1] = np.maximum(0, boxes[:, 1])
        boxes[:, 2] = np.minimum(self.image_width, boxes[:, 2])
        boxes[:, 3] = np.minimum(self.image_height, boxes[:, 3])
        return boxes

    def get_image(self, row):
        """Load the image of ``row`` as a float32 RGB array scaled to [0, 1].

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        path = f'{BASE_DIR}/{row["image_path"]}'
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        return image

    def __getitem__(self, i):
        """Return the (optionally transformed) sample dict for positional index ``i``."""
        row = self.df.iloc[i]

        sample = {
            'img': self.get_image(row),
            'annot': self.get_boxes(row),
        }

        if self.transforms:
            sample = self.transforms(sample)

        return sample

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.df)

In [None]:
def get_train_transform():
    """Compose the Augmenter, Normalizer and Resizer transforms, applied in that order."""
    train_steps = [Augmenter(), Normalizer(), Resizer()]
    return T.Compose(train_steps)

def get_valid_transform():
    """Compose the Normalizer and Resizer transforms, applied in that order (no Augmenter)."""
    valid_steps = [Normalizer(), Resizer()]
    return T.Compose(valid_steps)

In [None]:
# Wrap each split in a ReefDataset with its matching transform pipeline
train_transforms = get_train_transform()
valid_transforms = get_valid_transform()
ds_train = ReefDataset(df_train, transforms=train_transforms)
ds_val = ReefDataset(df_val, transforms=valid_transforms)

In [None]:
# Define dataloaders
# Reshuffle the training samples every epoch. The rows are video frames (see
# image_path), so presumably neighbouring rows are near-identical; iterating
# them in a fixed order would fill each batch with highly correlated samples.
dl_train = DataLoader(ds_train, batch_size=8, shuffle=True, num_workers=4, collate_fn=collater)
# Keep the validation order fixed so the per-iteration logs are comparable across epochs.
dl_val = DataLoader(ds_val, batch_size=8, shuffle=False, num_workers=4, collate_fn=collater)

### Set up for training

In [None]:
# Build the ResNet-50 based RetinaNet (pretrained=True, num_classes=2) and move it to DEVICE
retinanet = model.resnet50(num_classes=2, pretrained=True)
retinanet.to(DEVICE)
# Adam over all model parameters with a learning rate of 1e-4
optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

In [None]:
from tqdm.notebook import tqdm

def train_one_epoch(epoch_num, train_data_loader):
    """Run one training pass over ``train_data_loader`` and log every iteration.

    Uses the module-level ``retinanet``, ``optimizer`` and ``DEVICE``.

    Args:
        epoch_num: Epoch index, used only in the log line.
        train_data_loader: Iterable of batches; each batch is a dict with
            ``'img'`` and ``'annot'`` tensors.

    Returns:
        The mean loss over the iterations that were not skipped, or ``nan``
        when every iteration was skipped.
    """
    retinanet.train()

    epoch_loss = []

    # Wrap the loader itself (not the enumerate object) so tqdm can read its
    # length and show the total / remaining time.
    for iter_num, data in enumerate(tqdm(train_data_loader)):

        # Resetting gradients after each iter
        optimizer.zero_grad()

        # Forward
        images = data['img'].to(DEVICE).float()
        annotations = data['annot'].to(DEVICE).float()
        classification_loss, regression_loss = retinanet([images, annotations])

        # Calculating Loss
        classification_loss = classification_loss.mean()
        regression_loss = regression_loss.mean()

        loss = classification_loss + regression_loss

        # Nothing to learn from this batch: skip the backward pass and the log line
        if bool(loss == 0):
            continue

        # Calculating Gradients
        loss.backward()

        # Gradient Clipping
        torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

        # Updating Weights
        optimizer.step()

        epoch_loss.append(float(loss))

        print(
            'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(epoch_loss)))

        del classification_loss
        del regression_loss

    return float(np.mean(epoch_loss)) if epoch_loss else float('nan')

In [None]:
def valid_one_epoch(epoch_num, valid_data_loader):
    """Compute the validation loss for one epoch, then checkpoint the model.

    Uses the module-level ``retinanet`` and ``DEVICE``. The whole model object
    is saved to ``retinanet_barrier_reef_epoch{epoch_num}.pt`` afterwards.

    Args:
        epoch_num: Epoch index, used in the log line and the checkpoint name.
        valid_data_loader: Iterable of batches; each batch is a dict with
            ``'img'`` and ``'annot'`` tensors.

    Returns:
        The mean validation loss, or ``nan`` when the loader is empty.
    """
    # torch.no_grad() only disables gradient tracking. BatchNorm layers left in
    # training mode would still normalise with batch statistics and update
    # their running statistics from validation data, so switch them to eval.
    # NOTE(review): the top-level module is deliberately not switched to
    # eval(), because the forward call below relies on the loss-returning
    # path — confirm against retinanet.model.
    batch_norm_types = (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d, torch.nn.BatchNorm3d)
    for module in retinanet.modules():
        if isinstance(module, batch_norm_types):
            module.eval()

    epoch_loss = []

    with torch.no_grad():
        # Wrap the loader itself (not the enumerate object) so tqdm can read
        # its length and show the total / remaining time.
        for iter_num, data in enumerate(tqdm(valid_data_loader)):

            # Forward
            images = data['img'].to(DEVICE).float()
            annotations = data['annot'].to(DEVICE).float()
            classification_loss, regression_loss = retinanet([images, annotations])

            # Calculating Loss
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss

            # Epoch Loss
            epoch_loss.append(float(loss))

            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(epoch_loss)))

            del classification_loss
            del regression_loss

    # Save Model after each epoch
    torch.save(retinanet, f"retinanet_barrier_reef_epoch{epoch_num}.pt")

    return float(np.mean(epoch_loss)) if epoch_loss else float('nan')
    
  

In [None]:
## Training Loop
# Each epoch runs one training pass followed by one validation pass;
# the validation function also saves the model to disk.
epoch_progress = tqdm(range(NUM_EPOCHS))
for epoch in epoch_progress:
    start_message = "Epoch - {} Started".format(epoch)
    print(start_message)
    train_one_epoch(epoch_num=epoch, train_data_loader=dl_train)
    valid_one_epoch(epoch_num=epoch, valid_data_loader=dl_val)