In [None]:
import ast

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

## Dataset.py

In [None]:
import torch
import torch.nn as nn
from skimage import io
import albumentations
from albumentations.pytorch.transforms import ToTensorV2

class ReefDataset(torch.utils.data.Dataset):
    """Object-detection dataset built from a DataFrame of annotated frames.

    Every item is a dict with two entries:

    * ``'image'``  -- float32 tensor in channel-first (C, H, W) layout with
      values scaled to [0, 1];
    * ``'target'`` -- dict holding ``boxes`` (N, 4) as
      [x_min, y_min, x_max, y_max], ``area`` (N,), ``image_id`` (1,),
      ``labels`` (N,) all ones, and ``iscrowd`` (N,) all zeros.

    Args:
        df: DataFrame with an ``image_path`` column (path relative to
            ``image_dir``) and an ``annotations`` column whose cells are
            lists of dicts with the keys ``x``, ``y``, ``width``, ``height``.
        transforms: optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            dict with ``'image'`` and ``'bboxes'`` (e.g. an albumentations
            ``Compose`` ending in ``ToTensorV2``). It receives the image in
            channel-last (H, W, C) layout.
        image_dir: directory that ``image_path`` values are relative to.
    """

    def __init__(self, df, transforms=None,
                 image_dir="../input/tensorflow-great-barrier-reef/train_images"):
        self.df = df
        self.transforms = transforms
        # Drop trailing slashes so the joined path has a single separator.
        self.image_dir = str(image_dir).rstrip('/')

    def __len__(self):
        """Return the number of rows (frames) in the backing DataFrame."""
        return len(self.df)

    def can_augment(self, boxes, width=1280, height=720):
        """Check whether every bounding box lies inside the image.

        ###### https://www.kaggle.com/julian3833/reef-starter-torch-fasterrcnn-train-lb-0-416
        For example: image_id 1-490 has a bounding box that is partially
        outside of the image, which breaks albumentations. The augmentation
        is only applied when all box margins are within the image.

        Args:
            boxes: (N, 4) array of [x_min, y_min, x_max, y_max].
            width: image width in pixels (defaults to the 1280 used so far).
            height: image height in pixels (defaults to the 720 used so far).

        Returns:
            True when no box crosses the image border, False otherwise.
        """
        box_outside_image = (
            (boxes[:, 0] < 0).any() or (boxes[:, 1] < 0).any()
            or (boxes[:, 2] > width).any() or (boxes[:, 3] > height).any()
        )
        return not box_outside_image

    def __getitem__(self, item):
        """Load frame ``item`` and build its detection target."""
        row = self.df.iloc[item]

        # Keep the channel-last (H, W, C) layout that the loader returns.
        # Both the albumentations pipeline and the tensor conversion below
        # expect H, W, C; transposing here first would scramble the axes.
        image = io.imread(f'{self.image_dir}/{row["image_path"]}').astype(np.float32)
        image /= 255.0
        height, width = image.shape[:2]

        # Explicit copy: the array is modified in place just below.
        boxes = pd.DataFrame(
            row['annotations'], columns=['x', 'y', 'width', 'height']
        ).to_numpy(dtype=float, copy=True)
        # Change from [x_min, y_min, w, h] to [x_min, y_min, x_max, y_max]
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

        n_boxes = boxes.shape[0]

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            'boxes': torch.tensor(boxes, dtype=torch.float32),
            'area': torch.tensor(area, dtype=torch.float32),

            'image_id': torch.tensor([item]),

            # There is only one class
            'labels': torch.ones((n_boxes,), dtype=torch.int64),

            # Suppose all instances are not crowd
            'iscrowd': torch.zeros((n_boxes,), dtype=torch.int64)
        }

        if self.transforms and self.can_augment(boxes, width=width, height=height):
            sample = {
                'image': image,
                'bboxes': target['boxes'],
                'labels': target['labels']
            }
            sample = self.transforms(**sample)
            image = sample['image']

            if n_boxes > 0:
                # Rebuild an (N, 4) tensor from the sequence of box tuples.
                target['boxes'] = torch.stack(
                    tuple(map(torch.tensor, zip(*sample['bboxes'])))
                ).permute(1, 0)
        else:
            # No augmentation: convert H, W, C -> C, H, W directly.
            image = torch.from_numpy(image).permute(2, 0, 1).contiguous()

        return {
            'image': image,
            'target': target,
        }

## Engine.py

In [None]:
def train(model, dataloader, optimizer, device='cuda'):
    """Run one training epoch over ``dataloader``.

    Args:
        model: detection model called as ``model(images, targets)`` that
            returns a dict of loss tensors while in training mode.
        dataloader: iterable of batches shaped like
            ``{'images': [tensor, ...], 'targets': [dict, ...]}``; every
            value inside a target dict must support ``.to(device)``.
        optimizer: optimizer updating the model's parameters.
        device: device the batch is moved to before the forward pass.

    Returns:
        None. The model's parameters are updated in place.
    """
    model.train()
    for data in dataloader:
        images = [image.to(device) for image in data['images']]
        # Move *every* target of the batch. Iterating over len(data) would
        # only cover the batch dict's two keys ('images', 'targets'),
        # leaving the remaining targets on the CPU.
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in data['targets']
        ]

        optimizer.zero_grad()
        outputs = model(images, targets)
        # The model returns one tensor per loss component; optimise their sum.
        losses = sum(loss for loss in outputs.values())
        losses.backward()
        optimizer.step()
        
def valid(model, dataloader, device='cuda'):
    """Compute the mean per-batch loss over ``dataloader`` without gradients.

    torchvision detection models return the loss dict only while in
    training mode; in eval mode they return predictions. The forward passes
    therefore run in training mode under ``torch.no_grad()``, and the model
    is switched to eval mode before returning.

    NOTE: because the model is in training mode, any train-time behaviour of
    its layers (e.g. random sampling, dropout) also applies here, so the
    value may vary slightly between calls.

    Args:
        model: detection model called as ``model(images, targets)``.
        dataloader: iterable of batches shaped like
            ``{'images': [tensor, ...], 'targets': [dict, ...]}``; every
            value inside a target dict must support ``.to(device)``.
        device: device the batch is moved to before the forward pass.

    Returns:
        Sum of all loss components averaged over the number of batches, or
        NaN when ``dataloader`` yields no batches.
    """
    val_loss_accum = 0.0
    n_batches = 0

    model.train()  # required for the model to return losses
    try:
        with torch.no_grad():
            for data in dataloader:
                images = [image.to(device) for image in data['images']]
                # Move every target of the batch (len(data) would only
                # count the batch dict's two keys).
                targets = [
                    {key: value.to(device) for key, value in target.items()}
                    for target in data['targets']
                ]

                val_loss_dict = model(images, targets)
                val_batch_loss = sum(loss for loss in val_loss_dict.values())
                val_loss_accum += val_batch_loss.item()
                n_batches += 1
    finally:
        # Leave the model in eval mode whatever happens.
        model.eval()

    if n_batches == 0:
        return float('nan')
    # Average over the number of batches seen.
    return val_loss_accum / n_batches
    

# model.py

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN

def get_model(num_classes=2, device='cuda', pretrained=True):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a fresh box head.

    Args:
        num_classes: number of output classes *including* background.
            The default of 2 is 1 class (starfish) + background.
        device: device the model is moved to before it is returned.
        pretrained: forwarded to ``fasterrcnn_resnet50_fpn``; when true the
            model is loaded pre-trained on COCO.

    Returns:
        The model, on ``device``, with its box predictor replaced by a new
        ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    # load a model; pre-trained on COCO by default
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=pretrained)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    model.to(device)

    return model

# main.py

In [None]:
## albumentations Augmentations

def get_train_transform():
    """Return the training augmentation pipeline.

    The pipeline randomly flips the image (and its boxes) with probability
    0.5, then converts the image to a tensor. Boxes are expected in
    ``pascal_voc`` format with their classes passed as ``labels``.
    """
    return albumentations.Compose([
        # The probability must be passed by keyword: the first positional
        # parameter of the transform is `always_apply`, so `Flip(0.5)`
        # would apply the flip on every sample.
        albumentations.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})


def get_valid_transform():
    """Return the validation pipeline: tensor conversion only, no augmentation.

    Boxes are declared in ``pascal_voc`` format with their classes supplied
    through the ``labels`` field.
    """
    box_settings = {'format': 'pascal_voc', 'label_fields': ['labels']}
    steps = [ToTensorV2(p=1.0)]
    return albumentations.Compose(steps, bbox_params=box_settings)

In [None]:
class Collate:
    """Collate function that keeps images and targets as plain Python lists.

    Nothing is stacked: each sample's ``"image"`` and ``"target"`` entries
    are gathered into two parallel lists.
    """

    def __call__(self, batch):
        """Group samples into ``{'images': [...], 'targets': [...]}``."""
        images, targets = [], []
        for entry in batch:
            images.append(entry["image"])
            targets.append(entry["target"])
        return {'images': images, 'targets': targets}

In [None]:
## Loading Files . . . DF-TRAINING and DF-Validation
SPLIT_CSV = "../input/reef-cv-strategy-subsequences-dataframes/train-validation-split/train-0.1.csv"
BATCH_SIZE = 8
NUM_WORKERS = 2

df = pd.read_csv(SPLIT_CSV)

# The annotations column is read as text and must be parsed into Python
# objects. ast.literal_eval only accepts literals, so - unlike eval - a
# malformed or malicious cell cannot execute code.
df['annotations'] = df['annotations'].apply(ast.literal_eval)

# Create the image path for the row
df['image_path'] = "video_" + df['video_id'].astype(str) + "/" + df['video_frame'].astype(str) + ".jpg"

df_train, df_val = df[df['is_train']], df[~df['is_train']]

# Keep only frames that have at least one annotation:
# https://discuss.pytorch.org/t/fasterrcnn-images-with-no-objects-present-cause-an-error/117974/3
df_train = df_train[df_train.annotations.str.len() > 0].reset_index(drop=True)
df_val = df_val[df_val.annotations.str.len() > 0].reset_index(drop=True)

# Datasets are built without augmentation transforms.
train_dataset = ReefDataset(df_train)
valid_dataset = ReefDataset(df_val)

# Both loaders share the same settings; Collate keeps images/targets as
# lists instead of stacking them.
loader_kwargs = dict(batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, collate_fn=Collate())
train_loader = torch.utils.data.DataLoader(train_dataset, **loader_kwargs)
valid_loader = torch.utils.data.DataLoader(valid_dataset, **loader_kwargs)


In [None]:
# Sanity check: pull one batch, move it to the GPU and show its last image.
for data in train_loader:
    images = [image.to('cuda') for image in data['images']]
    # `images` is a plain list of tensors, so it is indexed by position.
    # -1 picks the last image and also works for a batch smaller than 8.
    print(images[-1])
    break

In [None]:
model = get_model()
# Only optimise parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
EPOCHS = 10

optimizer = torch.optim.SGD(params, lr=0.0025, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


for epoch in range(EPOCHS):
    train(model, train_loader, optimizer)
    # Advance the schedule once per epoch, after the optimizer updates;
    # without this call the learning rate would never decay.
    lr_scheduler.step()
    val_loss = valid(model, valid_loader)
    # Save a checkpoint after every epoch.
    chk_name = f'fasterrcnn_resnet50_fpn-e{epoch}.bin'
    torch.save(model.state_dict(), chk_name)
    print(f'validation loss : {val_loss}')
