In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

# Hope this notebook is helpful for getting a better understanding of the data.

import os

from PIL import Image, ImageDraw
import cv2
import re
import pandas as pd
import numpy as np
from tqdm import tqdm

from matplotlib import pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
from IPython.display import Video, display

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# Summarise the input directory instead of printing every path: the image
# folders used below hold one JPEG per video frame, so a full listing would
# bury the rest of the notebook under thousands of lines.
max_listed_per_dir = 5  # number of example file names shown per directory
for dirname, _, filenames in os.walk('/kaggle/input'):
    if not filenames:
        continue
    print(f"{dirname}: {len(filenames)} file(s)")
    for filename in sorted(filenames)[:max_listed_per_dir]:
        print("   ", os.path.join(dirname, filename))
    if len(filenames) > max_listed_per_dir:
        print(f"    ... and {len(filenames) - max_listed_per_dir} more")

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Locations of the competition files, relative to the notebook's working directory.
dataset = dict(
    root_dir='../input/tensorflow-great-barrier-reef',
    train_csv='../input/tensorflow-great-barrier-reef/train.csv',
    test_csv='../input/tensorflow-great-barrier-reef/test.csv',
    sample_submission_csv='../input/tensorflow-great-barrier-reef/example_sample_submission.csv',
    video_img_dir='../input/tensorflow-great-barrier-reef/train_images',
)

In [None]:
# Read the training table and the test table from the paths registered in `dataset`.
train_csv, test_csv = (
    pd.read_csv(dataset[csv_key]) for csv_key in ('train_csv', 'test_csv')
)

In [None]:
# Preview the first rows of the training table.
train_csv.head()

In [None]:
# The row count of train_csv is reported as the number of frames.
print("number of frames:", train_csv.shape[0])

In [None]:
# Number of rows per video_id, sorted from the smallest count to the largest,
# kept as a one-column DataFrame indexed by video_id.
frame_counts = train_csv['video_id'].value_counts().sort_values().to_frame()
frame_counts.head()

In [None]:
# Check that every video has as many CSV rows as image files on disk.
# `.iloc[0]` reads the single count column by position because its label
# differs between pandas versions ('video_id' before 2.0, 'count' from 2.0 on).
for video_id in (0, 1, 2):
    video_dir = os.path.join(dataset['video_img_dir'], f'video_{video_id}')
    num_records = frame_counts.loc[video_id].iloc[0]
    print(f"number of records in video_{video_id} matched: ", num_records == len(os.listdir(video_dir)))

In [None]:
# Number of frames in each sequence, smallest sequences first.
sequence_counts = train_csv['sequence'].value_counts().sort_values().reset_index()
# Use a flat list of labels: a nested list would create MultiIndex columns,
# so `sequence_counts['sequence']` would no longer be a plain Series.
sequence_counts.columns = ['sequence', 'num_frames']
print("number of sequences:", len(sequence_counts))
sequence_counts.head()

In [None]:
# Display the test table.
test_csv

In [None]:
# Frames whose annotation string is the literal empty list carry no boxes.
empty_annotation_mask = train_csv.annotations == '[]'
num_no_obj_frame = train_csv.loc[empty_annotation_mask, 'annotations'].count()
print("number of frames without objects:", num_no_obj_frame)

In [None]:
# Frames whose annotation string is anything other than the empty list.
annotated_mask = train_csv.annotations != '[]'
num_with_obj_frame = train_csv.loc[annotated_mask, 'annotations'].count()
print("number of frames with objects:", num_with_obj_frame)

In [None]:
# Preview the first rows that have a non-empty annotation string.
train_csv[train_csv.annotations != '[]'].head()

In [None]:
print('ratio of frames with objects:', num_with_obj_frame / len(train_csv))

fig, axes = plt.subplots(1, 1, figsize=(12, 6))

# One bar per frame type, drawn on the axes created above.
frame_type_labels = ['Number of Frames with Objects', 'Number of Frames with No Objects']
frame_type_counts = [num_with_obj_frame, num_no_obj_frame]
sns.barplot(x=frame_type_labels, y=frame_type_counts, ax=axes)
axes.set(
    title="Distribution of Frames with/without Objects",
    xlabel="Frame Types",
    ylabel="Count",
)

plt.show()

In [None]:
def decode_annotation(annot_line):
    """Parse an annotation string into a list of boxes.

    ``annot_line`` looks like
    ``[{'x': 540, 'y': 310, 'width': 113, 'height': 105}, {'x': 657, ...}]``.
    Every dictionary-shaped chunk matched by the pattern yields one
    ``[x, y, width, height, confidence]`` list of floats. The four numbers are
    taken in the order they appear in the chunk and ``confidence`` is always
    ``1.0``. A string without any match gives an empty list.
    """
    box_pattern = r'\{\'\w\'\:\s\d+\,\s\'\w\'\:\s\d+\,\s\'\w+\'\:\s\d+\,\s\'\w+\'\:\s\d+\}'
    val_pattern = r'\d+'

    def to_box(chunk):
        # Unpacking into four names keeps the "exactly four numbers" requirement.
        x, y, width, height = (float(number) for number in re.findall(val_pattern, chunk))
        return [x, y, width, height, 1.0]

    return [to_box(chunk) for chunk in re.findall(box_pattern, annot_line)]

def count_boxes(annot_line):
    """Return how many boxes an annotation string contains.

    The same box pattern as ``decode_annotation`` is applied to the whole
    string, so the count always equals the number of boxes that function
    would decode. ``"[]"`` and any string without a match give ``0``.
    """
    box_pattern = r'\{\'\w\'\:\s\d+\,\s\'\w\'\:\s\d+\,\s\'\w+\'\:\s\d+\,\s\'\w+\'\:\s\d+\}'
    # Search the untouched string: trimming the first and last character is not
    # needed for findall and would cut the braces off an unbracketed box.
    return len(re.findall(box_pattern, annot_line))


def test_decode_annotation(annot_line):
    """Print ``annot_line`` followed by each box ``decode_annotation`` extracts from it."""
    print("sample:", annot_line)
    box_no = 0
    for box in decode_annotation(annot_line):
        print(f"box {box_no}:", box)
        box_no += 1

In [None]:
# Hand-written annotation strings with three, two, one and zero boxes.
test_samples = [
    "[{'x': 540, 'y': 310, 'width': 113, 'height': 105}, {'x': 657, 'y': 501, 'width': 95, 'height': 56}, {'x': 257, 'y': 101, 'width': 42, 'height': 59}]",
    "[{'x': 540, 'y': 310, 'width': 113, 'height': 105}, {'x': 657, 'y': 501, 'width': 95, 'height': 59}]",
    "[{'x': 12, 'y': 250, 'width': 143, 'height': 82}]",
    "[]"
]

# For every sample: report the box count, then print the decoded boxes.
for test_no, sample in enumerate(test_samples, start=1):
    print(f"Test {test_no}:", f"found {count_boxes(sample)} boxes")

    test_decode_annotation(sample)
    print()

In [None]:
# Store the per-frame box count next to the annotation string.
train_csv['num_boxes'] = train_csv['annotations'].map(count_boxes)


In [None]:
# Preview annotated frames again, now including the num_boxes column.
train_csv[train_csv.annotations != '[]'].head()

In [None]:
# Among annotated frames, count how many frames carry each number of boxes,
# ordered from the most frequent box count to the least frequent.
boxes_dist = train_csv[train_csv.annotations != '[]']['num_boxes'].value_counts().sort_values(ascending=False).reset_index()
# Positional rename: first column becomes num_boxes, second becomes num_frames.
boxes_dist.columns = ['num_boxes', 'num_frames']
boxes_dist

In [None]:
fig = plt.figure(figsize=(24, 8))
# barplot draws on the current axes of the new figure and returns them.
box_count_ax = sns.barplot(x=boxes_dist.num_boxes, y=boxes_dist.num_frames)

box_count_ax.set_title("Box Distribution")
box_count_ax.set_xlabel("Number of Boxes")
box_count_ax.set_ylabel("Frame Counts")

plt.show()

In [None]:
def gen_file_path(image_id):
    """Build the image path for an ``image_id`` written as ``<video>-<frame>``.

    The part before the first ``-`` selects the ``video_<video>`` folder under
    ``dataset['video_img_dir']`` and the part after it is the file stem of the
    ``.jpg`` file.
    """
    id_parts = image_id.split('-')
    video_folder = 'video_' + id_parts[0]
    frame_file = id_parts[1] + '.jpg'
    return os.path.join(dataset['video_img_dir'], video_folder, frame_file)

def draw_boxes(image_path, annot_line):
    """Return a copy of the image at ``image_path`` with its boxes outlined in red.

    Args:
        image_path: path of the image file to open.
        annot_line: annotation string understood by ``decode_annotation``.

    Returns:
        A new PIL image; the file on disk is not modified.
    """
    # ImageDraw.rectangle takes two corners, while the annotation stores
    # x, y, width, height, so convert each box to [x0, y0, x1, y1].
    corners = [
        [x, y, x + width, y + height]
        for x, y, width, height, _confidence in decode_annotation(annot_line)
    ]

    # copy() reads the pixel data, so the file can be closed right afterwards
    # instead of staying open until the image object is garbage-collected.
    with Image.open(image_path) as image:
        imgcp = image.copy()

    imgcp_draw = ImageDraw.Draw(imgcp)
    for corner in corners:
        imgcp_draw.rectangle(corner, fill=None, outline="red", width=5)

    return imgcp

In [None]:
# Resolve every image_id to the path of its image file.
train_csv['file_path'] = train_csv['image_id'].map(gen_file_path)

In [None]:
# Preview the table again, now with the num_boxes and file_path columns.
train_csv.head()

In [None]:
# One row per distinct num_boxes value; first() takes, for each column, the
# first non-null value found in that group. The result is indexed by num_boxes.
samples = train_csv.groupby('num_boxes').first()

In [None]:
# One panel per distinct box count, three panels per row.
# The number of rows is derived from `samples` and panels are placed by
# position: `samples` is indexed by num_boxes, so using that value as the
# subplot slot fails as soon as a box count exceeds the size of a fixed grid.
c = 3
r = -(-len(samples) // c)  # ceiling division
plt.figure(figsize=(24, 5 * max(r, 1)))

for position, (num_boxes, row) in enumerate(samples.iterrows(), start=1):
    plt.subplot(r, c, position)
    plt.imshow(draw_boxes(row['file_path'], row['annotations']))
    plt.title(f"{num_boxes} box(es)")

plt.tight_layout()
plt.show()

In [None]:
# Gather the (x, y) and (width, height) pairs of every annotated box.
all_boxes_xy = []
all_boxes_wh = []

for annot_line in tqdm(train_csv['annotations'], total=len(train_csv)):
    if annot_line == '[]':
        continue  # frame without boxes
    decoded = decode_annotation(annot_line)
    all_boxes_xy.extend([box[0], box[1]] for box in decoded)
    all_boxes_wh.extend([box[2], box[3]] for box in decoded)

# Convert the collected pairs to arrays for the statistics and plots below.
all_boxes_xy = np.array(all_boxes_xy)
all_boxes_wh = np.array(all_boxes_wh)

In [None]:
# The annotation's x/y are the top-left corner of a box (draw_boxes spans each
# box from (x, y) to (x + width, y + height)), so the centre is the corner
# shifted by half the box size.
box_center_df = pd.DataFrame.from_records(all_boxes_xy + all_boxes_wh / 2, columns=['x', 'y'])

box_shape_df  = pd.DataFrame.from_records(all_boxes_wh, columns=['width', 'height'])
# Box area in square pixels.
box_shape_df['area'] = box_shape_df['width'] * box_shape_df['height']

In [None]:
# Summary statistics of the x and y columns of box_center_df.
box_center_df.describe()

In [None]:
# Summary statistics of box width, height and area.
box_shape_df.describe()

In [None]:
# all_boxes_xy holds top-left corners; shift each by half its box size so the
# scatter shows box centres, as the title states.
box_centers = all_boxes_xy + all_boxes_wh / 2

plt.figure(figsize=(28, 16))
plt.scatter(x=box_centers[:, 0], y=box_centers[:, 1], s=0.5, color='red')
plt.title("Distribution of Box Center Coordinate on Image")
plt.xlabel("X value")
plt.ylabel("Y value")
plt.show()

# Sequence

In [None]:
# Total number of boxes per sequence, largest first, transposed into one wide row.
train_csv.groupby('sequence')['num_boxes'].sum().sort_values(ascending=False).to_frame().T

In [None]:
# Number of frames (non-null image_id values) per sequence, largest first, as one wide row.
train_csv.groupby('sequence')['image_id'].count().sort_values(ascending=False).to_frame().T


In [None]:
# All rows that belong to the sequence with id 22643.
sample_seq = train_csv[train_csv.sequence == 22643]
sample_seq

In [None]:
from PIL import Image
# Open the image of the row labelled 0 to inspect a raw frame.
img = Image.open(train_csv.loc[0, 'file_path'])

In [None]:
# Show the frame; the cell's value is the image size as (width, height).
plt.imshow(np.array(img))
img.size

# Creating Dataset

In [None]:
import torch
from torch.utils.data import DataLoader
!pip install -qU torch_snippets
from torch_snippets import *

In [None]:
# Use the GPU when torch can see one, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Class name -> integer id used in the detection targets, plus the reverse lookup.
label2target = {'starfish': 1, 'background': 0}
target2label = dict((target, label) for label, target in label2target.items())
def preprocess_image(img):
    """Convert a 3-D image array to a float32 tensor on ``device``.

    The last axis is moved to the front (height-width-channel to
    channel-height-width for the image arrays produced in this notebook).
    """
    channels_first = torch.tensor(img).permute(2, 0, 1)
    on_device = channels_first.to(device)
    return on_device.float()
class OpenDataset(torch.utils.data.Dataset):
    """Detection dataset over a frame table with ``file_path`` and ``annotations`` columns.

    Each item is ``(image, target)``. ``image`` is the output of
    ``preprocess_image`` for the resized frame. ``target`` is a dictionary with
    ``boxes`` (float32, shape ``[N, 4]``, ``[x0, y0, x1, y1]`` corners) and
    ``labels`` (int64, shape ``[N]``, all ``1``).
    """
    # Every image is resized to this width and height before it is returned.
    w, h = 1280 , 720

    def __init__(self, df):
        """Keep the frame table and cache its image paths."""
        self.df = df
        self.image_infos = df['file_path'].values

    def __getitem__(self, ix):
        """Load frame ``ix`` and its boxes, both expressed in the resized frame."""
        img_path = self.image_infos[ix]
        # convert() reads the pixel data, so the file can be closed right away.
        with Image.open(img_path) as raw:
            rgb = raw.convert("RGB")
        orig_w, orig_h = rgb.size
        img = np.array(rgb.resize((self.w, self.h), resample=Image.BILINEAR))/255.

        # Annotations are given in pixels of the original image; scale them so
        # they still line up with the image after it has been resized.
        scale_x, scale_y = self.w / orig_w, self.h / orig_h
        data = [
            [x * scale_x, y * scale_y, (x + width) * scale_x, (y + height) * scale_y]
            for x, y, width, height, _confidence
            in decode_annotation(self.df['annotations'].values[ix])
        ]

        # torch FRCNN expects ground truths as a dictionary of tensors
        target = {}
        # The explicit shape keeps [0, 4] for a frame without boxes, where a
        # bare conversion of an empty list would give shape [0].
        target["boxes"] = torch.tensor(data, dtype=torch.float32).reshape(len(data), 4)
        target["labels"] = torch.ones(len(data), dtype=torch.int64)
        img = preprocess_image(img)
        return img, target

    def collate_fn(self, batch):
        """Regroup a list of ``(image, target)`` pairs into ``(images, targets)`` tuples."""
        return tuple(zip(*batch))

    def __len__(self):
        """Number of frames in the table."""
        return len(self.image_infos)

In [None]:
# Only frames with at least one annotated box are used for training and validation.
data = train_csv[train_csv.annotations != "[]"]
from sklearn.model_selection import train_test_split
# Hold out 10% of the image ids; random_state fixes the split.
trn_ids, val_ids = train_test_split(data.image_id.unique(), test_size=0.1, random_state=99)
trn_df, val_df = data[data['image_id'].isin(trn_ids)], data[data['image_id'].isin(val_ids)]

train_ds = OpenDataset(trn_df)
test_ds = OpenDataset(val_df)
# Shuffle the training frames so batches are not served in table order; the
# seeded generator keeps the order reproducible between runs.
train_loader = DataLoader(train_ds, batch_size=4, collate_fn=train_ds.collate_fn, drop_last=True,
                          shuffle=True, generator=torch.Generator().manual_seed(99))
# Keep the final partial batch so every held-out frame contributes to validation.
test_loader = DataLoader(test_ds, batch_size=4, collate_fn=test_ds.collate_fn, drop_last=False)
len(trn_df), len(val_df)

# Model Testing

In [None]:
# Number of output classes for the detection head; label2target defines two
# classes (background = 0, starfish = 1).
num_classes = 2
# Same expression as the earlier `device` assignment in this notebook.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor


def get_model():
    """Return a pretrained Faster R-CNN (ResNet-50 FPN) whose box predictor outputs ``num_classes`` classes."""
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    roi_heads = detector.roi_heads
    # Replace the stock predictor with one that has the same input width
    # but this notebook's number of classes.
    head_inputs = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_inputs, num_classes)
    return detector

In [None]:
# Defining training and validation functions for a single batch
def train_batch(inputs, model, optimizer):
    """Run one optimisation step on a batch.

    ``inputs`` is an ``(images, targets)`` pair. Returns ``(loss, losses)``
    where ``losses`` is the dictionary produced by the model and ``loss`` is
    the sum of its values.
    """
    model.train()
    images, annotations = inputs
    images = [image.to(device) for image in images]
    annotations = [
        {key: value.to(device) for key, value in annotation.items()}
        for annotation in annotations
    ]
    optimizer.zero_grad()
    losses = model(images, annotations)
    loss = sum(losses.values())
    loss.backward()
    optimizer.step()
    return loss, losses

@torch.no_grad()  # gradient computation is disabled inside the function below
def validate_batch(inputs, model):
    """Compute the losses of one batch without updating the model.

    ``inputs`` is an ``(images, targets)`` pair. Returns ``(loss, losses)``
    where ``losses`` is the dictionary produced by the model and ``loss`` is
    the sum of its values.
    """
    # The model's forward method is used as provided and only returns the
    # losses in train mode, so train mode is kept even for validation.
    model.train()
    images, targets = inputs
    images = [image.to(device) for image in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    # No optimizer call here: nothing is back-propagated under no_grad, and the
    # function must not depend on a global `optimizer` it does not receive.
    losses = model(images, targets)
    loss = sum(losses.values())
    return loss, losses

In [None]:
# Build the detector and move it to the selected device.
model = get_model().to(device)
# SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
n_epochs = 8
# Report comes from the star import of torch_snippets; presumably a loss
# logger sized by the number of epochs. TODO confirm against that library.
log = Report(n_epochs)

In [None]:
# Keys of the model's loss dictionary and the suffixes they are logged under.
_loss_keys = ['loss_classifier', 'loss_box_reg', 'loss_objectness', 'loss_rpn_box_reg']
_metric_suffixes = ['loc_loss', 'regr_loss', 'objectness_loss', 'rpn_box_reg_loss']
# Print averages twice per run; max() avoids a modulo by zero when n_epochs is 1.
_report_every = max(1, n_epochs // 2)


def _record_losses(prefix, pos, loss, losses):
    """Log the total loss and its four components under ``<prefix>_...`` names."""
    metrics = {f'{prefix}_loss': loss.item()}
    for suffix, key in zip(_metric_suffixes, _loss_keys):
        metrics[f'{prefix}_{suffix}'] = losses[key].item()
    log.record(pos, **metrics, end='\r')


for epoch in range(n_epochs):
    # Training pass: one optimisation step per batch.
    _n = len(train_loader)
    for ix, inputs in enumerate(train_loader):
        loss, losses = train_batch(inputs, model, optimizer)
        # Fractional epoch position used as the x value of the log.
        _record_losses('trn', epoch + (ix + 1) / _n, loss, losses)

    # Validation pass: losses only, no parameter update.
    _n = len(test_loader)
    for ix, inputs in enumerate(test_loader):
        loss, losses = validate_batch(inputs, model)
        _record_losses('val', epoch + (ix + 1) / _n, loss, losses)
    if (epoch + 1) % _report_every == 0: log.report_avgs(epoch + 1)

In [None]:
# Plot the logged training and validation loss per epoch.
log.plot_epochs(['trn_loss','val_loss'])

In [None]:
# Fetch one batch and unpack it once, so the images and the targets are
# guaranteed to come from the same batch and the data is only loaded once.
inputt, input1 = next(iter(test_loader))

In [None]:
model.eval()
# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    sample_prediction = model(inputt[0].unsqueeze(0))
sample_prediction

In [None]:
from torchvision.ops import nms
def decode_output(output, iou_threshold=0.05):
    """Convert one prediction dictionary to plain lists after non-maximum suppression.

    Args:
        output: dictionary with ``boxes``, ``labels`` and ``scores`` tensors.
        iou_threshold: overlap threshold passed to ``nms``.

    Returns:
        ``(bbs, confs, labels)`` as Python lists restricted to the detections
        kept by ``nms``; ``labels`` holds the names from ``target2label``.
    """
    bbs = output['boxes'].cpu().detach().numpy().astype(np.uint16)
    labels = np.array([target2label[i] for i in output['labels'].cpu().detach().numpy()])
    confs = output['scores'].cpu().detach().numpy()
    keep = nms(torch.tensor(bbs.astype(np.float32)), torch.tensor(confs), iou_threshold)
    # Index with a NumPy integer array rather than the tensor itself: array
    # indexing always keeps the leading axis, so a single surviving detection
    # needs no special case.
    keep = keep.cpu().numpy()
    bbs, confs, labels = [array[keep] for array in (bbs, confs, labels)]
    return bbs.tolist(), confs.tolist(), labels.tolist()

In [None]:
model.eval()
# Inference only: no gradients are needed to display predictions.
with torch.no_grad():
    for batch_ix, (images, targets) in enumerate(test_loader):
        if batch_ix == 3: break  # only the first three batches are shown
        outputs = model(list(images))
        for image, output in zip(images, outputs):
            bbs, confs, labels = decode_output(output)
            # Caption each box with its label and confidence.
            info = [f'{l}@{c:.2f}' for l, c in zip(labels, confs)]
            show(image.cpu().permute(1, 2, 0), bbs=bbs, texts=info, sz=7)

In [None]:
# File that receives only the model's state_dict.
PATH ='TF-OD1.pth'
# File that receives the whole model object.
PATH1 ='TF-OD2.pth'
torch.save(model.state_dict(), PATH)
torch.save(model, PATH1)