# Section 1: Exploratory Analysis


In [None]:
import cv2
from matplotlib import pyplot as plt
import pandas as pd 


In [None]:
# Read the training metadata CSV that ships with the competition data.
train_metadata = pd.read_csv("../input/tensorflow-great-barrier-reef/train.csv")
# Cell output: only the rows whose annotation string is not the empty list.
train_metadata.loc[train_metadata["annotations"] != "[]"]

In [None]:
from ast import literal_eval

def load_image(video_id, image_id):
    """Read one training frame from disk with OpenCV.

    Args:
        video_id: Number used in the ``video_{video_id}`` directory name.
        image_id: Number used as the ``{image_id}.jpg`` file name.

    Returns:
        The decoded image array as returned by ``cv2.imread`` (OpenCV
        stores colour images in BGR channel order).

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    path = f"../input/tensorflow-great-barrier-reef/train_images/video_{video_id}/{image_id}.jpg"
    img = cv2.imread(path)
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an unrelated-looking error.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return img

def plot_image(img):
    """Display an OpenCV (BGR) image with matplotlib.

    Args:
        img: Colour image array in BGR channel order, e.g. the result of
            ``load_image``.

    Returns:
        None. The figure is rendered via ``plt.show()``.
    """
    # matplotlib expects RGB, so swap the channels before drawing. The input
    # is left untouched because cvtColor returns a new array.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb)
    plt.show()

def parse_annotations(annotations):
    """Turn the annotation string from the metadata table into Python objects.

    Args:
        annotations: String holding a Python literal, e.g. ``"[]"`` or a
            list of dicts with ``x``, ``y``, ``width`` and ``height`` keys.

    Returns:
        The evaluated literal.
    """
    parsed = literal_eval(annotations)
    return parsed

def load_image_with_annotations(video_id, image_id, annotations):
    """Load a frame and draw every annotated bounding box onto it.

    Args:
        video_id: Passed through to ``load_image``.
        image_id: Passed through to ``load_image``.
        annotations: Annotation string understood by ``parse_annotations``;
            each entry must provide ``x``, ``y``, ``width`` and ``height``.

    Returns:
        The loaded image with one rectangle drawn per annotation.
    """
    canvas = load_image(video_id, image_id)
    box_color = (0,0,255)
    line_thickness = 2
    for box in parse_annotations(annotations):
        left, top = box['x'], box['y']
        right, bottom = left + box['width'], top + box['height']
        # cv2.rectangle draws directly into `canvas`.
        cv2.rectangle(canvas, (left, top), (right, bottom), box_color, line_thickness)
    return canvas

# First show frame 16 of video 0 without any overlay.
plot_image(load_image(0, 16))

# Then show the frame described by metadata row 16 with its boxes drawn,
# on a larger canvas.
sample_row = train_metadata.iloc[16]
img = load_image_with_annotations(
    sample_row.video_id,
    sample_row.video_frame,
    sample_row.annotations,
)
figsize = (16,8)
plt.figure(figsize=figsize)
plot_image(img)


In [None]:
from IPython.display import clear_output

# Flip through every annotated frame, replacing the previous output each time.
annotated_frames = train_metadata[train_metadata.annotations != "[]"]
# Iterate over the column values directly instead of feeding index labels to
# .iloc, which only lines up while the frame has its default 0..n-1 index.
for video_id, video_frame, frame_annotations in zip(
    annotated_frames.video_id,
    annotated_frames.video_frame,
    annotated_frames.annotations,
):
    img = load_image_with_annotations(video_id, video_frame, frame_annotations)
    # plot_image ends with plt.show(), which finishes the current figure, so
    # the large figure has to be created again for every frame.
    plt.figure(figsize=(16,8))
    plot_image(img)
    clear_output(wait=True)

In [None]:
# Survey the array shape of every training frame listed in the metadata.
shapes = []

frame_keys = zip(train_metadata.video_id, train_metadata.video_frame)
for video_id, video_frame in frame_keys:
    img = load_image(video_id, video_frame)
    shapes.append(img.shape)

# Distinct shapes only.
print(set(shapes))

# Section 2: Simple model

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Load torchvision's Faster R-CNN (ResNet-50 + FPN) detector with weights
# pre-trained on COCO.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument — check the installed version before
# changing this call.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [None]:
# Cell output: the module tree of the loaded detector.
model

In [None]:
# Swap the pre-trained box classification head for a fresh one sized for
# this task; the rest of the detector keeps its loaded weights.
num_classes = 2  # 1 class (starfish) + background
# The new head must accept the same feature width the old head consumed,
# so read it from the existing classification layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features
# Install the new, untrained head.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import torch
import os

class StarfishDataset(torch.utils.data.Dataset):
    """Detection dataset over the frames of a single training video.

    Each item is ``(img, target)`` where ``img`` is the RGB frame (after the
    optional ``transforms``) and ``target`` is a dict with ``boxes``
    (``x1, y1, x2, y2`` values), ``labels`` and ``image_id``.

    Annotations are read from the notebook-level ``train_metadata`` table and
    matched to image files by frame number, taken from the ``{frame}.jpg``
    file name.

    Args:
        transforms: Optional callable applied to the PIL image.
        video_id: Which ``video_{video_id}`` directory to read. Defaults to 0.
    """

    def __init__(self, transforms=None, video_id=0):
        self.transforms = transforms
        self.video_id = video_id
        self.img_dir = f"../input/tensorflow-great-barrier-reef/train_images/video_{video_id}"
        # Order files by frame number; a plain string sort would place
        # "10.jpg" before "2.jpg".
        self.imgs = sorted(os.listdir(self.img_dir), key=self._frame_number)
        # Map frame number -> raw annotation string for this video, so an
        # image is always paired with the metadata row of the same frame.
        video_rows = train_metadata[train_metadata.video_id == video_id]
        self.annotations_by_frame = dict(
            zip(video_rows.video_frame, video_rows.annotations)
        )

    @staticmethod
    def _frame_number(file_name):
        """Return the integer frame number encoded in a ``{frame}.jpg`` name."""
        return int(os.path.splitext(file_name)[0])

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the ``idx``-th frame.

        Raises:
            KeyError: If the frame has no row in ``train_metadata``.
        """
        file_name = self.imgs[idx]
        img = Image.open(os.path.join(self.img_dir, file_name)).convert("RGB")

        raw_annotations = self.annotations_by_frame[self._frame_number(file_name)]
        annotation = parse_annotations(raw_annotations)

        # Convert x/y/width/height into corner coordinates.
        boxes = [
            (box["x"], box["y"], box["x"] + box["width"], box["y"] + box["height"])
            for box in annotation
        ]
        if boxes:
            # there is only one class
            labels = torch.ones((len(boxes),), dtype=torch.int64)
        else:
            # Frames without starfish get a single placeholder box labelled
            # with class 0.
            boxes = [(1, 1, 2, 2)]
            labels = torch.zeros((1,), dtype=torch.int64)

        # NOTE(review): squeeze() turns a single box into shape (4,) and a
        # single label into a scalar. The output shapes are kept as they were
        # so existing callers see the same tensors; consumers that need
        # (N, 4) / (N,) must reshape.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).squeeze()
        labels = labels.squeeze()

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
        }

        if self.transforms is not None:
            img = self.transforms(img)
        return img, target

    def __len__(self):
        """Number of image files found for the video."""
        return len(self.imgs)
    


In [None]:
import torchvision.transforms as T

def get_transform(train):
    """Build the image preprocessing pipeline.

    Args:
        train: Accepted but currently ignored; random horizontal flipping for
            training is disabled.

    Returns:
        A ``T.Compose`` containing only ``T.ToTensor()``.
    """
    return T.Compose([T.ToTensor()])

In [None]:
dataset = StarfishDataset(transforms=get_transform(train=True))
# Keep every sample's image and target separate instead of stacking them:
# the number of boxes differs per frame, so the default collate cannot batch
# the targets.
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=1,
    shuffle=True,
    num_workers=0,
    collate_fn=lambda batch: tuple(zip(*batch)),
)

# For Training
# Select training mode explicitly so re-running this cell after the eval()
# call below still returns losses.
model.train()
images, targets = next(iter(data_loader))
images = list(images)
# The detector expects boxes shaped (N, 4) and labels shaped (N,); reshape
# covers samples where the dataset dropped the leading dimension.
targets = [
    {
        **target,
        "boxes": target["boxes"].reshape(-1, 4),
        "labels": target["labels"].reshape(-1),
    }
    for target in targets
]
output = model(images, targets)   # Returns losses and detections

# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
# No gradients are needed for a prediction-only forward pass.
with torch.no_grad():
    predictions = model(x)           # Returns predictions

In [None]:
# Cell output: what the detector returned for the two random inputs.
predictions

In [None]:
!ls ../input/tensorflow-great-barrier-reef/train_images/vide

In [None]:
# Keep only the metadata rows whose annotation string is not the empty list.
train_with_annotations = train_metadata.loc[train_metadata["annotations"] != "[]"]

In [None]:


# Accumulators, one entry per processed image.
data = []
all_labels = []
all_bboxes = []
image_id = []
imagePaths = []
# loop over the rows
for idx, row in train_with_annotations.iterrows():
    bboxes = []
    labels = []

    # Same location load_image reads from, recorded alongside the pixels.
    imagePath = f"../input/tensorflow-great-barrier-reef/train_images/video_{row['video_id']}/{row['video_frame']}.jpg"
    image = load_image(row["video_id"], row["video_frame"])
    # Original size, needed to express the boxes relative to the image.
    (h, w) = image.shape[:2]
    # load the image and preprocess it
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (224, 224))
    # update our list of data, class labels, bounding boxes, and
    # image paths
    data.append(image)
    annotations = parse_annotations(row["annotations"])

    for annotation in annotations:
        labels.append(1)
        # scale the bounding box coordinates relative to the spatial
        # dimensions of the input image, so they stay valid after the resize
        startX = annotation["x"] / w
        startY = annotation["y"] / h
        endX = (annotation["x"] + annotation["width"]) / w
        endY = (annotation["y"] + annotation["height"]) / h
        bboxes.append((startX, startY, endX, endY))

    image_id.append(row["image_id"])
    all_bboxes.append(bboxes)
    all_labels.append(labels)
    imagePaths.append(imagePath)
    # NOTE(review): stops after the first row — looks like a debugging
    # shortcut; remove to process every annotated frame.
    break

In [None]:
# Convert the collected Python lists to NumPy arrays.
# NOTE(review): `labels` and `bboxes` are the per-row lists that the loop
# building `data` re-creates on every iteration, so they only describe the
# last processed row; `all_labels` / `all_bboxes` hold the per-image lists —
# confirm which is intended.
data = np.array(data, dtype="float32")
labels = np.array(labels)
bboxes = np.array(bboxes, dtype="float32")
imagePaths = np.array(imagePaths)

# NOTE(review): trainImages, testImages, trainLabels, trainBBoxes and
# CustomTensorDataset are not defined in any cell visible here; a train/test
# split and the dataset class have to be added before this cell can run.
(trainImages, testImages) = torch.tensor(trainImages), torch.tensor(testImages)


# create data loaders
trainDS = CustomTensorDataset((trainImages, trainLabels, trainBBoxes))
batch_size = 8



# Section 3: Submit Test predictions