<a href="https://colab.research.google.com/github/raitharnett/tensorflow-great-barrier-reef/blob/main/cots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [62]:
%%capture
# Reinstall the latest kaggle package without touching its dependencies; %%capture hides pip's output.
!pip install --upgrade --force-reinstall --no-deps kaggle

In [64]:
# Alias: move kaggle.json from the working directory into ~/.kaggle/ and make it readable/writable by the owner only.
%alias install_kaggle mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

In [63]:
%alias download_cots_data kaggle competitions download -c tensorflow-great-barrier-reef --force >& /dev/null

In [None]:
%alias install_cots_data unzip -d  tensorflow-great-barrier-reef tensorflow-great-barrier-reef.zip >& /dev/null

In [65]:
# download tensorflow-great-barrier-reef data and import library used to submit predictions
from google.colab import files
import os
import sys
COTS_DATA = '/content/tensorflow-great-barrier-reef'
if not os.path.isdir(COTS_DATA):
  uploaded = files.upload()
  for fn in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(uploaded[fn])))
  %install_kaggle
  %download_cots_data
  %install_cots_data
sys.path.insert(0, COTS_DATA)
import greatbarrierreef

In [66]:
import os
import pathlib

# Clone the tensorflow models repository if it doesn't already exist
if "models" in pathlib.Path.cwd().parts:
  while "models" in pathlib.Path.cwd().parts:
    os.chdir('..')
elif not pathlib.Path('models').exists():
  !git clone --depth 1 https://github.com/tensorflow/models

In [34]:
# Install the Object Detection API
%%capture
%%bash
cd models/research/
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

In [102]:
import contextlib2
import io
import IPython
import json
import numpy as np
import os
import pathlib
import pandas as pd
import sys
import tensorflow as tf
import time

from PIL import Image, ImageDraw
# Lift pandas' display limits: no cap on rows or columns, and no fixed output width.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.width = None

In [109]:
from object_detection.utils import dataset_util
from object_detection.dataset_tools import tf_record_creation_util
# Directory under the dataset root that holds the training frames.
COTS_DATA_IMAGES = os.path.join(COTS_DATA,'train_images')
def createCOTSTFExample(row):
  """Build a `tf.train.Example` for one row of the COTS training table.

  Args:
    row: Record exposing `image_path` (location of the JPEG frame),
      `image_id`, and `annotations` (a string holding a list of dicts with
      `x`, `y`, `width` and `height` keys; presumably pixel units).

  Returns:
    A `tf.train.Example` carrying the encoded image, its size, and one
    normalised box plus a 'COTS' / 1 class entry per annotation. Rows with
    no annotations produce an example whose box and class lists are empty.
  """
  with tf.io.gfile.GFile(row.image_path, 'rb') as fid:
    encodedJpg = fid.read()
  # The image is opened only to read its size; the raw bytes are stored as-is.
  with Image.open(io.BytesIO(encodedJpg)) as image:
    width, height = image.size
  fileName = f'{row.image_id}'.encode('utf8')
  imageFormat = 'jpeg'.encode('utf8')
  # Swap single quotes for double quotes so the annotation text parses as JSON.
  annotations = json.loads(row.annotations.replace("'", '"'))
  xmin, xmax, ymin, ymax = [], [], [], []
  classesText, classes = [], []
  for annotation in annotations:
    # Box edges are stored as fractions of the image width / height.
    xmin.append(annotation['x'] / width)
    xmax.append((annotation['x'] + annotation['width']) / width)
    ymin.append(annotation['y'] / height)
    ymax.append((annotation['y'] + annotation['height']) / height)
    classesText.append('COTS'.encode('utf8'))
    classes.append(1)

  # Assemble the example once, after all boxes are collected, so that frames
  # without annotations still produce an example.
  tf_example = tf.train.Example(features=tf.train.Features(feature={
    'image/height': dataset_util.int64_feature(height),
    'image/width': dataset_util.int64_feature(width),
    'image/filename': dataset_util.bytes_feature(fileName),
    'image/source_id': dataset_util.bytes_feature(fileName),
    'image/encoded': dataset_util.bytes_feature(encodedJpg),
    'image/format': dataset_util.bytes_feature(imageFormat),
    'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
    'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
    'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
    'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
    'image/object/class/text': dataset_util.bytes_list_feature(classesText),
    'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))
  return tf_example



def loadCOTS(df):
  """Call `createCOTSTFExample` on each row of `df`, in row order.

  Args:
    df: DataFrame whose rows are passed one by one to `createCOTSTFExample`.

  Returns:
    None; whatever `createCOTSTFExample` produces is not kept.
  """
  records = (record for _index, record in df.iterrows())
  for record in records:
    createCOTSTFExample(record)


In [110]:
# Read the training table shipped with the dataset.
trainCsvPath = os.path.join(COTS_DATA, 'train.csv')
cotsDF = pd.read_csv(trainCsvPath)
# Add the on-disk location of every frame: <train_images>/video_<video_id>/<video_frame>.jpg
cotsDF['image_path'] = [
    os.path.join(COTS_DATA_IMAGES, f"video_{videoId}", f"{videoFrame}.jpg")
    for videoId, videoFrame in zip(cotsDF['video_id'], cotsDF['video_frame'])
]
# Smoke-run the example builder on the first ten rows only.
loadCOTS(cotsDF.head(10))
print(cotsDF.head())

   video_id  sequence  video_frame  sequence_frame image_id annotations  \
0         0     40258            0               0      0-0          []   
1         0     40258            1               1      0-1          []   
2         0     40258            2               2      0-2          []   
3         0     40258            3               3      0-3          []   
4         0     40258            4               4      0-4          []   

                                          image_path  
0  /content/tensorflow-great-barrier-reef/train_i...  
1  /content/tensorflow-great-barrier-reef/train_i...  
2  /content/tensorflow-great-barrier-reef/train_i...  
3  /content/tensorflow-great-barrier-reef/train_i...  
4  /content/tensorflow-great-barrier-reef/train_i...  
