<a href="https://colab.research.google.com/github/raitharnett/tensorflow-great-barrier-reef/blob/main/cots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
# Ask for a local file upload (a later cell moves kaggle.json out of the
# working directory) and report the name and size of each received file.
uploaded = files.upload()
for fn, content in uploaded.items():
  print(f'User uploaded file "{fn}" with length {len(content)} bytes')

In [None]:
#@title Install COTS data, TF models and an EfficientDet-D0 base model (to which transfer learning is applied to train a COTS detection model)
%%capture
%%bash
# This cell runs non-interactively, so every step below is written to be
# safe on a re-run and to never prompt for input.

# upgrade kaggle and install API token
pip install --upgrade --force-reinstall --no-deps kaggle
mkdir -p ~/.kaggle/
# kaggle.json is only in the working directory right after an upload; on a
# re-run it has already been moved, so skip the move instead of failing.
if [ -f kaggle.json ]; then
  mv kaggle.json ~/.kaggle/
fi
chmod 600 ~/.kaggle/kaggle.json
# download and unzip COTS data
kaggle competitions download -c tensorflow-great-barrier-reef --force
# -o: overwrite files from a previous run without asking for confirmation
unzip -o -d tensorflow-great-barrier-reef tensorflow-great-barrier-reef.zip
# download an EfficientDet-D0 base model and apply transfer learning to train a COTS detection model
# -nc: reuse an already downloaded archive instead of saving a duplicate "*.tar.gz.1"
wget -nc http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d0_coco17_tpu-32.tar.gz
tar -xvzf efficientdet_d0_coco17_tpu-32.tar.gz
# clone and install TF models
if [ ! -d models ]; then
  git clone --depth 1 https://github.com/tensorflow/models
fi
# Abort if the checkout is missing so the install never runs in the wrong directory.
cd models/research/ || exit 1
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

In [None]:
import contextlib2
import io
import IPython
import json
import numpy as np
import os
import pathlib
import pandas as pd
import sys
import tensorflow as tf
import time
from pathlib import Path
from enum import Enum

from PIL import Image, ImageDraw
from sklearn.model_selection import train_test_split

# Directory the competition archive is unzipped into. It is put first on
# sys.path so that the greatbarrierreef module can be imported from it.
COTS_DATA = '/content/tensorflow-great-barrier-reef'
sys.path.insert(0, COTS_DATA)
import greatbarrierreef

# Source images, TFRecord base paths and the model output directory.
COTS_DATA_IMAGES = os.path.join(COTS_DATA, 'train_images')
COTS_DATASET = '/content/dataset'
COTS_MODEL_DIR = '/content/cots_efficientdet_d0'
COTS_DATA_TRAIN_TF_RECORDS = COTS_DATASET + '/train'
COTS_DATA_TEST_TF_RECORDS = COTS_DATASET + '/test'

# Create the working directories up front; existing ones are left as they are.
for workDir in (COTS_DATASET, COTS_MODEL_DIR):
  os.makedirs(workDir, exist_ok=True)

class COTSClass(Enum):
  """Object classes written into the TFRecords.

  createCOTSTFExample stores each member's name as the class text and its
  value as the integer class label of every annotation.
  """
  COTS = 1

# Lift pandas' row, column and width limits (None) when displaying frames.
for displayOption in ('display.max_rows', 'display.max_columns', 'display.width'):
  pd.set_option(displayOption, None)

In [None]:
#@title COTS data TF record conversion functions
from object_detection.utils import dataset_util
from object_detection.dataset_tools import tf_record_creation_util
import contextlib2

def createCOTSTFExample(row):
  """Build the tf.train.Example for a single frame and its annotations.

  Args:
    row: record exposing `image_path` (file whose bytes are embedded as the
      encoded image), `image_id` (stored as both filename and source id) and
      `annotations` (string holding a list of dicts with 'x', 'y', 'width'
      and 'height' keys, divided here by the image width/height).

  Returns:
    A tf.train.Example with the encoded image, its dimensions, and one
    normalized box, class text and class label per annotation. Box
    coordinates are clamped to [0, 1] so annotations that reach past the
    image border do not yield out-of-range values.
  """
  with tf.io.gfile.GFile(row.image_path, 'rb') as fid:
    encodedJpg = fid.read()
  # Only the dimensions are needed; the context manager closes the image.
  with Image.open(io.BytesIO(encodedJpg)) as image:
    width, height = image.size
  fileName = f'{row.image_id}'.encode('utf8')
  imageFormat = 'jpeg'.encode('utf8')
  # Presumably the column holds single-quoted dict literals; the quotes are
  # swapped so the text parses as JSON.
  annotations = json.loads(row.annotations.replace("'", '"'))

  def clampToUnit(value):
    """Limit a normalized coordinate to the [0, 1] range."""
    return min(max(value, 0.0), 1.0)

  xmin, xmax, ymin, ymax = [], [], [], []
  classesText, classes = [], []
  for annotation in annotations:
    left, top = annotation['x'], annotation['y']
    xmin.append(clampToUnit(left / width))
    xmax.append(clampToUnit((left + annotation['width']) / width))
    ymin.append(clampToUnit(top / height))
    ymax.append(clampToUnit((top + annotation['height']) / height))
    classesText.append(COTSClass.COTS.name.encode('utf8'))
    classes.append(COTSClass.COTS.value)

  tfExample = tf.train.Example(features=tf.train.Features(feature={
    'image/height': dataset_util.int64_feature(height),
    'image/width': dataset_util.int64_feature(width),
    'image/filename': dataset_util.bytes_feature(fileName),
    'image/source_id': dataset_util.bytes_feature(fileName),
    'image/encoded': dataset_util.bytes_feature(encodedJpg),
    'image/format': dataset_util.bytes_feature(imageFormat),
    'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
    'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
    'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
    'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
    'image/object/class/text': dataset_util.bytes_list_feature(classesText),
    'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))

  return tfExample

def loadCOTS(df, basePath, numShards=10):
  """Write every row of `df` as a tf.train.Example into sharded TFRecord files.

  Args:
    df: DataFrame whose rows are accepted by createCOTSTFExample.
    basePath: base path handed to open_sharded_output_tfrecords for the shards.
    numShards: number of output shards to spread the examples over.
  """
  with contextlib2.ExitStack() as tfRecordCloseStack:
    outputTFRecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tfRecordCloseStack, basePath, numShards)
    # Shard by row position rather than index label: after a shuffled split
    # the labels are an arbitrary subset, and a non-integer index would make
    # `label % numShards` fail. Position gives an even round-robin spread.
    for position, (_, row) in enumerate(df.iterrows()):
      tfExample = createCOTSTFExample(row)
      outputTFRecords[position % numShards].write(tfExample.SerializeToString())

In [None]:
cotsDF = pd.read_csv(os.path.join(COTS_DATA,'train.csv'))
# add a 'image_path' column
cotsDF['image_path'] = cotsDF.apply(lambda r: os.path.join(COTS_DATA_IMAGES,f"video_{r.video_id}",f"{r.video_frame}.jpg"), axis=1)
# Fixed seed so the 80/20 split, and therefore the generated TFRecords, are
# the same on every run of the notebook.
COTS_SPLIT_SEED = 42
cotsDFSplit = train_test_split(cotsDF, train_size=0.8, random_state=COTS_SPLIT_SEED)
# train_test_split returns [train, test]; pair each part with its TFRecord base path.
cotsTFRecordKeys = [COTS_DATA_TRAIN_TF_RECORDS, COTS_DATA_TEST_TF_RECORDS]
cotsTrainingTestData = dict(zip(cotsTFRecordKeys, cotsDFSplit))
for basePath, df in cotsTrainingTestData.items():
  loadCOTS(df, basePath)

In [None]:
# Label map written to the path the pipeline config refers to as label_map_path.
# It is generated from COTSClass so the ids and names cannot drift from the
# class labels and class text stored in the TFRecords.
label_map_str = '\n'.join(
    f"item {{\n  id: {cotsClass.value}\n  name: '{cotsClass.name}'\n}}"
    for cotsClass in COTSClass)

with open('dataset/label_map.pbtxt', 'w') as f:
  f.write(label_map_str)

In [None]:
#@title Pipeline template
from string import Template

# Pipeline config text kept as a string.Template source: $training_steps and
# $warmup_steps are placeholders that have to be substituted before the config
# is written to disk. The checkpoint, label map and TFRecord paths inside are
# relative, and the "-of-00010" input patterns expect ten shards per dataset.
config_file_template = """
# SSD with EfficientNet-b0 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d0).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Initialized from an EfficientDet-D0 checkpoint.
#
# Train on GPU

model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    num_classes: 1
    add_background_class: false
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 3
      }
    }
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 1280
        max_dimension: 1280
        pad_to_max_dimension: true
        }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 64
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          force_use_bias: true
          activation: SWISH
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true
            decay: 0.99
            epsilon: 0.001
          }
        }
        num_layers_before_predictor: 3
        kernel_size: 3
        use_depthwise: true
      }
    }
    feature_extractor {
      type: 'ssd_efficientnet-b0_bifpn_keras'
      bifpn {
        min_level: 3
        max_level: 7
        num_iterations: 3
        num_filters: 64
      }
      conv_hyperparams {
        force_use_bias: true
        activation: SWISH
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true,
          decay: 0.99,
          epsilon: 0.001,
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 1.5
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.5
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  fine_tune_checkpoint: "efficientdet_d0_coco17_tpu-32/checkpoint/ckpt-0"
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint_type: "detection"
  batch_size: 2
  sync_replicas: false
  startup_delay_steps: 0
  replicas_to_aggregate: 1
  use_bfloat16: false
  num_steps: $training_steps
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_scale_crop_and_pad_to_square {
      output_size: 1280
      scale_min: 0.5
      scale_max: 2.0
    }
  }
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: 5e-3
          total_steps: $training_steps
          warmup_learning_rate: 5e-4
          warmup_steps: $warmup_steps
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

train_input_reader: {
  label_map_path: "dataset/label_map.pbtxt"
  tf_record_input_reader {
    input_path: "dataset/train-?????-of-00010"
  }
}

eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
  batch_size: 2;
}

eval_input_reader: {
  label_map_path: "dataset/label_map.pbtxt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "dataset/test-?????-of-00010"
  }
}
"""

In [None]:
# Values substituted for the template's $training_steps and $warmup_steps
# placeholders, and the location the rendered config is written to.
TRAINING_STEPS = 1000
WARMUP_STEPS = 100
PIPELINE_CONFIG_PATH = 'dataset/pipeline.config'

templateValues = {
    'training_steps': TRAINING_STEPS,
    'warmup_steps': WARMUP_STEPS,
}
pipeline = Template(config_file_template).substitute(templateValues)

# Persist the rendered config so the command-line tools can read it.
with open(PIPELINE_CONFIG_PATH, 'w') as configFile:
    configFile.write(pipeline)

In [None]:
#@title Train, evaluate and save model
%%bash -s "$COTS_MODEL_DIR" "$PIPELINE_CONFIG_PATH"
# Stop at the first failing step (or a missing argument) so evaluation and
# export never run after a failed training step.
set -euo pipefail
# Positional arguments come from the Python variables on the %%bash line.
MODEL_DIR="$1"
PIPELINE_CONFIG_PATH="$2"
# train model
python models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path="$PIPELINE_CONFIG_PATH" \
    --model_dir="$MODEL_DIR" \
    --alsologtostderr
# evaluate model
python models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path="$PIPELINE_CONFIG_PATH" \
    --model_dir="$MODEL_DIR" \
    --checkpoint_dir="$MODEL_DIR" \
    --eval_timeout=0 \
    --alsologtostderr
# save model
python models/research/object_detection/exporter_main_v2.py \
    --input_type image_tensor \
    --pipeline_config_path="$PIPELINE_CONFIG_PATH" \
    --trained_checkpoint_dir="$MODEL_DIR" \
    --output_directory="$MODEL_DIR/output" \
    --alsologtostderr