# Train a COTS detection model with the TensorFlow Object Detection API

This notebook contains code to train a crown-of-thorns starfish (COTS) detection model to serve as a baseline model for [this competition](https://www.kaggle.com/c/tensorflow-great-barrier-reef/overview). We use the [TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection) to apply transfer learning on an [EfficientDet-D4](https://arxiv.org/abs/1911.09070) pretrained model.

# Install TensorFlow Object Detection API

Pip may report some dependency errors. You can safely ignore them and proceed as long as all tests in `model_builder_tf2_test.py` pass.

In [None]:
!git clone https://github.com/tensorflow/models
    
# Check out a certain commit to ensure that future changes in the TF ODT API codebase won't affect this notebook.
!cd models && git checkout ac8d06519

In [None]:
# All commands below run from the Object Detection API source tree; the
# working directory is restored at the end of the cell.
%cd models/research

# Compile protos.
!protoc object_detection/protos/*.proto --python_out=.

# Install TensorFlow Object Detection API.
# Note: I fixed the version of some dependencies to make it work on Kaggle notebooks. In particular:
# * scipy==1.6.3 to avoid the missing GLIBCXX_3.4.26 error
# * tensorflow and keras to 2.4.1 to be compatible with the current Kaggle TPU coordinator version
# Once Kaggle notebooks upgrade to TF 2.7, you can use the default setup.py script instead:
# cp object_detection/packages/tf2/setup.py .
# NOTE(review): the downloaded file is named setup_tf27.py and tensorflow_gcs_config
# is pinned to 2.7.0 below, which suggests TF 2.7 rather than 2.4.1. Confirm which
# versions the downloaded setup.py actually pins and update this note accordingly.
!wget -O setup.py https://storage.googleapis.com/odml-dataset/others/setup_tf27.py
!pip install -q --user .

# Test if the Object Detection API is working correctly
!python object_detection/builders/model_builder_tf2_test.py

# Upgrade tensorflow_gcs_config to the same version as TensorFlow.
!pip install tensorflow_gcs_config==2.7.0

# Return to the directory the notebook started in.
%cd ../..

# Import dependencies

In [None]:
import contextlib2
import io
import IPython
import json
import numpy as np
import os
import pathlib
import sys
import tensorflow as tf
import time

from PIL import Image, ImageDraw

# Import the library that is used to submit the prediction result.
# The competition input directory is put at the front of sys.path so that the
# `greatbarrierreef` module can be imported (presumably it ships inside
# INPUT_DIR; verify against the competition data listing).
INPUT_DIR = '/kaggle/input/tensorflow-great-barrier-reef/'
sys.path.insert(0, INPUT_DIR)
import greatbarrierreef

In [None]:
# The notebook is supposed to run with TF 2.4.1
# NOTE(review): the install cell downloads setup_tf27.py and pins
# tensorflow_gcs_config==2.7.0, so the 2.4.1 figure above may be stale.
# The printed tf.__version__ below is the authoritative value; confirm.

# Install the Cloud TPU client and ask it to configure the TPU for the
# TensorFlow version of this kernel (restart_type='ifNeeded').
!pip install -q --user cloud_tpu_client
from cloud_tpu_client import Client

c = Client()
c.configure_tpu_version(tf.__version__, restart_type='ifNeeded')

print(tf.__version__)
# Resolve the TPU cluster, connect this kernel to it, initialise the TPU
# system, and list the TPU logical devices that are now visible.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
print("All devices: ", tf.config.list_logical_devices('TPU'))

In [None]:
# Fetch the user's Google Cloud credential through Kaggle Secrets and register
# it with both TensorFlow and gcloud (presumably so that later cells can read
# from and write to Google Cloud Storage; confirm against the Kaggle docs).
# `user_secrets` is reused by a later cell to look up the bucket name.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
user_credential = user_secrets.get_gcloud_credential()
user_secrets.set_tensorflow_credential(user_credential)
user_secrets.set_gcloud_credentials()

In [None]:
# # Load the credentials from Kaggle Secrets.
# from kaggle_secrets import UserSecretsClient
# user_secrets = UserSecretsClient()

# # Activate the service account stored in the Kaggle Secrets to access Cloud Storage
# with open('/tmp/service_account.json', 'w') as f:
#     f.write(user_secrets.get_secret('service_account_json'))
# !gcloud auth activate-service-account --key-file=/tmp/service_account.json --no-user-output-enabled

In [None]:
# Create a folder on Google Cloud Storage to store the checkpoints.
# The folder is named after the current time in Japan Standard Time
# (YYYYMMDD-HHMM, minute resolution).
import pytz
from datetime import datetime
JST = pytz.timezone('Asia/Tokyo')
# Take the current instant as an explicitly UTC-aware datetime. A bare
# datetime.now() returns *naive local* time, which made the `utc_dt` name
# misleading and left the JST conversion relying on how astimezone()
# interprets naive values.
utc_dt = datetime.now(pytz.utc)
FOLDER_NAME = utc_dt.astimezone(JST).strftime("%Y%m%d-%H%M")
# The destination bucket is read from the Kaggle secret 'gcs_bucket_name'.
BUCKET_NAME = user_secrets.get_secret('gcs_bucket_name')
GCS_OUTPUT_PATH = f"gs://{BUCKET_NAME}/kaggle/{FOLDER_NAME}"
print(GCS_OUTPUT_PATH)

# Prepare the training dataset

See this [notebook](https://www.kaggle.com/khanhlvg/tensorflow-prepare-cots-dataset-for-tpu/) to learn how to convert the training images to the TFRecord format required by TensorFlow Object Detection API. We'll use the output of the conversion notebook here and start training a model.

In [None]:
# Load the path to the preprocessed dataset.
# KaggleDatasets().get_gcs_path() is asked for the GCS location of the attached
# 'crown-of-thorns-starfish-dataset-in-tfrecord' dataset. Later cells append
# the TFRecord, label-map and pretrained-checkpoint sub-paths to it.
from kaggle_datasets import KaggleDatasets
GCS_TFRECORD_BUCKET_PATH = KaggleDatasets().get_gcs_path('crown-of-thorns-starfish-dataset-in-tfrecord')
# Disabled alternative: a fixed bucket path (presumably a public copy of the same data; confirm).
# GCS_TFRECORD_BUCKET_PATH='gs://cots_data_public'

# Train an object detection model

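We'll use the [TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection) and an EfficientDet-D4 base model and apply transfer learning to train a COTS detection model. The pipeline below uses the `ssd_efficientnet-b4_bifpn_keras` feature extractor with 1280x1280 inputs and fine-tunes from the `efficientdet_d4_coco17_tpu-32` checkpoint. If you want to reduce training time for demonstration purposes, you can switch to a smaller model such as EfficientDet-D0, the smallest model in the EfficientDet model family; conversely, you can probably increase accuracy by switching to a larger EfficientDet model.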

In [None]:
from string import Template

# Pipeline configuration (protobuf text format) for the TF Object Detection API,
# written as a string.Template. The following placeholders are filled in by
# Template.substitute() in the next cell:
#   $batch_size            - train_config.batch_size
#   $training_steps        - cosine-decay total_steps and train_config.num_steps
#   $warmup_steps          - learning-rate warmup_steps
#   $fine_tune_checkpoint  - checkpoint to fine-tune from
#   $label_map_path        - label map used by both the train and eval readers
#   $train_input_path      - TFRecord pattern for training
#   $val_input_path        - TFRecord pattern for evaluation
# The text of the template itself is runtime data; edit it with care.
config_file_template = """
model {
  ssd {
    num_classes: 1
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 1280
        max_dimension: 1280
        pad_to_max_dimension: true
      }
    }
    feature_extractor {
      type: "ssd_efficientnet-b4_bifpn_keras"
      conv_hyperparams {
        regularizer {
          l2_regularizer {
            weight: 3.9999998989515007e-05
          }
        }
        initializer {
          truncated_normal_initializer {
            mean: 0.0
            stddev: 0.029999999329447746
          }
        }
        activation: SWISH
        batch_norm {
          decay: 0.9900000095367432
          scale: true
          epsilon: 0.0010000000474974513
        }
        force_use_bias: true
      }
      bifpn {
        min_level: 3
        max_level: 7
        num_iterations: 7
        num_filters: 224
      }
    }
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 1.0
        x_scale: 1.0
        height_scale: 1.0
        width_scale: 1.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        conv_hyperparams {
          regularizer {
            l2_regularizer {
              weight: 3.9999998989515007e-05
            }
          }
          initializer {
            random_normal_initializer {
              mean: 0.0
              stddev: 0.009999999776482582
            }
          }
          activation: SWISH
          batch_norm {
            decay: 0.9900000095367432
            scale: true
            epsilon: 0.0010000000474974513
          }
          force_use_bias: true
        }
        depth: 224
        num_layers_before_predictor: 4
        kernel_size: 3
        class_prediction_bias_init: -4.599999904632568
        use_depthwise: true
      }
    }
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        scales_per_octave: 3
      }
    }
    post_processing {
      batch_non_max_suppression {
        score_threshold: 9.99999993922529e-09
        iou_threshold: 0.5
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
    normalize_loss_by_num_matches: true
    loss {
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_loss {
        weighted_sigmoid_focal {
          gamma: 1.5
          alpha: 0.25
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    encode_background_as_zeros: true
    normalize_loc_loss_by_codesize: true
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    add_background_class: false
  }
}
train_config {
  batch_size: $batch_size
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_scale_crop_and_pad_to_square {
      output_size: 1280
      scale_min: 0.5
      scale_max: 2.0
    }
  }
  sync_replicas: true
  optimizer {
    momentum_optimizer {
      learning_rate {
        cosine_decay_learning_rate {
          learning_rate_base: 0.007999999821186066
          total_steps: $training_steps
          warmup_learning_rate: 0.00050000000474974513
          warmup_steps: $warmup_steps
        }
      }
      momentum_optimizer_value: 0.8999999761581421
    }
    use_moving_average: false
  }
  fine_tune_checkpoint: "$fine_tune_checkpoint"
  num_steps: $training_steps
  startup_delay_steps: 0.0
  replicas_to_aggregate: 8
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
  fine_tune_checkpoint_type: "detection"
  use_bfloat16: true
  fine_tune_checkpoint_version: V2
}
train_input_reader: {
  label_map_path: "$label_map_path"
  tf_record_input_reader {
    input_path: "$train_input_path"
  }
}

eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
  batch_size: 1;
}

eval_input_reader: {
  label_map_path: "$label_map_path"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "$val_input_path"
  }
}
"""

In [None]:
# Define the training pipeline: tunable hyperparameters first, then the GCS
# locations derived from the dataset bucket, and finally the rendered config.

BATCH_SIZE = 16
TRAINING_STEPS = 5000
WARMUP_STEPS = 500
PIPELINE_CONFIG_PATH = 'pipeline.config'

# Inputs that live next to the preprocessed dataset on Cloud Storage.
GCS_TFRECORD_TRAIN_PATH = f'{GCS_TFRECORD_BUCKET_PATH}/tfrecord/cots_train-?????-of-00008'
GCS_TFRECORD_VAL_PATH = f'{GCS_TFRECORD_BUCKET_PATH}/tfrecord/cots_val-?????-of-00008'
GCS_PRETRAINED_CHECKPOINT_PATH = f'{GCS_TFRECORD_BUCKET_PATH}/efficientdet_d4_coco17_tpu-32/checkpoint/ckpt-0'
GCS_LABEL_MAP_PATH = f'{GCS_TFRECORD_BUCKET_PATH}/label_map.pbtxt'

# Fill every $placeholder of the config template with the values above.
pipeline = Template(config_file_template).substitute({
    'batch_size': BATCH_SIZE,
    'training_steps': TRAINING_STEPS,
    'warmup_steps': WARMUP_STEPS,
    'label_map_path': GCS_LABEL_MAP_PATH,
    'train_input_path': GCS_TFRECORD_TRAIN_PATH,
    'val_input_path': GCS_TFRECORD_VAL_PATH,
    'fine_tune_checkpoint': GCS_PRETRAINED_CHECKPOINT_PATH,
})

# Persist the rendered config locally; the training, evaluation and export
# commands read it from PIPELINE_CONFIG_PATH.
with open(PIPELINE_CONFIG_PATH, 'w') as config_file:
    config_file.write(pipeline)

# Output location (on GCS) passed to the training/evaluation scripts.
MODEL_DIR = f'{GCS_OUTPUT_PATH}/cots_efficientdet_d4'

In [None]:
# Run the TF Object Detection API training script on the TPU, using the
# rendered pipeline config and MODEL_DIR (on GCS) as the model directory.
!python models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path={PIPELINE_CONFIG_PATH} \
    --model_dir={MODEL_DIR} \
    --use_tpu=true \
    --alsologtostderr

# Evaluate the object detection model

In [None]:
# Run the same script with --checkpoint_dir set, pointing at the checkpoints
# written to MODEL_DIR (per the script's flag help this selects eval-only mode;
# confirm for the pinned commit).
# NOTE(review): --eval_timeout=0 presumably makes the evaluator exit instead of
# waiting for further checkpoints; confirm against the flag description.
!python models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path={PIPELINE_CONFIG_PATH} \
    --model_dir={MODEL_DIR} \
    --checkpoint_dir={MODEL_DIR} \
    --eval_timeout=0 \
    --alsologtostderr

# Export as SavedModel for inference

In [None]:
# Export the trained checkpoint from MODEL_DIR into the local `output`
# directory, with an `image_tensor` input signature.
!python models/research/object_detection/exporter_main_v2.py \
    --input_type image_tensor \
    --pipeline_config_path={PIPELINE_CONFIG_PATH} \
    --trained_checkpoint_dir={MODEL_DIR} \
    --output_directory=output

In [None]:
# List the contents of the export directory to check what was written.
!ls output

# Clean up

In [None]:
# Remove the tensorflow/models repository that was cloned at the start of the
# notebook; it is no longer needed once the model has been exported.
!rm -rf models