In [1]:
import numpy as np
import pandas as pd
import cv2
from pathlib import Path
from colorama import Fore, Back, Style
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, Image
import ast


# Root folder of the competition dataset (absolute Kaggle input path).
ROOT_DIR = Path("/kaggle/input/tensorflow-great-barrier-reef")

# Training metadata table read from train.csv.
TRAIN_CSV = ROOT_DIR / "train.csv"
TRAIN_DF = pd.read_csv(TRAIN_CSV)

# Test metadata table read from test.csv.
TEST_CSV = ROOT_DIR / "test.csv"
TEST_DF = pd.read_csv(TEST_CSV)

# Display the entries found directly under the dataset folder.
list(ROOT_DIR.iterdir())


In [2]:
TRAIN_DF.head()

In [3]:
TRAIN_DF["video_id"].unique()
# Checking for unique video_id

In [4]:
TRAIN_DF["sequence"].unique()
# Checking for unique sequence ids

In [5]:
# Distinct sequence ids that occur in rows of video 0, then their count.
vid_0 = TRAIN_DF.loc[TRAIN_DF["video_id"] == 0, "sequence"].unique()
print(vid_0)
print(len(vid_0))

# Printing the unique sequence_id for video_id 0

In [6]:
# Distinct sequence ids that occur in rows of video 1, then their count.
vid_1 = TRAIN_DF.loc[TRAIN_DF["video_id"] == 1, "sequence"].unique()
print(vid_1)
print(len(vid_1))

# Printing the unique sequence_id for video_id 1

In [7]:
# Distinct sequence ids that occur in rows of video 2, then their count.
vid_2 = TRAIN_DF.loc[TRAIN_DF["video_id"] == 2, "sequence"].unique()
print(vid_2)
print(len(vid_2))

# Printing the unique sequence_id for video_id 2

In [8]:
# If no sequence id is shared between videos, the per-video unique counts
# add up exactly to the number of unique ids in the whole table.
overall_unique_count = len(TRAIN_DF["sequence"].unique())
per_video_unique_count = sum(len(seq_ids) for seq_ids in (vid_0, vid_1, vid_2))
if overall_unique_count != per_video_unique_count:
    print("Duplicate sequence numbers are found in 3 videos")
else:
    print("Sequence numbers are unique in 3 videos")
    
# Checking for duplicate sequence numbers

In [9]:
TRAIN_DF[TRAIN_DF["video_id"] == 0].shape

# Checking the number of images under video_id 0

In [10]:
TRAIN_DF[TRAIN_DF["video_id"] == 1].shape

# Checking the number of images under video_id 1

In [11]:
TRAIN_DF[TRAIN_DF["video_id"] == 2].shape

# Checking the number of images under video_id 2

In [12]:
def print_record_length(vid_id):
    """Count the rows of ``TRAIN_DF`` in each sequence of one video.

    Despite its name, this function prints nothing; it only returns the counts.

    Args:
        vid_id: Value compared against the ``video_id`` column of ``TRAIN_DF``.

    Returns:
        dict mapping every sequence id found in that video (in order of first
        appearance) to the number of rows carrying that sequence id.
    """
    # Filter down to the requested video once, instead of on every iteration.
    video_sequences = TRAIN_DF[TRAIN_DF["video_id"] == vid_id]["sequence"]
    return {
        seq_id: int((video_sequences == seq_id).sum())
        for seq_id in video_sequences.unique()
    }

# Returning the dictionary with key as sequence_id of corresponding vid_id and value as number of images under this seq_id

In [13]:
# Row count per sequence of video 2, reported one sequence per line.
dict_seqid_to_cnt = print_record_length(2)
for seqid in vid_2:
    record_count = dict_seqid_to_cnt[seqid]
    print(f"Video ID with 2 with sequence number {seqid} having {record_count} number of records")

In [14]:
def check_annotations_present(vid_id):
    """Find the annotated rows of each sequence of one video.

    A row counts as annotated when its ``annotations`` string parses (via
    ``ast.literal_eval``) to a non-empty container.

    Args:
        vid_id: Value compared against the ``video_id`` column of ``TRAIN_DF``.

    Returns:
        dict mapping every sequence id found in that video to the list of
        ``TRAIN_DF`` index labels of its annotated rows.
    """
    # Filter down to the requested video once, instead of on every iteration.
    video_df = TRAIN_DF[TRAIN_DF["video_id"] == vid_id]
    dict_seq_id_to_idxs = {}
    for seq_id in video_df["sequence"].unique():
        seq_annotations = video_df.loc[video_df["sequence"] == seq_id, "annotations"]
        # Iterate (index label, raw string) pairs directly rather than
        # materialising a whole row with .iloc[i] for every frame.
        dict_seq_id_to_idxs[seq_id] = [
            idx
            for idx, raw_annotations in seq_annotations.items()
            if len(ast.literal_eval(raw_annotations)) > 0
        ]
    return dict_seq_id_to_idxs

# Returns a dictionary whose keys are the sequence ids of the given vid_id and whose values are the lists of TRAIN_DF index labels of the annotated images in that sequence

In [15]:
# Annotated-image count versus total-image count for each sequence of video 2.
dict_seq_id_to_idxs = check_annotations_present(2)
for idx in vid_2:
    annotated_count = len(dict_seq_id_to_idxs[idx])
    total_count = dict_seqid_to_cnt[idx]
    print(f"Sequence id with {idx} having {annotated_count} annotated images out of {total_count} images")

In [16]:
TRAIN_DF

In [17]:
# Image_id = video_id + video_frame

### Is the sequence_frame the same as video_frame?

In [18]:
df = TRAIN_DF[TRAIN_DF["video_id"] == 2]
df1 = df[df["sequence"] == vid_2[0]]
df1

# Rows of the first sequence (vid_2[0]) of video_id 2

In [19]:
df2 = df[df["sequence"] == vid_2[1]]
df2

# Rows of the second sequence (vid_2[1]) of video_id 2

In [20]:
df3 = df[df["sequence"] == vid_2[2]]
df3

# Rows of the third sequence (vid_2[2]) of video_id 2

In [21]:
# By observing the data, we can come to conclusion that
# -> sequence_frame starts from 0 for every sequence till the length of the sequence (n)
# -> video_frame starts from random number and is contiguous but not consecutive

### Do sequence_frames go from 0 to N nicely?

In [22]:
print("Sequence frames sequential and start from 0?")
for seq_name in TRAIN_DF["sequence"].unique():
    sequential = True
    numbers = TRAIN_DF[TRAIN_DF["sequence"] == seq_name]["sequence_frame"].values
    numbers.sort()

    # `i` is the frame number expected next, starting at 0. Every value it has
    # to step over to reach the next observed frame is a missing frame.
    i = 0
    for num in numbers:
        while i < num:
            print(f"Seq {seq_name}: {Fore.RED}Missing {i}{Fore.RESET}")
            # Record the gap so the "Yes" verdict below is not printed for a
            # sequence that has missing frames.
            sequential = False
            i += 1
        i += 1

    if sequential:
        print(f"Seq {seq_name}: {Fore.GREEN}Yes{Fore.RESET}")
print()

# Checking for the sequentiality of the sequence_frames

### Are video_frames sequential?

In [23]:
print("Video frames sequential?")
for seq_name in TRAIN_DF["sequence"].unique():
    sequential = True
    numbers = TRAIN_DF[TRAIN_DF["sequence"] == seq_name]["video_frame"].values
    numbers.sort()

    # `i` is the frame number expected next, starting at the smallest observed
    # video_frame. Every value it has to step over is a missing frame.
    i = numbers[0]
    for num in numbers:
        while i < num:
            print(f"Seq {seq_name}: {Fore.RED}Missing {i}{Fore.RESET}")
            # Record the gap so the "Yes" verdict below is not printed for a
            # sequence that has missing frames.
            sequential = False
            i += 1
        i += 1

    if sequential:
        print(f"Seq {seq_name}: {Fore.GREEN}Yes{Fore.RESET}")
print()
# Rebuild "<video_id>-<video_frame>" for every row and count the rows whose
# image_id differs from it.
new_vid_ids = TRAIN_DF["video_id"].astype(str) + "-" + TRAIN_DF["video_frame"].astype(str)
print("How many images have strange image_ids:", (TRAIN_DF["image_id"] != new_vid_ids).sum())

# Checking for the sequentiality of the video_frames

### Do videos have unique sequence names?

In [24]:
# One row per distinct (video_id, sequence) pair; a sequence id that shows up
# in more than one pair is shared between videos.
vid_seq_pairs = TRAIN_DF[["video_id", "sequence"]].drop_duplicates()
pairs_per_sequence = vid_seq_pairs["sequence"].value_counts()
repeated_sequence_count = pairs_per_sequence.ne(1).sum()
print("How many repeated sequences:", repeated_sequence_count)

# Checking for the duplicate sequence_ids

## How are the annotations distributed?

In [25]:
if "detection_count" not in TRAIN_DF.columns:
    # Parse the annotations string with ast.literal_eval instead of eval:
    # it accepts only Python literals, so text read from the CSV can never
    # be executed as code.
    det_counts = TRAIN_DF["annotations"].apply(lambda raw: len(ast.literal_eval(raw)))
    TRAIN_DF["detection_count"] = det_counts
    
# Detection_count column contains the number of starfish present in that particular image

In [26]:
TRAIN_DF["detection_count"].value_counts()

# Gives the frequency of each values

In [27]:
# 0 starfish is present in 18582 images, 1 starfish is present in 2801 images and so on

In [28]:
# One histogram bin per distinct detection count.
bin_count = TRAIN_DF["detection_count"].nunique(dropna=False)
plot = TRAIN_DF.hist(column="detection_count", bins=bin_count, figsize=(16, 6))
ax = plot[0, 0]
ax.set_title("Starfish count, per image")

# X axis shows the number of starfish count and y axis shows the number of images containing those amount of starfish

In [29]:
# Same histogram, restricted to rows with at least one detection.
TRAIN_DF_WITH_STARFISH = TRAIN_DF.loc[TRAIN_DF["detection_count"] > 0]
bin_count = TRAIN_DF_WITH_STARFISH["detection_count"].nunique(dropna=False)
plot = TRAIN_DF_WITH_STARFISH.hist(column="detection_count", bins=bin_count, figsize=(16, 4))
ax = plot[0, 0]
ax.set_title("Starfish count per image ");

# Plotted only for images having atleast 1 starfish

## How do detections change during the frame?

In [30]:
import math

# Grid layout: one subplot per sequence, FIG_COLS subplots per row.
SEQUENCE_COUNT = len(TRAIN_DF["sequence"].drop_duplicates())
FIG_COLS = 2
FIG_ROWS = math.ceil(SEQUENCE_COUNT / FIG_COLS)
fig = plt.figure(figsize=(30, 40), constrained_layout=True)

det_data = TRAIN_DF[["sequence", "video_id", "sequence_frame", "detection_count"]].drop_duplicates()
for i, seq_num in enumerate(det_data["sequence"].unique()):
    # Detection count of this sequence, indexed by sequence_frame in ascending order.
    seq_data = (
        det_data[det_data["sequence"] == seq_num]
        .sort_values(by="sequence_frame")
        .set_index("sequence_frame")
    )
    video_id = seq_data["video_id"].iloc[0]

    # Add the subplot to `fig` explicitly rather than through pyplot's
    # implicit "current figure" state.
    ax = fig.add_subplot(FIG_ROWS, FIG_COLS, i + 1)
    seq_data["detection_count"].plot.line(ax=ax)
    ax.set_title(f"Video {video_id}, Sequence {seq_num}", fontsize=22)
    ax.set_ylabel('Detections', fontsize=16)
    ax.set_xlabel('Sequence Frame', fontsize=16)
    

In [31]:
# Above diagram shows that
# -> For each subplot, there is a sequence_frame which starts from 0
# -> For each sequence_frame, there will be image associated with that
# -> For each image, there are list of annotations
# -> Length of each annotations are plotted in the graph

In [32]:
# dict_seq_id_to_idxs stores index *labels* (collected from train_df.index), so
# fetch the row by label with .loc; positional .iloc only matches while the
# index of TRAIN_DF happens to equal the row positions.
img_idx = TRAIN_DF.loc[dict_seq_id_to_idxs[26651][0]].values
img_idx

In [33]:
import cv2
import matplotlib.patches as patches

In [34]:
rect_coordinates = ast.literal_eval(img_idx[5])
rect_coordinates
# Parse the annotations string into a Python list of bounding-box dictionaries

In [35]:
# Build the path from ROOT_DIR so the cell does not depend on the notebook's
# current working directory.
image = plt.imread(str(ROOT_DIR / "train_images" / "video_2" / "4718.jpg"))
fig, ax = plt.subplots(1, figsize=(15, 15))
ax.imshow(image)
# Outline every annotated box; each annotation dict has x, y, width and height keys.
for e in rect_coordinates:
    rect = patches.Rectangle(
        (e['x'], e['y']), e['width'], e['height'],
        linewidth=1, edgecolor='r', facecolor="none",
    )
    ax.add_patch(rect)
plt.show()

# Plotting image having one starfish

In [36]:
dict_seqid_to_cnt

In [37]:
img_idx_1 = TRAIN_DF.iloc[19616].values
img_idx_1

In [38]:
rect_coordinates_ = ast.literal_eval(img_idx_1[5])

In [39]:
rect_coordinates_

In [40]:
# Build the path from ROOT_DIR so the cell does not depend on the notebook's
# current working directory.
image = plt.imread(str(ROOT_DIR / "train_images" / "video_2" / "5714.jpg"))
fig, ax = plt.subplots(1, figsize=(15, 15))
ax.imshow(image)
# Outline every annotated box; each annotation dict has x, y, width and height keys.
for e in rect_coordinates_:
    rect = patches.Rectangle(
        (e['x'], e['y']), e['width'], e['height'],
        linewidth=2, edgecolor='r', facecolor="none",
    )
    ax.add_patch(rect)
plt.show()

# Plotting image having more than one starfish

In [41]:
image.shape

In [42]:
plt.imread("../input/tensorflow-great-barrier-reef/train_images/video_2/1000.jpg").shape

In [43]:
# clone the tensorflow/models repository for the pretrained models execution

!git clone https://github.com/tensorflow/models
    
# Check out a certain commit to ensure that future changes in the TF ODT API codebase won't affect this notebook.
!cd models && git checkout ac8d06519

In [44]:
%%bash
cd models/research

# Compile protos.
protoc object_detection/protos/*.proto --python_out=.

# Install TensorFlow Object Detection API.
# Note: I fixed the version of some dependencies to make it work on Kaggle notebook. In particular:
# * scipy==1.6.3 to avoid the missing GLIBCXX_3.4.26 error
# * tensorflow to 2.6.0 to make it compatible with the CUDA version preinstalled on Kaggle.
# When Kaggle notebook upgrade to TF 2.7, you can use the default setup.py script:
# cp object_detection/packages/tf2/setup.py .
wget https://storage.googleapis.com/odml-dataset/others/setup.py
pip install -q --user .

# Test if the Object Detection API is working correctly
python object_detection/builders/model_builder_tf2_test.py

In [45]:
import contextlib2
import io
import IPython
import json
import numpy as np
import os
import pathlib
import pandas as pd
import sys
import tensorflow as tf
import time

from PIL import Image, ImageDraw

# Import the library that is used to submit the prediction result.
INPUT_DIR = '/kaggle/input/tensorflow-great-barrier-reef/'
sys.path.insert(0, INPUT_DIR)
import greatbarrierreef

In [46]:
print(tf.__version__)
# tf.test.is_gpu_available() is deprecated; listing the physical GPU devices
# is the supported way to check for a GPU.
gpu_devices = tf.config.list_physical_devices('GPU')
print(len(gpu_devices) > 0)
print(gpu_devices)

In [47]:
TRAINING_RATIO = 0.8
SPLIT_SEED = 42  # fixed seed so the shuffles below give the same order on every run

data_df = pd.read_csv(os.path.join(INPUT_DIR, 'train.csv'))

# Split the dataset so that no sequence is leaked from the training dataset into the validation dataset.
# The split point is pushed forward while the rows on both sides of it share a
# sequence id. The bounds check stops the scan at the end of the table instead
# of indexing past the last row.
split_index = int(TRAINING_RATIO * len(data_df))
while (0 < split_index < len(data_df)
       and data_df.iloc[split_index - 1].sequence == data_df.iloc[split_index].sequence):
    split_index += 1

# Shuffle both the training and validation datasets.
train_data_df = data_df.iloc[:split_index].sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)
val_data_df = data_df.iloc[split_index:].sample(frac=1, random_state=SPLIT_SEED).reset_index(drop=True)

# A row is "positive" when its annotations string is anything other than '[]'.
train_positive_count = len(train_data_df[train_data_df.annotations != '[]'])
val_positive_count = len(val_data_df[val_data_df.annotations != '[]'])

print('Training ratio (all samples):', 
      float(len(train_data_df)) / (len(train_data_df) + len(val_data_df)))
print('Training ratio (positive samples):', 
      float(train_positive_count) / (train_positive_count + val_positive_count))

In [48]:
train_data_df.head()

In [49]:
# Number of rows in the training split, then in the validation split
print(f"Length of training data: {len(train_data_df)}")
print(f"Length of validation data: {len(val_data_df)}")

In [50]:
# Keep only the rows with at least one starfish in the training dataset.
# drop=True discards the old index instead of inserting it as an "index"
# column; without it, re-running this cell raises ValueError because that
# column already exists.
train_data_df = train_data_df[train_data_df.annotations != '[]'].reset_index(drop=True)
print('Number of positive images used for training:', len(train_data_df))
# Keep only the rows with at least one starfish in the validation dataset.
val_data_df = val_data_df[val_data_df.annotations != '[]'].reset_index(drop=True)
print('Number of positive images used for validation:', len(val_data_df))

In [51]:
# Functions to convert the images into TFRecord, the binary format understandable by the pretrained model

from object_detection.utils import dataset_util
from object_detection.dataset_tools import tf_record_creation_util


def create_tf_example(video_id, video_frame, data_df, image_path):
    """Create a tf.Example entry for a given training image.

    Args:
        video_id: Video the frame belongs to; selects the ``video_<id>`` folder.
        video_frame: Frame number; selects the ``<frame>.jpg`` file.
        data_df: DataFrame with ``video_id``, ``video_frame`` and
            ``annotations`` columns. Every row matching the given video and
            frame contributes its boxes.
        image_path: Folder that contains the ``video_<id>`` folders.

    Returns:
        A ``tf.train.Example`` holding the encoded JPEG bytes, the image size
        and the bounding boxes normalized by image width/height, all labelled
        as class 1 ('COTS').

    Raises:
        ValueError: If the file is not a JPEG image.
    """
    full_path = os.path.join(image_path, f'video_{video_id}', f'{video_frame}.jpg')
    with tf.io.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    width, height = image.size
    filename = f'{video_id}:{video_frame}'.encode('utf8') # Unique id of the image.
    image_format = 'jpeg'.encode('utf8') # b'jpeg' or b'png'

    def _clamp_unit(value):
        # A box that reaches past the image edge would otherwise produce a
        # coordinate outside the normalized [0, 1] range.
        return min(max(value, 0.0), 1.0)

    xmins = [] # Normalized left x coordinate of each box
    xmaxs = [] # Normalized right x coordinate of each box
    ymins = [] # Normalized top y coordinate of each box
    ymaxs = [] # Normalized bottom y coordinate of each box
    classes_text = [] # Class name of each box, as bytes
    classes = [] # Integer class id of each box

    rows = data_df[(data_df.video_id == video_id) & (data_df.video_frame == video_frame)]
    for _, row in rows.iterrows():
        # The annotations string is parsed as a Python literal; this avoids
        # rewriting quotes to force it through a JSON parser.
        for annotation in ast.literal_eval(row.annotations):
            left = annotation['x']
            top = annotation['y']
            xmins.append(_clamp_unit(left / width))
            xmaxs.append(_clamp_unit((left + annotation['width']) / width))
            ymins.append(_clamp_unit(top / height))
            ymaxs.append(_clamp_unit((top + annotation['height']) / height))

            classes_text.append('COTS'.encode('utf8'))
            classes.append(1)

    tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

    return tf_example

def convert_to_tfrecord(data_df, tfrecord_filebase, image_path, num_shards = 10):
    """Convert the object detection dataset to TFRecord as required by the TF ODT API.

    Args:
        data_df: DataFrame with one row per image; ``video_id`` and
            ``video_frame`` of each row identify the image to convert.
        tfrecord_filebase: Base path of the sharded output files.
        image_path: Folder that contains the ``video_<id>`` image folders.
        num_shards: Number of output shards; examples are assigned to shards
            round-robin.
    """
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, tfrecord_filebase, num_shards)

        # Shard and report progress by running position rather than by the
        # DataFrame index label, so the result stays balanced and the counter
        # stays meaningful when the index is not a clean 0..n-1 range.
        for position, (_, row) in enumerate(data_df.iterrows()):
            if position % 500 == 0:
                print('Processed {0} images.'.format(position))
            tf_example = create_tf_example(row.video_id, row.video_frame, data_df, image_path)
            output_tfrecords[position % num_shards].write(tf_example.SerializeToString())

        print('Completed processing {0} images.'.format(len(data_df)))

!mkdir dataset
image_path = os.path.join(INPUT_DIR, 'train_images')

# Convert train images to TFRecord
print('Converting TRAIN images...')
convert_to_tfrecord(
  train_data_df,
  'dataset/cots_train',
  image_path,
  num_shards = 4
)

# Convert validation images to TFRecord
print('Converting VALIDATION images...')
convert_to_tfrecord(
  val_data_df,
  'dataset/cots_val',
  image_path,
  num_shards = 4
)

In [53]:
# Label map that maps the model's output category ids to class names
# Only one kind of object is detected here, hence only one label is required

label_map_str = """item {
  id: 1
  name: 'COTS'
}"""

with open('dataset/label_map.pbtxt', 'w') as f:
  f.write(label_map_str)

!more dataset/label_map.pbtxt

In [54]:
# Getting the pretrained efficient-det-D0 model and decompressing it

!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d0_coco17_tpu-32.tar.gz
!tar -xvzf efficientdet_d0_coco17_tpu-32.tar.gz

In [55]:
# This pipeline config drives training, evaluation and export of the detection model
# The template below carries the changes needed for transfer learning; $-placeholders are filled in by the next cell

from string import Template

config_file_template = """
# SSD with EfficientNet-b0 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d0).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Initialized from an EfficientDet-D0 checkpoint.
#
# Train on GPU

model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    num_classes: 1
    add_background_class: false
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 3
      }
    }
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 1280
        max_dimension: 1280
        pad_to_max_dimension: true
        }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 64
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          force_use_bias: true
          activation: SWISH
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true
            decay: 0.99
            epsilon: 0.001
          }
        }
        num_layers_before_predictor: 3
        kernel_size: 3
        use_depthwise: true
      }
    }
    feature_extractor {
      type: 'ssd_efficientnet-b0_bifpn_keras'
      bifpn {
        min_level: 3
        max_level: 7
        num_iterations: 3
        num_filters: 64
      }
      conv_hyperparams {
        force_use_bias: true
        activation: SWISH
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true,
          decay: 0.99,
          epsilon: 0.001,
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 1.5
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.5
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  fine_tune_checkpoint: "efficientdet_d0_coco17_tpu-32/checkpoint/ckpt-0"
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint_type: "detection"
  batch_size: 2
  sync_replicas: false
  startup_delay_steps: 0
  replicas_to_aggregate: 1
  use_bfloat16: false
  num_steps: $training_steps
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_scale_crop_and_pad_to_square {
      output_size: 1280
      scale_min: 0.5
      scale_max: 2.0
    }
  }
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: 5e-3
          total_steps: $training_steps
          warmup_learning_rate: 5e-4
          warmup_steps: $warmup_steps
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

train_input_reader: {
  label_map_path: "dataset/label_map.pbtxt"
  tf_record_input_reader {
    input_path: "dataset/cots_train-?????-of-00004"
  }
}

eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
  batch_size: 2;
}

eval_input_reader: {
  label_map_path: "dataset/label_map.pbtxt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "dataset/cots_val-?????-of-00004"
  }
}
"""

In [56]:
# Changing the placeholders in config file

TRAINING_STEPS = 10000
WARMUP_STEPS = 2000
PIPELINE_CONFIG_PATH='dataset/pipeline.config'

# Fill the $training_steps / $warmup_steps placeholders of the template and
# save the result as the pipeline config file.
placeholder_values = {
    'training_steps': TRAINING_STEPS,
    'warmup_steps': WARMUP_STEPS,
}
pipeline = Template(config_file_template).substitute(placeholder_values)

with open(PIPELINE_CONFIG_PATH, 'w') as config_file:
    config_file.write(pipeline)

In [57]:
# Training the model with pipeline config path and model directory to store the checkpoints created during the training which are helpful for evaluating

MODEL_DIR='cots_efficientdet_d0'
!mkdir {MODEL_DIR}
!python models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path={PIPELINE_CONFIG_PATH} \
    --model_dir={MODEL_DIR} \
    --alsologtostderr

In [58]:
# Evaluating the model using the checkpoints created during the training step

!python models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path={PIPELINE_CONFIG_PATH} \
    --model_dir={MODEL_DIR} \
    --checkpoint_dir={MODEL_DIR} \
    --eval_timeout=0 \
    --alsologtostderr

In [59]:
# Saving the model for the detections and the predictions with the test dataset

!python models/research/object_detection/exporter_main_v2.py \
    --input_type image_tensor \
    --pipeline_config_path={PIPELINE_CONFIG_PATH} \
    --trained_checkpoint_dir={MODEL_DIR} \
    --output_directory={MODEL_DIR}/output

In [60]:
!ls {MODEL_DIR}/output

In [61]:
# Load the TensorFlow COTS detection model into memory.
# Time how long it takes to reset the Keras session and load the exported model.
start_time = time.time()
tf.keras.backend.clear_session()
saved_model_dir = os.path.join(MODEL_DIR, 'output', 'saved_model')
detect_fn_tf_odt = tf.saved_model.load(saved_model_dir)
end_time = time.time()
elapsed_time = end_time - start_time
print(f'Elapsed time: {elapsed_time}s')

In [62]:
def load_image_into_numpy_array(path):
    """Load an image from file into a numpy array.

    The image is converted to RGB, so the result always has three channels
    even if the file is stored in another mode (e.g. grayscale or RGBA).

    Args:
        path: Path of the image file, opened through ``tf.io.gfile``.

    Returns:
        uint8 numpy array with shape (img_height, img_width, 3).
    """
    # Read through a context manager so the file handle is closed promptly.
    with tf.io.gfile.GFile(path, 'rb') as image_file:
        img_data = image_file.read()
    image = Image.open(io.BytesIO(img_data)).convert('RGB')

    # Direct array conversion avoids building a Python sequence of per-pixel
    # tuples via getdata() and gives the (height, width, 3) layout directly.
    return np.array(image, dtype=np.uint8)

def detect(image_np):
    """Detect COTS from a given numpy image.

    Args:
        image_np: A single image as a numpy array; a leading batch axis of
            size 1 is added before it is passed to the model.

    Returns:
        Whatever ``detect_fn_tf_odt`` returns for the one-image batch.
    """
    input_tensor = np.expand_dims(image_np, 0)
    return detect_fn_tf_odt(input_tensor)

In [63]:
# Load the test image
imgTest = load_image_into_numpy_array("../input/tensorflow-great-barrier-reef/train_images/video_2/5714.jpg")
imgTest.shape

In [64]:
listTest = [imgTest]

In [66]:
DETECTION_THRESHOLD = 0.3

# Check for the predictions

submission_dict = {
    'id': [],
    'prediction_string': [],
}

for image_np in listTest:
    height, width, _ = image_np.shape

    # Run object detection using the TensorFlow model.
    detections = detect(image_np)
    all_scores = detections['detection_scores'][0]
    all_boxes = detections['detection_boxes'][0]

    # Turn every detection that clears the threshold into
    # "score x_min y_min width height", with the box scaled to pixels.
    num_detections = detections['num_detections'][0].numpy().astype(np.int32)
    predictions = []
    for index in range(num_detections):
        score = all_scores[index].numpy()
        if not (score < DETECTION_THRESHOLD):
            # Boxes are read as (y_min, x_min, y_max, x_max) fractions of the image size.
            bbox = all_boxes[index].numpy()
            y_min, y_max = int(bbox[0] * height), int(bbox[2] * height)
            x_min, x_max = int(bbox[1] * width), int(bbox[3] * width)

            bbox_width = x_max - x_min
            bbox_height = y_max - y_min

            predictions.append('{:.2f} {} {} {} {}'.format(score, x_min, y_min, bbox_width, bbox_height))

    # Generate the submission data.
    prediction_str = ' '.join(predictions)

    print('Prediction:', prediction_str)

In [None]:
# Clear the saved directories 

!rm -rf dataset
!rm -rf train_images
!rm tensorflow-great-barrier-reef.zip

# Remove other data downloaded during training.
!rm -rf models
!rm efficientdet_d0_coco17_tpu-32.tar.gz