<a href="https://colab.research.google.com/github/raitharnett/tensorflow-great-barrier-reef/blob/main/cots_mask_r_nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%bash
# Environment setup: headless OpenCV plus the TensorFlow Object Detection API.
# Abort on the first failing command so a broken install is not hidden by later steps.
set -e
# upgrade pip
python -m pip install --upgrade pip
# opencv: replace the default build with the headless one
pip uninstall --yes opencv_python
pip install opencv-python-headless
# TF Object detection: clone only when the checkout is missing so the cell can be re-run
if [ ! -d models ]; then
  git clone --depth 1 https://github.com/tensorflow/models.git
fi
cd models/research
# Compile protos.
protoc object_detection/protos/*.proto --python_out=.
# Install TensorFlow Object Detection API.
cp object_detection/packages/tf2/setup.py .
# No --use-feature=2020-resolver: that resolver is pip's default, and the pip
# upgraded above may reject the flag as an invalid choice.
python -m pip install .
# Smoke-test the installation.
python object_detection/builders/model_builder_tf2_test.py

In [None]:
%%bash
# Abort on the first failing command (e.g. Drive not mounted, download error).
set -e
# COTS data: extract once; -q keeps the per-file listing out of the cell output
if [ ! -d /content/tensorflow-great-barrier-reef/train_images ]; then
  unzip -q /content/drive/MyDrive/cots/tensorflow-great-barrier-reef.zip -d /content/tensorflow-great-barrier-reef
fi
# TF mask_rcnn model: download only when the archive is not already present
if [ ! -f mask_rcnn_inception_resnet_v2_1024x1024_coco17_gpu-8.tar.gz ]; then
  wget http://download.tensorflow.org/models/object_detection/tf2/20200711/mask_rcnn_inception_resnet_v2_1024x1024_coco17_gpu-8.tar.gz
fi
# Extract quietly (no -v) so the output is not flooded with file names
tar -xzf mask_rcnn_inception_resnet_v2_1024x1024_coco17_gpu-8.tar.gz


In [None]:
import os
import sys
import pathlib
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import io
import scipy.misc
import shutil
import numpy as np
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont
from six.moves.urllib.request import urlopen

import cv2 as cv 
from google.colab.patches import cv2_imshow
import matplotlib.patches as patches
print(f"OpenCV version: {cv.__version__}")

import tensorflow as tf
import tensorflow_hub as hub
from object_detection.utils import dataset_util
from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
from sklearn.model_selection import train_test_split
import contextlib2
from pathlib import Path
from enum import Enum

class COTSClass(Enum):
  """Detection classes of the COTS dataset; each member's value is its integer class id."""
  COTS = 1
  
%matplotlib inline
# Only let TensorFlow log messages of ERROR severity or above through.
tf.get_logger().setLevel('ERROR')
# Record the TensorFlow version in the cell output.
print(f"TF version: {tf.__version__}")

In [None]:
# Filesystem layout: extracted competition data (input) and generated dataset files (output).
COTS_DATA_ROOT = '/content/'
COTS_DATA = os.path.join(COTS_DATA_ROOT, 'tensorflow-great-barrier-reef')
COTS_DATA_IMAGES = os.path.join(COTS_DATA, 'train_images')

# Output directory for the TFRecord shards; created up front if it is missing.
COTS_DATASET = '/content/dataset'
os.makedirs(COTS_DATASET, exist_ok=True)
COTS_DATA_TRAIN_TF_RECORDS = COTS_DATASET + '/train'
COTS_DATA_TEST_TF_RECORDS = COTS_DATASET + '/test'

In [None]:
# load COTS data
def cots_annotations(data):
  """Parse the textual `annotations` column of train.csv into Python objects.

  The column stores a Python-style literal such as
  "[{'x': 1, 'y': 2, 'width': 3, 'height': 4}]" (single-quoted keys).

  Args:
    data: the raw string of one `annotations` cell.

  Returns:
    The decoded value (a list of dicts for the format above, `[]` for "[]").

  Raises:
    json.JSONDecodeError: if the text can be decoded neither as JSON (after
      quote swapping) nor as a Python literal.
  """
  import ast
  import json
  try:
    # Original behaviour: turn the single-quoted literal into JSON.
    return json.loads(data.replace("'", '"'))
  except json.JSONDecodeError as json_error:
    # Quote swapping corrupts values that themselves contain an apostrophe;
    # fall back to parsing the untouched text as a Python literal.
    try:
      return ast.literal_eval(data)
    except (ValueError, SyntaxError):
      # Keep the exception type callers saw before the fallback existed.
      raise json_error

def cots_image_path(row):
  """Return the JPEG path of the frame described by one train.csv row.

  The path is COTS_DATA_IMAGES/video_<video_id>/<video_frame>.jpg, built from
  the row's `video_id` and `video_frame` attributes.
  """
  video_dir = f"video_{row.video_id}"
  frame_file = f"{row.video_frame}.jpg"
  return os.path.join(COTS_DATA_IMAGES, video_dir, frame_file)

# Read the annotation table; the `annotations` column is decoded into Python objects on load.
cots_df = pd.read_csv(os.path.join(COTS_DATA, 'train.csv'), converters={'annotations': cots_annotations})
# Absolute path of the frame each row refers to.
cots_df['image_path'] = cots_df.apply(cots_image_path, axis=1)
# Fixed seed so a kernel restart reproduces the same 80/20 train/test partition.
SPLIT_SEED = 42
cots_train_test_split = train_test_split(cots_df, train_size=0.8, random_state=SPLIT_SEED)

In [None]:
# Byte-string constants written into every tf.train.Example.
FORMAT = b'jpeg'
CLASS_NAME = bytes(COTSClass.COTS.name, 'utf8')

def create_cots_tf_example(row):
  """Convert one annotated frame into a `tf.train.Example`.

  Args:
    row: a row of the COTS dataframe; reads `image_path`, `image_id` and
      `annotations` (an iterable of dicts with 'x', 'y', 'width', 'height'
      in pixels).

  Returns:
    A `tf.train.Example` holding the encoded JPEG, its size, and the bounding
    boxes normalised by image width/height and clipped to [0, 1]. Frames
    without annotations produce empty box and class lists.
  """
  contents = tf.io.read_file(row.image_path)
  # Decoded only to learn the image size; the encoded bytes are stored unchanged.
  image = tf.io.decode_jpeg(contents, channels=3)
  h, w, _ = image.shape
  filename = row.image_id.encode('utf8')
  # One row per box, columns: xmin, xmax, ymin, ymax (pixels).
  boxes = np.array([[a['x'], a['x'] + a['width'], a['y'], a['y'] + a['height']] for a in row.annotations], dtype='float64')
  xmin, xmax, ymin, ymax, classes_text, classes = [], [], [], [], [], []
  if boxes.size > 0:
    # normalize
    boxes[:, [0, 1]] /= w
    boxes[:, [2, 3]] /= h
    # A box whose x+width or y+height runs past the image edge would otherwise
    # yield coordinates above 1; keep every coordinate inside the unit square.
    np.clip(boxes, 0.0, 1.0, out=boxes)
    xmin, xmax = np.transpose(boxes[:, [0, 1]]).tolist()
    ymin, ymax = np.transpose(boxes[:, [2, 3]]).tolist()
    classes_text = [CLASS_NAME] * len(xmin)
    classes = [COTSClass.COTS.value] * len(xmin)

  feature = { 'image/height': dataset_util.int64_feature(h),
              'image/width': dataset_util.int64_feature(w),
              'image/filename': dataset_util.bytes_feature(filename),
              'image/source_id': dataset_util.bytes_feature(filename),
              'image/encoded': dataset_util.bytes_feature(contents.numpy()),
              'image/format': dataset_util.bytes_feature(FORMAT),
              'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
              'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
              'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
              'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
              'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
              'image/object/class/label': dataset_util.int64_list_feature(classes),
            }
  return tf.train.Example(features=tf.train.Features(feature=feature))

def load_cots(df, base_path, num_shards=10):
  """Serialise every row of `df` into sharded TFRecord files.

  Args:
    df: dataframe whose rows are accepted by `create_cots_tf_example`.
    base_path: base path handed to `open_sharded_output_tfrecords`.
    num_shards: number of output shards; a row goes to the shard given by its
      index label modulo `num_shards`.
  """
  with contextlib2.ExitStack() as exit_stack:
    shard_writers = tf_record_creation_util.open_sharded_output_tfrecords(exit_stack, base_path, num_shards)
    for row_label, row in df.iterrows():
      serialized = create_cots_tf_example(row).SerializeToString()
      shard_writers[row_label % num_shards].write(serialized)

In [None]:
# Pair each TFRecord base path with the matching split, in order: train first, test second.
cots_tf_record_keys = [COTS_DATA_TRAIN_TF_RECORDS, COTS_DATA_TEST_TF_RECORDS]
cots_train_test_data = dict(zip(cots_tf_record_keys, cots_train_test_split))

In [None]:
# Write every split to its own set of sharded TFRecord files.
for record_base_path, split_df in cots_train_test_data.items():
  load_cots(split_df, record_base_path)

In [None]:
from string import Template

# Values substituted into the pipeline config template.
TRAINING_STEPS = 1000
WARMUP_STEPS = 100
BATCH_SIZE = 2

# Inputs stored on Google Drive: the label map and the config template.
MODEL_LABEL_MAP = "/content/drive/MyDrive/cots/label_map.txt"
MODEL_PIPELINE_CONFIG_TEMPLATE = "/content/drive/MyDrive/cots/mask_rcnn_inception_resnet_v2_1024x1024_coco17_gpu-8.config"

# Directory passed to the training script as its model directory.
COTS_MODEL_DIR='/content/drive/MyDrive/cots/mask_rcnn_inception_resnet_v2'

# Local working copies used by training.
PIPELINE_CONFIG_PATH = '/content/dataset/pipeline.config'
LABEL_MAP_PATH = '/content/dataset/label_map.pbtxt'

# Copy the label map next to the dataset and build the id -> category lookup from it.
shutil.copy(MODEL_LABEL_MAP, LABEL_MAP_PATH)
category_index = label_map_util.create_category_index_from_labelmap(LABEL_MAP_PATH, use_display_name=True)

# Fill the template placeholders and write the concrete pipeline config.
config_file_template = Path(MODEL_PIPELINE_CONFIG_TEMPLATE).read_text()
pipeline = Template(config_file_template).substitute(
    training_steps=TRAINING_STEPS,
    warmup_steps=WARMUP_STEPS,
    batch_size=BATCH_SIZE,
)
with open(PIPELINE_CONFIG_PATH, mode='w') as pipeline_file:
  pipeline_file.write(pipeline)

In [None]:
%%bash -s "$COTS_MODEL_DIR" "$PIPELINE_CONFIG_PATH"
# Positional arguments supplied by the %%bash -s magic line:
#   $1 - model directory (checkpoints / training output)
#   $2 - path of the generated pipeline config
# Every expansion is quoted so paths containing spaces are passed as one argument.
MODEL_DIR="$1"
PIPELINE_CONFIG_PATH="$2"
# train model - does not work yet? 
python models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path="$PIPELINE_CONFIG_PATH" \
    --model_dir="$MODEL_DIR" \
    --alsologtostderr
# # evaluate model
# python models/research/object_detection/model_main_tf2.py \
#     --pipeline_config_path="$PIPELINE_CONFIG_PATH" \
#     --model_dir="$MODEL_DIR" \
#     --checkpoint_dir="$MODEL_DIR" \
#     --eval_timeout=0 \
#     --alsologtostderr
# # save model
# python models/research/object_detection/exporter_main_v2.py \
#     --input_type image_tensor \
#     --pipeline_config_path="$PIPELINE_CONFIG_PATH" \
#     --trained_checkpoint_dir="$MODEL_DIR" \
#     --output_directory="$MODEL_DIR/output" \
#     --alsologtostderr

In [None]:
# Load the Mask R-CNN (Inception ResNet V2, 1024x1024) model published on TF Hub.
MASK_RCNN_HUB_URL = "https://tfhub.dev/tensorflow/mask_rcnn/inception_resnet_v2_1024x1024/1"
detector = hub.load(MASK_RCNN_HUB_URL)

In [None]:
# Class ids returned by the hub detector are decoded with the COCO label map shipped with the OD API.
PATH_TO_LABELS = './models/research/object_detection/data/mscoco_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
# Pick one random frame that has at least one annotation.
has_annotations = cots_df['annotations'].map(len) > 0
cots_df_elements = cots_df[has_annotations].sample(n=1)

for _, row in cots_df_elements.iterrows():
  # Run the detector on a batch containing just this frame.
  image_tensor = tf.io.decode_jpeg(tf.io.read_file(row.image_path), channels=3)
  detector_output = detector(image_tensor[tf.newaxis, ...])
  result = {key: value.numpy() for key, value in detector_output.items()}
  label_id_offset = 0
  image_np = image_tensor.numpy()
  # Separate copy for drawing so the decoded frame itself stays untouched.
  image_np_with_mask = image_np.copy()
  if 'detection_masks' in result:
    # we need to convert np.arrays to tensors
    detection_masks = tf.convert_to_tensor(result['detection_masks'][0])
    detection_boxes = tf.convert_to_tensor(result['detection_boxes'][0])

    # Reframe the bbox mask to the image size.
    image_height, image_width = image_np.shape[0], image_np.shape[1]
    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
        detection_masks, detection_boxes, image_height, image_width)
    # Binarise the masks at 0.5.
    detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8)
    result['detection_masks_reframed'] = detection_masks_reframed.numpy()

  # Draw detections (boxes, labels and, when available, masks) onto the copy.
  vis_util.visualize_boxes_and_labels_on_image_array(
      image_np_with_mask,
      result['detection_boxes'][0],
      (result['detection_classes'][0] + label_id_offset).astype(int),
      result['detection_scores'][0],
      category_index,
      use_normalized_coordinates=True,
      max_boxes_to_draw=200,
      min_score_thresh=.30,
      agnostic_mode=False,
      instance_masks=result.get('detection_masks_reframed', None),
      line_thickness=8)

  # Show the detections with the ground-truth boxes outlined in red.
  fig, ax = plt.subplots(figsize=(24,32))
  ax.imshow(image_np_with_mask)
  for a in row.annotations:
    ground_truth_box = patches.Rectangle((a['x'], a['y']), a['width'], a['height'],
                                         linewidth=3, edgecolor='r', facecolor='none')
    ax.add_patch(ground_truth_box)
  plt.show()




In [None]:
# Parameters for the retinex functions defined in this cell (read by the MSRCR calls).
config = dict(
    sigma_list=[15, 80, 250],
    G=5.0,
    b=25.0,
    alpha=125.0,
    beta=46.0,
    low_clip=0.01,
    high_clip=0.99,
)
def singleScaleRetinex(img, sigma):
    """Return log10(img) minus log10 of its Gaussian-blurred version.

    The kernel size is passed as (0, 0) with `sigma` as the blur's sigma
    argument. `img` must be strictly positive for the logarithms to be finite.
    """
    blurred = cv.GaussianBlur(img, (0, 0), sigma)
    return np.log10(img) - np.log10(blurred)

def multiScaleRetinex(img, sigma_list):
    """Average `singleScaleRetinex(img, sigma)` over every sigma in `sigma_list`."""
    accumulated = np.zeros_like(img)
    for scale in sigma_list:
        # In-place accumulation keeps the dtype of `img`.
        accumulated += singleScaleRetinex(img, scale)
    return accumulated / len(sigma_list)

def colorRestoration(img, alpha, beta):
    """Compute beta * (log10(alpha * img) - log10(sum of img over axis 2)).

    Args:
        img: array with at least three dimensions; axis 2 is summed with
            keepdims so the result broadcasts back against `img`.
        alpha: multiplier applied to `img` inside the first logarithm.
        beta: overall gain of the result.

    Returns:
        Array with the same shape as `img`.
    """
    channel_total = np.sum(img, axis=2, keepdims=True)
    log_scaled = np.log10(alpha * img)
    log_total = np.log10(channel_total)
    return beta * (log_scaled - log_total)

def simplestColorBalance(img, low_clip, high_clip):
    """Clip each channel of `img` to value bounds chosen from its histogram.

    For every channel the unique values are ranked in ascending order. The
    lower bound is the largest value for which the fraction of pixels strictly
    below it is < `low_clip`; the upper bound is chosen the same way with
    `high_clip`. If no value qualifies (e.g. a clip fraction of 0) the
    channel minimum is used, so a `low_clip` of 0 means "no low clipping"
    instead of raising on an unassigned bound.

    Args:
        img: array of shape (H, W, C). It is modified in place.
        low_clip: fraction of pixels (0..1) selecting the lower bound.
        high_clip: fraction of pixels (0..1) selecting the upper bound.

    Returns:
        The same `img` object, with every channel clipped.
    """
    total = img.shape[0] * img.shape[1]
    for i in range(img.shape[2]):
        unique, counts = np.unique(img[:, :, i], return_counts=True)
        if unique.size == 0:
            # Empty image: nothing to clip.
            continue
        # Fraction of pixels strictly below each unique value (strictly increasing).
        below = (np.cumsum(counts) - counts) / total
        # searchsorted gives the first index whose fraction is >= the clip level;
        # the entry just before it is the last one still under the level.
        low_idx = max(np.searchsorted(below, low_clip, side='left') - 1, 0)
        high_idx = max(np.searchsorted(below, high_clip, side='left') - 1, 0)
        low_val = unique[low_idx]
        high_val = unique[high_idx]

        img[:, :, i] = np.maximum(np.minimum(img[:, :, i], high_val), low_val)

    return img

def MSRCR(img, sigma_list, G, b, alpha, beta, low_clip, high_clip):
    """Multi-scale retinex combined with colour restoration.

    Steps: shift the image by +1 (so logarithms are finite), multiply the
    multi-scale retinex by the colour-restoration term, apply gain `G` and
    offset `b`, stretch every channel to 0..255, convert to uint8 and finally
    clip each channel with `simplestColorBalance`.

    Args:
        img: image array of shape (H, W, C).
        sigma_list: blur sigmas passed to `multiScaleRetinex`.
        G, b: gain and offset applied as G * (retinex * colour + b).
        alpha, beta: parameters of `colorRestoration`.
        low_clip, high_clip: clip fractions for `simplestColorBalance`.

    Returns:
        uint8 array with the same shape as `img`.
    """
    shifted = np.float64(img) + 1.0

    retinex = multiScaleRetinex(shifted, sigma_list)
    restoration = colorRestoration(shifted, alpha, beta)
    img_msrcr = G * (retinex * restoration + b)

    # Per-channel min-max stretch to the 0..255 range.
    channel_min = np.min(img_msrcr, axis=(0, 1), keepdims=True)
    channel_max = np.max(img_msrcr, axis=(0, 1), keepdims=True)
    img_msrcr = (img_msrcr - channel_min) / (channel_max - channel_min) * 255

    img_msrcr = np.clip(img_msrcr, 0, 255).astype(np.uint8)
    return simplestColorBalance(img_msrcr, low_clip, high_clip)

def automatedMSRCR(img, sigma_list):
    """Multi-scale retinex with clip bounds chosen automatically per channel.

    Every channel of the retinex is binned at a resolution of 0.01 (values are
    multiplied by 100 and truncated to int32). Bins whose count falls below 10%
    of the zero bin's count set the lower bound (last such negative bin) and
    the upper bound (first such positive bin). The channel is clipped to those
    bounds and stretched to 0..255.

    Args:
        img: image array of shape (H, W, C).
        sigma_list: blur sigmas passed to `multiScaleRetinex`.

    Returns:
        uint8 array with the same shape as `img`.
    """
    img = np.float64(img) + 1.0

    img_retinex = multiScaleRetinex(img, sigma_list)

    for i in range(img_retinex.shape[2]):
        unique, count = np.unique(np.int32(img_retinex[:, :, i] * 100), return_counts=True)
        # Count of the zero bin. When no value falls into it the count is 0,
        # which disables clipping for this channel; previously the name was
        # left unbound, or stale from the preceding channel.
        zero_hits = count[unique == 0]
        zero_count = zero_hits[0] if zero_hits.size else 0

        # Default bounds: full value range of the channel.
        low_val = unique[0] / 100.0
        high_val = unique[-1] / 100.0
        for u, c in zip(unique, count):
            if u < 0 and c < zero_count * 0.1:
                low_val = u / 100.0
            if u > 0 and c < zero_count * 0.1:
                high_val = u / 100.0
                break

        img_retinex[:, :, i] = np.maximum(np.minimum(img_retinex[:, :, i], high_val), low_val)

        # Min-max stretch of the clipped channel to 0..255.
        img_retinex[:, :, i] = (img_retinex[:, :, i] - np.min(img_retinex[:, :, i])) / \
                               (np.max(img_retinex[:, :, i]) - np.min(img_retinex[:, :, i])) \
                               * 255

    img_retinex = np.uint8(img_retinex)

    return img_retinex

def MSRCP(img, sigma_list, low_clip, high_clip):
    """Multi-scale retinex applied to the intensity channel, then used to rescale the colours.

    The retinex is computed on the mean of the channels, colour-balanced and
    stretched to 1..256. Every pixel is then multiplied by a single gain
    min(256 / max channel value, new intensity / old intensity), so all
    channels of a pixel are scaled by the same factor.

    The gain is computed for the whole image at once instead of in a Python
    loop over every pixel, and works for any number of channels rather than
    exactly three.

    Args:
        img: image array of shape (H, W, C).
        sigma_list: blur sigmas passed to `multiScaleRetinex`.
        low_clip, high_clip: clip fractions for `simplestColorBalance`.

    Returns:
        uint8 array with the same shape as `img`.
    """
    # +1 keeps every value strictly positive (logarithms and divisions below).
    img = np.float64(img) + 1.0

    intensity = np.sum(img, axis=2) / img.shape[2]

    retinex = multiScaleRetinex(intensity, sigma_list)

    # Add a channel axis so both arrays broadcast against `img`.
    intensity = np.expand_dims(intensity, 2)
    retinex = np.expand_dims(retinex, 2)

    intensity1 = simplestColorBalance(retinex, low_clip, high_clip)

    intensity1 = (intensity1 - np.min(intensity1)) / \
                 (np.max(intensity1) - np.min(intensity1)) * \
                 255.0 + 1.0

    # Per-pixel gain, capped so the brightest channel does not exceed 256.
    brightest = np.max(img, axis=2, keepdims=True)
    gain = np.minimum(256.0 / brightest, intensity1 / intensity)
    img_msrcp = gain * img

    # Undo the +1 shift applied at the top.
    img_msrcp = np.uint8(img_msrcp - 1.0)

    return img_msrcp

In [None]:
# Thresholding experiment: for one random frame with exactly 10 annotations,
# enhance it with MSRCR, show several thresholdings of every annotated box and
# outline the longest contour found in the blurred Otsu mask.
cots_df_elements = cots_df[10 == cots_df['annotations'].map(len)].sample(n=1)
for _, row in cots_df_elements.iterrows():
  img = cv.imread(row.image_path)
  img = MSRCR(img,config['sigma_list'],
                     config['G'],
                     config['b'],
                     config['alpha'],
                     config['beta'],
                     config['low_clip'],
                     config['high_clip'])
  gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
  # gray = cv.GaussianBlur(gray,(3,3),0)
  # thresh = cv.adaptiveThreshold(gray, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY_INV, 5, 0)
  overlay = img.copy()
  for a in row.annotations:
    # Grayscale and colour crops of the annotated box.
    roi = gray[a['y']:a['y']+a['height'],a['x']:a['x']+a['width']]
    roi_img = img[a['y']:a['y']+a['height'],a['x']:a['x']+a['width']]
    if roi.size == 0:
      # Box lies outside the frame: nothing to threshold.
      continue
    cv2_imshow(roi)
    cv2_imshow(roi_img)
    # global thresholding
    _, th1 = cv.threshold(roi,127,255,cv.THRESH_BINARY)
    # Otsu's thresholding
    _, th2 = cv.threshold(roi,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
    # Otsu's thresholding after Gaussian filtering
    blur = cv.GaussianBlur(roi,(5,5),0)
    _, th3 = cv.threshold(blur,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
    _, th4 = cv.threshold(blur,0,255,cv.THRESH_TOZERO+cv.THRESH_OTSU)
    _, th5 = cv.threshold(blur,0,255,cv.THRESH_TOZERO_INV+cv.THRESH_OTSU)
    _, th6 = cv.threshold(blur,0,255,cv.THRESH_BINARY_INV+cv.THRESH_OTSU)
    # Structuring element for the optional morphological clean-up below.
    kernel = np.ones((3,3),np.uint8)
    # th3 = cv.erode(th3,kernel,iterations = 1)
    # th3 = cv.morphologyEx(th3, cv.MORPH_OPEN, kernel)

    for mask in (th1, th2, th3, th4, th5, th6):
      cv2_imshow(mask)

    # offset shifts contour points from crop coordinates to frame coordinates.
    contours, hierarchy = cv.findContours(th3, cv.RETR_CCOMP , cv.CHAIN_APPROX_NONE,offset=(a['x'], a['y']))
    if len(contours) == 0:
      # An all-black mask yields no contour; max() would raise on an empty sequence.
      continue
    contour = max(contours, key = lambda c : cv.arcLength(c, True))
    # contour = max(contours, key = lambda c : cv.contourArea(c))
    epsilon = 0.01 * cv.arcLength(contour, True)
    approx_polygon = cv.approxPolyDP(contour, epsilon, True)
    cv.drawContours(overlay, [approx_polygon], 0, (0,255,0), 3)
  # Blend the outlines over the enhanced frame.
  alpha = 0.25
  img = cv.addWeighted(overlay, alpha, img, 1 - alpha, 0)
  cv2_imshow(img)
  # No cv.destroyAllWindows(): cv2_imshow opens no window, and the call is
  # not implemented in headless OpenCV builds.


In [None]:
# Segmentation experiment: for one random frame with exactly 10 annotations,
# enhance it with MSRCR and fill, inside every annotated box, the longest
# contour found in either the Otsu mask or its inverse.
cots_df_elements = cots_df[10 == cots_df['annotations'].map(len)].sample(n=1)
for _, row in cots_df_elements.iterrows():
  img = cv.imread(row.image_path)
  img = MSRCR(img,config['sigma_list'],
                     config['G'],
                     config['b'],
                     config['alpha'],
                     config['beta'],
                     config['low_clip'],
                     config['high_clip'])
  img_gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
  overlay = img.copy()
  for a in row.annotations:
    roi = img_gray[a['y']:a['y']+a['height'],a['x']:a['x']+a['width']]
    if roi.size == 0:
      # Box lies outside the frame: nothing to segment.
      continue
    blur = cv.GaussianBlur(roi,(5,5),0)
    _, thr = cv.threshold(blur,0,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
    _, thr_inv = cv.threshold(blur,0,255,cv.THRESH_BINARY_INV+cv.THRESH_OTSU)
    # offset shifts contour points from crop coordinates to frame coordinates.
    contours, _ = cv.findContours(thr, cv.RETR_CCOMP, cv.CHAIN_APPROX_NONE,offset=(a['x'], a['y']))
    contours_inv, _ = cv.findContours(thr_inv, cv.RETR_CCOMP, cv.CHAIN_APPROX_NONE,offset=(a['x'], a['y']))
    # list() makes the concatenation independent of the container type findContours returns.
    candidates = list(contours) + list(contours_inv)
    if not candidates:
      # max() would raise on an empty sequence.
      continue
    contour = max(candidates, key = lambda c : cv.arcLength(c, True))
    epsilon = 0.001 * cv.arcLength(contour, True)
    approx_polygon = cv.approxPolyDP(contour, epsilon, True)
    cv.drawContours(overlay, [approx_polygon], 0, (0,255,0), thickness=cv.FILLED)
  # Blend the filled polygons over the enhanced frame.
  alpha = 0.5
  img = cv.addWeighted(overlay, alpha, img, 1 - alpha, 0)
  cv2_imshow(img)
  # No cv.destroyAllWindows(): cv2_imshow opens no window, and the call is
  # not implemented in headless OpenCV builds.


In [None]:
# Line-detection experiment: for one random frame with exactly 10 annotations,
# enhance it with MSRCR, run Canny + probabilistic Hough inside every annotated
# box and draw the detected segments on the frame.
cots_df_elements = cots_df[10 == cots_df['annotations'].map(len)].sample(n=1)
for _, row in cots_df_elements.iterrows():
  img = cv.imread(row.image_path)
  img = MSRCR(img,config['sigma_list'],
                     config['G'],
                     config['b'],
                     config['alpha'],
                     config['beta'],
                     config['low_clip'],
                     config['high_clip'])
  gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
  overlay = img.copy()
  for a in row.annotations:
    # Crop from the grayscale frame: the `thresh` image this line used to read
    # is never created in this notebook (NameError on a fresh kernel).
    roi = gray[a['y']:a['y']+a['height'],a['x']:a['x']+a['width']]
    if roi.size == 0:
      # Box lies outside the frame: nothing to analyse.
      continue

    kernel_size = 5
    blur_roi = cv.GaussianBlur(roi,(kernel_size, kernel_size),0)
    low_threshold = 50
    high_threshold = 150
    roi_edges = cv.Canny(blur_roi, low_threshold, high_threshold)
    rho = 1  # distance resolution in pixels of the Hough grid
    theta = np.pi / 180  # angular resolution in radians of the Hough grid
    threshold = 15  # minimum number of votes (intersections in Hough grid cell)
    min_line_length = 50  # minimum number of pixels making up a line
    max_line_gap = 20  # maximum gap in pixels between connectable line segments

    # Run Hough on edge detected image
    # Output "lines" is an array containing endpoints of detected line segments
    roi_lines = cv.HoughLinesP(roi_edges, rho, theta, threshold,
                               minLineLength=min_line_length, maxLineGap=max_line_gap)
    if roi_lines is None:
      # HoughLinesP returns None when no segment is found.
      continue
    for line in roi_lines:
      for x1, y1, x2, y2 in line:
        # Endpoints are relative to the crop; shift them by the box origin
        # before drawing on the full frame.
        start = (int(x1) + a['x'], int(y1) + a['y'])
        end = (int(x2) + a['x'], int(y2) + a['y'])
        cv.line(overlay, start, end, (255,0,0), 5)
  # Blend the drawn segments over the enhanced frame.
  alpha = 0.25
  img = cv.addWeighted(overlay, alpha, img, 1 - alpha, 0)
  cv2_imshow(img)
  # No cv.destroyAllWindows(): cv2_imshow opens no window, and the call is
  # not implemented in headless OpenCV builds.