## Auditing and Exporting Detections

In [1]:
import os
import argparse
import numpy as np
import json
import csv
import rasterio

from shapely.geometry import mapping, Polygon
import fiona # only required for exporting to shapefiles

In [2]:
# ingest the image
infile = "mosaics/GrandJason_SEBlob_Nov2019_New_transparent_mosaic_group1.tif"
# Strip only the trailing extension; splitting on the first "." would truncate
# the path if any directory or file name component contained a dot.
img_dir = os.path.splitext(infile)[0]
# Project name = last path component of the extension-less path.
prj_name = os.path.basename(img_dir)

In [3]:
# Display the extension-less mosaic path derived from `infile`.
img_dir

'mosaics/GrandJason_SEBlob_Nov2019_New_transparent_mosaic_group1'

In [4]:
# Read the detector's JSON output into memory.
detections_path = 'detections_albatross/final/detections_grandjason_seblob_500x500_overlap60.json'
with open(detections_path) as detections_file:
    detected_labels = json.load(detections_file)

In [5]:
# Number of top-level entries in the detections JSON.
len(detected_labels)

1543

### Exporting to Shapefile

In [6]:
# One entry per key of `detected_labels`: [file name, list of that entry's boxes].
# The file name is the last "/"-separated component of the key.
image_annotations = [
    [tile_path.split("/")[-1], [entry['box'] for entry in tile_detections]]
    for tile_path, tile_detections in detected_labels.items()
]

In [7]:
# Flatten every detection's score into one list, in the same
# key-then-detection order used when collecting the boxes.
scores = [
    entry['score']
    for tile_detections in detected_labels.values()
    for entry in tile_detections
]

In [8]:
with open('mosaics/metadata/grandjason_seblob_edited_data_500x500_overlap60.json') as f:
    img_data = json.load(f)

# Shift every tile-local box by its tile's entry in img_data["image_locations"]
# (presumably the tile's [x, y] pixel offset inside the mosaic -- TODO confirm).
# Each box is expanded to its four corners, assuming box = [x1, y1, x2, y2].
image_bbox = []
skipped_detections = 0
for annotation in image_annotations:
    for detection in annotation[1]:
        try:
            local_bounding_box = np.array([[detection[0], detection[1]], [detection[2], detection[1]], [detection[2], detection[3]], [detection[0], detection[3]]]).astype(int)
            image_located_bb = local_bounding_box + [img_data["image_locations"][annotation[0]]]
        except ValueError as err:
            # A ValueError here means the box or the tile offset is malformed
            # (e.g. shapes that cannot be broadcast). Tiles without detections
            # never reach this point because the inner loop does not run.
            skipped_detections += 1
            print("Skipping malformed detection {!r} in {}: {}".format(detection, annotation[0], err))
            continue
        image_bbox.append(image_located_bb)

if skipped_detections:
    # Skipped boxes leave `image_bbox` shorter than the per-detection `scores`
    # list, so the two would no longer line up index-for-index.
    print("WARNING: {} detection(s) skipped; image_bbox no longer aligns with scores".format(skipped_detections))

In [9]:
# Total number of mosaic-referenced boxes collected.
len(image_bbox)

4558

In [10]:
# Collapse each 4-corner polygon back to [x1, y1, x2, y2]:
# corner 0 supplies (x1, y1), corner 1 supplies x2, corner 2 supplies y2.
bbox = [
    [corners[0][0], corners[0][1], corners[1][0], corners[2][1]]
    for corners in image_bbox
]

In [11]:
# Number of [x1, y1, x2, y2] boxes built from the corner polygons.
len(bbox)

4558

In [12]:
# Stack the [x1, y1, x2, y2] lists into one array, the form non_max_suppression indexes by column.
bboxes = np.array(bbox)

In [13]:
# Shape of the stacked box array (rows = boxes, columns = coordinates).
bboxes.shape

(4558, 4)

In [14]:
# Number of array dimensions of the stacked box array.
bboxes.ndim

2

In [15]:
# Number of boxes (length of the first axis).
len(bboxes)

4558

In [16]:
# Malisiewicz et al.
# import the necessary packages
import numpy as np

def non_max_suppression(boxes, probs=None, overlapThresh=0.6):
    """Greedy non-maximum suppression (Malisiewicz et al. formulation).

    Boxes are visited from the highest-ranked to the lowest-ranked. Each
    visited box is kept, and every remaining box whose overlap with it
    exceeds ``overlapThresh`` is discarded.

    Parameters
    ----------
    boxes : array-like, shape (N, 4)
        One ``[x1, y1, x2, y2]`` row per box. Lists and integer arrays
        (signed or unsigned) are accepted and are processed as floats.
    probs : array-like of length N, optional
        Ranking score per box; higher scores are visited first. When
        omitted, boxes are ranked by their ``y2`` coordinate instead.
    overlapThresh : float, default 0.6
        A remaining box is dropped when
        ``intersection_area / area_of_that_box`` is greater than this value.
        Note the ratio is relative to the remaining box's own area, not IoU.

    Returns
    -------
    numpy.ndarray of int, shape (K, 4)
        The kept boxes, in the order they were picked. For empty input an
        empty list ``[]`` is returned (kept for backward compatibility).

    Raises
    ------
    ValueError
        If ``probs`` is given and its length differs from the number of boxes.
    """
    # Accept plain Python lists as well as arrays.
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # A mismatched score list would either silently ignore boxes (too short)
    # or fail later with an opaque IndexError (too long); fail early instead.
    if probs is not None and len(probs) != len(boxes):
        raise ValueError(
            "probs has {} entries but there are {} boxes".format(len(probs), len(boxes))
        )

    # Work in floats: the overlap ratio needs true division, and unsigned
    # integer boxes would wrap around when `xx2 - xx1 + 1` goes negative.
    if boxes.dtype.kind in ("i", "u"):
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # Box areas use inclusive extents (hence the +1). The ranking key
    # defaults to y2 and is replaced by the scores when they are provided.
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = y2
    if probs is not None:
        idxs = probs

    # ascending sort: the best-ranked box sits at the end of `idxs`
    idxs = np.argsort(idxs)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # take the best remaining box and keep it
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # intersection rectangle between the kept box and every other
        # remaining box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        # width/height of the intersection, clamped at 0 for disjoint boxes
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # fraction of each remaining box covered by the kept box
        overlap = (w * h) / area[idxs[:last]]

        # drop the kept box itself plus every box overlapping it too much
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))

    # return only the bounding boxes that were picked
    return boxes[pick].astype("int")

In [17]:
# Suppress overlapping boxes, ranking them by detection score.
pick = non_max_suppression(bboxes, probs=scores, overlapThresh=0.6)

In [18]:
# Number of boxes remaining after non-maximum suppression.
len(pick)

3771

In [19]:
# Convert the array of kept boxes to nested Python lists.
pick_list = pick.tolist()

In [20]:
# ingest back in the coordinates of detections within an image referenced by their filename

with open('mosaics/metadata/grandjason_seblob_edited_data_500x500_overlap60.json') as f:
    img_data = json.load(f)

# open the satellite image
# (left open on purpose: later cells read dataset.transform and dataset.crs)
dataset = rasterio.open(infile)

# Geolocate every raw (pre-NMS) detection: tile-local box -> mosaic pixel
# corners -> coordinates obtained by applying the raster's affine transform.
geolocated_annotations_before_nms = []
skipped_detections = 0
for annotation in image_annotations:
    for detection in annotation[1]:
        try:
            local_bounding_box = np.array([[detection[0], detection[1]], [detection[2], detection[1]], [detection[2], detection[3]], [detection[0], detection[3]]]).astype(int)
            image_located_bb = local_bounding_box + [img_data["image_locations"][annotation[0]]]

            geolocated_bb = []
            for point in image_located_bb:
                geolocated_bb.append(dataset.transform * point)
            geolocated_annotations_before_nms.append(geolocated_bb)
        except ValueError as err:
            # Malformed box or tile offset. Tiles without detections never
            # reach this point because the inner loop does not run for them.
            skipped_detections += 1
            print("Skipping malformed detection {!r} in {}: {}".format(detection, annotation[0], err))

if skipped_detections:
    print("WARNING: {} detection(s) could not be geolocated and were skipped".format(skipped_detections))

In [21]:
# Geolocate the boxes that survived non-maximum suppression.
#
# `pick_list` already holds mosaic pixel coordinates, so the tile metadata is
# not needed here. The raster handle `dataset` opened in the previous cell is
# reused; opening the file again would leak the earlier handle.
geolocated_annotations_after_nms = []

for box in pick_list:
    # Expand [x1, y1, x2, y2] into its four corners.
    image_located_bb = np.array([[box[0], box[1]], [box[2], box[1]], [box[2], box[3]], [box[0], box[3]]]).astype(int)

    # Apply the raster's affine transform to each pixel corner.
    geolocated_bb = []
    for point in image_located_bb:
        geolocated_bb.append(dataset.transform * point)
    geolocated_annotations_after_nms.append(geolocated_bb)
            

In [22]:
# Number of geolocated polygons before non-maximum suppression.
len(geolocated_annotations_before_nms)

4558

In [23]:
# Number of geolocated polygons after non-maximum suppression.
len(geolocated_annotations_after_nms)

3771

In [142]:
# write out the detections as a shapefile

from collections import OrderedDict
import fiona
from fiona.crs import from_epsg

# Define your schema as a polygon geom with a couple of fields
schema = {
    'geometry': 'Polygon',
    'properties': OrderedDict([
        ('ImageName', 'str'),
        ('Detection', 'str')
  ])
}

# Write one polygon feature per post-NMS detection, in the raster's CRS.
with fiona.open(
    'shapefiles_albatross/final/grandjason_seblob.shp',
    'w',
    driver='ESRI Shapefile',
    crs=dataset.crs,
    schema=schema) as c:

    for num, polygon in enumerate(geolocated_annotations_after_nms):
        # Convert numpy scalars to plain floats and close the ring:
        # GeoJSON-style polygon rings must end on their starting vertex.
        ring = [(float(x), float(y)) for x, y in polygon]
        if ring and ring[0] != ring[-1]:
            ring.append(ring[0])

        record = {
            'geometry': {'coordinates': [ring], 'type': 'Polygon'},
            'id': num,
            'properties': OrderedDict([('ImageName', infile),
                                       ('Detection', 'Albatross')
                                       ]),
            'type': 'Feature'}
        c.write(record)
        