## Auditing and Exporting Detections

In [1]:
import os
import argparse
import numpy as np
import json
import csv
import rasterio

from shapely.geometry import mapping, Polygon
import fiona # only required for exporting to shapefiles

In [2]:
# ingest the image
#infile = "mosaics/SteepleJason_Bubble_Nov2019_transparent_mosaic_group1.tif"

#img_dir = infile.split(".")[0]
#prj_name = img_dir.split("/")[-1]

infile = "mosaics/SteepleJason_Hump_Nov2019_transparent_mosaic_group1.tif"

img_dir = infile.split(".")[0] 
prj_name = img_dir.split("/")[-1]

#infile = "mosaics/SteepleJason_Blob_Nov2019_transparent_mosaic_group1.tif"

#img_dir = infile.split(".")[0]
#prj_name = img_dir.split("/")[-1]

In [3]:
img_dir

'mosaics/SteepleJason_Hump_Nov2019_transparent_mosaic_group1'

In [4]:
# open the output from detection

with open('detections_final/overlap25/detections_steeplejason_hump.json') as f:
    detected_labels = json.load(f)

In [5]:
len(detected_labels)

322

### Exporting to Shapefile

In [6]:
image_annotations = []
for key, value in detected_labels.items():
    #print(key)
    annotation = [[key][0].split("/")[-1]]
    detections = []
    for item in value:
        box = item['box']
        detections.append(box)
        print(item)
    annotation.append(detections)
    image_annotations.append(annotation)

{'box': [368, 5, 456, 121], 'label': 0, 'score': 0.846792459487915}
{'box': [406, 340, 503, 451], 'label': 0, 'score': 0.8439090847969055}
{'box': [597, 153, 671, 255], 'label': 0, 'score': 0.8395474553108215}
{'box': [707, 619, 787, 742], 'label': 0, 'score': 0.8379165530204773}
{'box': [840, 482, 941, 578], 'label': 0, 'score': 0.8311867117881775}
{'box': [841, 781, 941, 907], 'label': 0, 'score': 0.8307768702507019}
{'box': [129, 35, 210, 154], 'label': 0, 'score': 0.829826295375824}
{'box': [537, 514, 638, 593], 'label': 0, 'score': 0.8188229203224182}
{'box': [117, 427, 194, 541], 'label': 0, 'score': 0.7970475554466248}
{'box': [292, 541, 399, 606], 'label': 0, 'score': 0.7924637198448181}
{'box': [107, 634, 184, 749], 'label': 0, 'score': 0.789303719997406}
{'box': [797, 27, 914, 117], 'label': 0, 'score': 0.7714110612869263}
{'box': [702, 385, 808, 456], 'label': 0, 'score': 0.7637060880661011}
{'box': [308, 773, 412, 872], 'label': 0, 'score': 0.7422659993171692}
{'box': [0, 2

{'box': [881, 225, 975, 305], 'label': 0, 'score': 0.911235511302948}
{'box': [518, 634, 646, 710], 'label': 0, 'score': 0.9104604125022888}
{'box': [387, 436, 491, 512], 'label': 0, 'score': 0.9044030904769897}
{'box': [38, 876, 117, 981], 'label': 0, 'score': 0.9007142782211304}
{'box': [734, 279, 808, 382], 'label': 0, 'score': 0.8902929425239563}
{'box': [10, 337, 121, 430], 'label': 0, 'score': 0.8810423612594604}
{'box': [240, 0, 334, 65], 'label': 0, 'score': 0.8775852918624878}
{'box': [832, 409, 921, 509], 'label': 0, 'score': 0.8759432435035706}
{'box': [257, 585, 363, 657], 'label': 0, 'score': 0.8594281077384949}
{'box': [903, 787, 998, 866], 'label': 0, 'score': 0.8518269658088684}
{'box': [240, 816, 357, 888], 'label': 0, 'score': 0.8109163641929626}
{'box': [0, 538, 84, 610], 'label': 0, 'score': 0.7603245377540588}
{'box': [882, 66, 939, 122], 'label': 0, 'score': 0.5037395358085632}
{'box': [797, 268, 888, 364], 'label': 0, 'score': 0.9458587169647217}
{'box': [402, 26

{'box': [499, 461, 590, 557], 'label': 0, 'score': 0.9435839056968689}
{'box': [661, 316, 769, 401], 'label': 0, 'score': 0.9313431978225708}
{'box': [793, 397, 895, 509], 'label': 0, 'score': 0.9258947372436523}
{'box': [240, 859, 320, 972], 'label': 0, 'score': 0.9217153787612915}
{'box': [463, 280, 557, 372], 'label': 0, 'score': 0.9203637838363647}
{'box': [255, 287, 330, 398], 'label': 0, 'score': 0.9193363785743713}
{'box': [784, 48, 885, 148], 'label': 0, 'score': 0.9170312881469727}
{'box': [746, 567, 841, 652], 'label': 0, 'score': 0.9150696992874146}
{'box': [745, 716, 834, 823], 'label': 0, 'score': 0.9147375226020813}
{'box': [344, 651, 420, 765], 'label': 0, 'score': 0.9096328616142273}
{'box': [14, 332, 98, 434], 'label': 0, 'score': 0.9080962538719177}
{'box': [540, 105, 634, 195], 'label': 0, 'score': 0.9058723449707031}
{'box': [35, 154, 128, 250], 'label': 0, 'score': 0.8879668116569519}
{'box': [673, 154, 760, 271], 'label': 0, 'score': 0.8866379261016846}
{'box': [3

In [7]:
# ingest back in the coordinates of detections within an image referenced by their filename

with open('mosaics/metadata/steeplejason_hump_data_1000x1000_overlap25.json') as f:
    img_data = json.load(f)

# open the satellite image
dataset = rasterio.open(infile)
    
geolocated_annotations = []
for annotation in image_annotations:
    for detection in annotation[1]:
        try:            
            local_bounding_box = np.array([[detection[0], detection[1]], [detection[2], detection[1]], [detection[2], detection[3]], [detection[0], detection[3]]]).astype(int)
            image_located_bb = local_bounding_box + [img_data["image_locations"][annotation[0]]]

            geolocated_bb = []
            for point in image_located_bb:
                geolocated_bb.append(dataset.transform * point)
            geolocated_annotations.append(geolocated_bb)
        except ValueError: # if the image doesn't have a detection
            pass

In [22]:
len(geolocated_annotations)

6290

In [79]:
#convert geolocated annotations back to x1, y1, x2, y2 list
#compare converted list to geolocated annotations list and pull out the ones to keep from the geolocated annotations list 
boundingBoxes = []
for annotation in geolocated_annotations:
    x1 = annotation[0][0]
    y1 = annotation[0][1]
    x2 = annotation[1][0]
    y2 = annotation[2][1]
    bounding_box = np.array([x1,y1,x2,y2])
    boundingBoxes.append(bounding_box)


In [80]:
print(boundingBoxes)

[array([ 624153.64381, 4344732.10325,  624154.10317, 4344731.49773]), array([ 624153.84217, 4344730.35455,  624154.34851, 4344729.77513]), array([ 624154.83919, 4344731.33069,  624155.22547, 4344730.79825]), array([ 624155.41339, 4344728.89817,  624155.83099, 4344728.25611]), array([ 624156.10765, 4344729.61331,  624156.63487, 4344729.11219]), array([ 624156.11287, 4344728.05253,  624156.63487, 4344727.39481]), array([ 624152.39623, 4344731.94665,  624152.81905, 4344731.32547]), array([ 624154.52599, 4344729.44627,  624155.05321, 4344729.03389]), array([ 624152.33359, 4344729.90041,  624152.73553, 4344729.30533]), array([ 624153.24709, 4344729.30533,  624153.80563, 4344728.96603]), array([ 624152.28139, 4344728.81987,  624152.68333, 4344728.21957]), array([ 624155.88319, 4344731.98841,  624156.49393, 4344731.51861]), array([ 624155.38729, 4344730.11965,  624155.94061, 4344729.74903]), array([ 624153.33061, 4344728.09429,  624153.87349, 4344727.57751]), array([ 624151.72285, 4344730.714

In [76]:
# Malisiewicz et al.
def non_max_suppression_fast(boxes, overlapThresh):
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []
    
    # if the bounding boxes integers, convert them to floats --
    # this is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")
    
    # initialize the list of picked indexes	
    pick = []
    
    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    
    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
    
    # keep looping while some indexes still remain in the indexes
    # list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the
        # index value to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        
        
        # find the largest (x, y) coordinates for the start of
        # the bounding box and the smallest (x, y) coordinates
        # for the end of the bounding box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        
        
        # compute the width and height of the bounding box
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        
        # compute the ratio of overlap
        overlap = (w * h) / area[idxs[:last]]
        
        # delete all indexes from the index list that are in the suppression list
        idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0])))
    
    # return only the bounding boxes that were picked
    return boxes[pick].astype("int")

In [82]:
for bbox in boundingBoxes:
    pick = non_max_suppression_fast(bbox, 0.3)

IndexError: too many indices for array

In [9]:
# write out the detections as a shapefile

from collections import OrderedDict
import fiona
from fiona.crs import from_epsg

# Define your schema as a polygon geom with a couple of fields
schema = {
    'geometry': 'Polygon',
    'properties': OrderedDict([
        ('ImageName', 'str'),
        ('Detection', 'str')
  ])
}

with fiona.open(
    'shapefiles_final/overlap25/steeplejason_hump_overlap25.shp',
    'w',
    driver='ESRI Shapefile',
    crs=dataset.crs,
    schema=schema) as c:
    
    for num, polygon in enumerate(geolocated_annotations):
        record = {
            'geometry': {'coordinates': [polygon], 'type': 'Polygon'},
            'id': num,
            'properties': OrderedDict([('ImageName', infile),
                                       ('Detection', 'Albatross')
                                       ]),
            'type': 'Feature'}
        c.write(record)
        