In [None]:
import os

import geopandas as gpd
import fiona

import rasterio

import matplotlib.pyplot as plt

In [None]:
# ---------------------------------------------------------------------------
# Inputs: annotation envelopes, the tile index, and the directory that holds
# the retiled images (one '<filename>.jpg' per tile is opened from it later).
# ---------------------------------------------------------------------------
in_annotations = r"/media/ross/ssd/00_2015_DAR_marinedebris/maui/labels/maui_md_label_envelopes.gpkg"
in_tile_index = r"/media/ross/ssd/00_2015_DAR_marinedebris/maui/tiles_shp/maui_512x512_tindex.gpkg"
in_window_retile_dir = r"/media/ross/ssd/00_2015_DAR_marinedebris/maui/04_window_retile"

# ---------------------------------------------------------------------------
# Outputs: a GeoPackage for GIS display and a CSV for ML training, both
# written into the labels directory.
# ---------------------------------------------------------------------------
out_dir = r"/media/ross/ssd/00_2015_DAR_marinedebris/maui/labels"

out_path_gpkg = os.path.join(out_dir, "maui_annotations.gpkg")
out_path_csv = os.path.join(out_dir, "maui_annotations_mltrainable.csv")

# Debris category code -> integer class id. The id is simply the position of
# the code in the list below, so the order of the list is significant.
# NOTE(review): this mapping is not referenced by any cell visible in this
# notebook -- confirm whether labels are meant to be remapped before export.
remap_debris_classes = {
    code: class_id
    for class_id, code in enumerate(
        [
            'B',  # Buoys and floats
            'C',  # Cloth
            'F',  # Foam
            'L',  # Line (single pieces of rope, not net)
            'M',  # Metal
            'N',  # Net
            'P',  # Plastic
            'T',  # Tire
            'W',  # Processed wood
            'V',  # Vessel
            'O',  # Other
        ]
    )
}
              

In [None]:
# Read both vector layers in one pass: the annotation envelopes first,
# then the tile index they will be intersected with.
envelopes, in_tindex = (
    gpd.read_file(layer_path) for layer_path in (in_annotations, in_tile_index)
)

In [None]:
# Find all intersections between the annotation envelopes and the tile index.

In [None]:
# Split every envelope by the tiles it touches; each output row is the part of
# one envelope that falls inside one tile, carrying the attributes of both.
intersection = gpd.overlay(envelopes, in_tindex, how="intersection")

In [None]:
# A little EDA showed that a single envelope can produce two valid annotations when it is split by an image tile edge, so we throw away
# any small slivers that we find. We identify slivers by looking at the x:y ratio of each piece's bounding box.

In [None]:
# Bounding box of every intersected piece (computed once and reused).
piece_bounds = intersection.bounds
piece_width = piece_bounds['maxx'] - piece_bounds['minx']
piece_height = piece_bounds['maxy'] - piece_bounds['miny']

# |1 - width/height|: 0 for a square bounding box, larger as it gets more
# elongated. The measure is not symmetric: for wide boxes (width > height) it
# grows without bound, while for tall boxes (width < height) it stays below 1.
normalized_ratio_x_y = (1 - piece_width / piece_height).abs()

In [None]:
# Display the computed ratios (one value per intersected piece) for a quick visual check.
normalized_ratio_x_y

In [None]:
# Attach the ratio as a column so it can be used for filtering and is carried
# along into the exported layers.
intersection = intersection.assign(normalized_ratio_x_y=normalized_ratio_x_y)

In [None]:
# Preview the first five rows, including the newly added ratio column.
intersection.head(n=5)

In [None]:
# Keep only the pieces whose bounding-box ratio measure is at most 0.7, i.e.
# whose width/height lies between 0.3 and 1.7; anything more elongated is
# treated as a sliver left over from a tile-edge split and dropped.
#
# .copy() makes `filtered` an independent frame. Without it, `filtered` is a
# slice of `intersection`, and the column assignments performed on it further
# down raise pandas' SettingWithCopyWarning (ambiguous view-vs-copy write).
filtered = intersection[intersection['normalized_ratio_x_y'] <= .7].copy()

In [None]:
# Intermediate export of the filtered pieces. The last cell of the notebook
# writes to this same path again, replacing this version of the file.
filtered.to_file(filename=out_path_gpkg, driver='GPKG')

In [None]:
# Create placeholder columns to store the pixel coordinates.

In [None]:
# Add the four pixel-coordinate columns, initialised to 0; the real values
# are filled in per annotation once the tile transforms have been read.
for pixel_column in ('xmin', 'ymin', 'xmax', 'ymax'):
    filtered[pixel_column] = 0

In [None]:
# The final step is to pull the affine transformation from each envelope's corresponding image
# and then convert the UTM coordinates to pixel coordinates.

In [None]:
# Convert every annotation's map-coordinate bounding box into pixel
# coordinates of the tile image it belongs to.
#
# Many annotations share a tile, so the affine transform of each image is read
# once and cached by tile name instead of reopening the file per annotation.
tile_transforms = {}
pixel_boxes = {'xmin': [], 'ymin': [], 'xmax': [], 'ymax': []}

for tile_name, geom in zip(filtered['filename'], filtered['geometry']):
    if tile_name not in tile_transforms:
        image_path = os.path.join(in_window_retile_dir, tile_name + '.jpg')
        with rasterio.open(image_path, 'r') as src:
            tile_transforms[tile_name] = src.transform

    # shapely bounds are ordered (minx, miny, maxx, maxy)
    map_xmin, map_ymin, map_xmax, map_ymax = geom.bounds

    # rowcol returns (rows, cols), each holding one entry per input coordinate.
    rows, cols = rasterio.transform.rowcol(
        tile_transforms[tile_name],
        (map_xmin, map_xmax),
        (map_ymin, map_ymax),
    )

    pixel_boxes['xmin'].append(cols[0])
    pixel_boxes['xmax'].append(cols[1])
    # The row of the map ymax is stored as pixel ymin and vice versa.
    # Assumes north-up imagery where rows increase downward -- TODO confirm.
    pixel_boxes['ymin'].append(rows[1])
    pixel_boxes['ymax'].append(rows[0])

# The lists were built in frame order, so each can be assigned as a whole
# column in a single step rather than cell by cell.
for pixel_column, pixel_values in pixel_boxes.items():
    filtered[pixel_column] = pixel_values
     

In [None]:
# Write a GeoPackage for GIS display and a CSV for ML training (the ML training format matters!)

In [None]:

# Column subsets for the two exports.
gis_columns = ['unique_pt_id', 'label', 'filename',
               'xmin', 'ymin', 'xmax', 'ymax',
               'normalized_ratio_x_y', 'geometry']
ml_columns = ['filename', 'xmin', 'ymin', 'xmax', 'ymax', 'label']

# GeoPackage for GIS display: attributes, pixel boxes, ratio and geometry.
filtered[gis_columns].to_file(out_path_gpkg, driver="GPKG")

# CSV for ML training: one row per annotation, without geometry.
# NOTE(review): with these defaults to_csv also writes a header row and the
# frame index as the first column -- confirm the training tool expects both.
# NOTE(review): 'label' is exported as stored in the frame; no class remapping
# is applied in the visible cells -- confirm this is intended.
filtered[ml_columns].to_csv(out_path_csv)