In [2]:
import os

import geopandas as gpd
import fiona

import rasterio

import matplotlib.pyplot as plt

In [17]:
in_annotations = r"/media/ross/ssd/00_2015_DAR_marinedebris/maui/labels/maui_md_label_envelopes.gpkg"
in_tile_index = r"/media/ross/ssd/00_2015_DAR_marinedebris/maui/tiles_shp/maui_512x512_tindex.gpkg"
in_window_retile_dir = r"/media/ross/ssd/00_2015_DAR_marinedebris/maui/04_window_retile"

out_dir = r"/media/ross/ssd/00_2015_DAR_marinedebris/maui/labels"

out_path_gpkg = os.path.join(out_dir, "maui_annotations.gpkg")
out_path_csv = os.path.join(out_dir, "maui_annotations_mltrainable.csv")

                               # Debris categories:
remap_debris_classes = {'B':0, # B = Buoys and floats
                       'C':1,  # C = Cloth
                       'F':2,  # F = Foam 
                       'L':3,  # L = Line (single pieces of rope, not net)
                       'M':4,  # M = Metal
                       'N':5,  # N = Net
                       'P':6,  # P = Plastic
                       'T':7,  # T = Tire
                       'W':8,  # W = Processed wood
                       'V':9,  # V = Vessel
                       'O':10} # O = Other 
              

In [4]:
envelopes = gpd.read_file(in_annotations)
in_tindex = gpd.read_file(in_tile_index)

In [5]:
#Find all the intersections of our envelopes and the tile index.

In [6]:
intersection = gpd.overlay(envelopes, in_tindex)

In [7]:
#a little eda showed that a single envelope could produce two valid annotations when split by a image tile edge. So we're going to throw away
# any funny little slivers that we find. We will identify funny slivers by looking at the x:y ratio

In [8]:
normalized_ratio_x_y = abs(1 - ((intersection.bounds['maxx'] - intersection.bounds['minx']) / (intersection.bounds['maxy'] - intersection.bounds['miny'])))

In [9]:
normalized_ratio_x_y

0       0.000000
1       0.000000
2       0.000000
3       0.201197
4       4.970264
          ...   
2239    0.461550
2238    1.081919
2240    0.538450
2242    0.094675
2241    0.905325
Length: 2243, dtype: float64

In [10]:
intersection['normalized_ratio_x_y'] = normalized_ratio_x_y

In [11]:
intersection.head()

Unnamed: 0,unique_pt_id,island,segment,pt_id,lat,long,type,size,min_size_meters,max_size_meters,comment,observer,filename,geometry,normalized_ratio_x_y
0,MA-001-0001,MA,1,1,21.022123,-156.625768,O,1,0.01,0.5,very small,AR,maui_1038_6_15,"POLYGON ((746767.042812153 2326430.389631396, ...",0.0
1,MA-001-0002,MA,1,2,21.022217,-156.624061,B,1,0.01,0.5,,AR,maui_1038_23_14,"POLYGON ((746944.3821923686 2326443.440532876,...",0.0
2,MA-001-0003,MA,1,3,21.023182,-156.620965,P,1,0.01,0.5,,AR,maui_1039_25_3,"POLYGON ((747264.7125466345 2326555.103654591,...",0.0
3,MA-001-0004,MA,1,4,21.023173,-156.620963,P,1,0.01,0.5,,AR,maui_1039_25_3,"POLYGON ((747264.9353615083 2326554.277551278,...",0.201197
4,MA-001-0004,MA,1,4,21.023173,-156.620963,P,1,0.01,0.5,,AR,maui_1039_25_4,"POLYGON ((747264.9353615083 2326554.11005451, ...",4.970264


In [12]:
filtered = intersection[intersection['normalized_ratio_x_y'] <= .7]

In [None]:
filtered.to_file(out_path_gpkg, driver='GPKG')

In [13]:
#Create a couple placeholder columns to store pixel coordinates

In [14]:
filtered['xmin'] = 0
filtered['ymin'] = 0
filtered['xmax'] = 0
filtered['ymax'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

In [15]:
#final step is to pull the affine transformation grid from each envelope's corresponding image 
#and then convert utm coordinates to pixel coordinates

In [None]:
for i, row in filtered.iterrows():
    #get the mins and the maxs
    xmin = row.geometry.bounds[0]
    xmax = row.geometry.bounds[2]
    ymin = row.geometry.bounds[1]
    ymax = row.geometry.bounds[3]
    
    #
    xs = (xmin, xmax)
    ys = (ymin, ymax)
    
    image_path = os.path.join(in_window_retile_dir, row['filename'] + '.jpg')
    with rasterio.open(image_path, 'r') as src:
        geotrans = src.transform
    
        pix_coords = rasterio.transform.rowcol(geotrans, xs, ys)
    
    filtered.loc[i, 'xmin'] = pix_coords[1][0]
    filtered.loc[i, 'xmax'] = pix_coords[1][1]
    filtered.loc[i, 'ymin'] = pix_coords[0][1]   
    filtered.loc[i, 'ymax'] = pix_coords[0][0]
     

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [26]:
#Write to a gpkg for GIS display and a csv for ml training (ml training format matters!)

In [27]:

filtered[['unique_pt_id', 'label', 'filename', 'xmin', 'ymin','xmax', 'ymax', 'normalized_ratio_x_y', 'geometry']].to_file(out_path_gpkg, driver="GPKG")
filtered[['filename', 'xmin', 'ymin','xmax', 'ymax', 'label']].to_csv(out_path_csv)                                                                                                