In [1]:
import os

import geopandas as gpd
import fiona

import rasterio

import matplotlib.pyplot as plt

In [2]:
in_annotations = r"/media/ross/ssd/00_2015_DAR_marinedebris/kahoolawe/labels/kahoolawe_annotations_point_utm4n.geojson"
in_tile_index = r"/media/ross/ssd/00_2015_DAR_marinedebris/kahoolawe/tiles_shp/kahoolawe_512x512_tile_index.gpkg"
in_window_retile_dir = r"/media/ross/ssd/00_2015_DAR_marinedebris/kahoolawe/04_window_retile"

out_dir = r"/media/ross/ssd/00_2015_DAR_marinedebris/kahoolawe/labels"

out_path_gpkg = os.path.join(out_dir, "kahoolawe_annotations.gpkg")
out_path_csv = os.path.join(out_dir, "kahoolawe_annotations_mltrainable.csv")

                               # Debris categories:
remap_debris_classes = {'B':0, # B = Buoys and floats
                       'C':1,  # C = Cloth
                       'F':2,  # F = Foam 
                       'L':3,  # L = Line (single pieces of rope, not net)
                       'M':4,  # M = Metal
                       'N':5,  # N = Net
                       'P':6,  # P = Plastic
                       'T':7,  # T = Tire
                       'W':8,  # W = Processed wood
                       'V':9,  # V = Vessel
                       'O':10} # O = Other 
              

In [3]:
in_anno = gpd.read_file(in_annotations)
in_tindex = gpd.read_file(in_tile_index)

In [4]:
#Create envelopes from the points based on the attribute 'max_size_meters'

In [5]:
def buffer_by_attr(row):
    return row.geometry.buffer(row.max_size_meters)

In [6]:
buffered = in_anno.copy()

In [7]:
buffered['geometry'] = buffered.apply(buffer_by_attr, axis=1)

In [8]:
envelopes_raw = buffered.copy()

In [9]:
envelopes_raw['geometry'] = buffered.envelope
len(envelopes_raw)

1298

In [10]:
#do some clean up on the attributes so we have only what we need

In [11]:
envelopes_raw['label'] = envelopes_raw['type']

In [12]:
envelopes = envelopes_raw[['unique_pt_id', 'label', 'geometry']].replace({'label':remap_debris_classes})

In [13]:
envelopes.head()

Unnamed: 0,unique_pt_id,label,geometry
0,KO-001-0001,6,"POLYGON ((753040.5071327947 2280099.155738274,..."
1,KO-001-0002,6,"POLYGON ((753044.4685932701 2280101.566776025,..."
2,KO-001-0003,5,"POLYGON ((753043.648511753 2280098.316467939, ..."
3,KO-001-0004,6,"POLYGON ((753046.1782535802 2280098.823095408,..."
4,KO-001-0005,6,"POLYGON ((753048.5781626365 2280098.748135485,..."


In [14]:
#Find all the intersections of our envelopes and the tile index.

In [15]:
intersection = gpd.overlay(envelopes, in_tindex)

In [16]:
#a little eda showed that a single envelope could produce two valid annotations when split by a image tile edge. So we're going to throw away
# any funny little slivers that we find. We will identify funny slivers by looking at the x:y ratio

In [17]:
normalized_ratio_x_y = abs(1 - ((intersection.bounds['maxx'] - intersection.bounds['minx']) / (intersection.bounds['maxy'] - intersection.bounds['miny'])))

In [18]:
normalized_ratio_x_y

0        4.429249
3       16.840292
1        0.225772
2        0.000000
5        0.172862
          ...    
1702     0.661821
1703     2.969338
1706     4.225761
1705     1.539343
1711     0.000000
Length: 1713, dtype: float64

In [19]:
intersection['normalized_ratio_x_y'] = normalized_ratio_x_y

In [20]:
intersection.head()

Unnamed: 0,unique_pt_id,label,filename,geometry,normalized_ratio_x_y
0,KO-001-0001,6,kahoolawe_107_8_7,"POLYGON ((753040.5071327947 2280104.050613149,...",4.429249
3,KO-001-0003,5,kahoolawe_107_8_7,"POLYGON ((753043.648511753 2280104.050613149, ...",16.840292
1,KO-001-0001,6,kahoolawe_107_9_7,"POLYGON ((753040.5071327947 2280099.155738274,...",0.225772
2,KO-001-0002,6,kahoolawe_107_9_7,"POLYGON ((753044.4685932701 2280101.566776025,...",0.0
5,KO-001-0003,5,kahoolawe_107_9_7,"POLYGON ((753043.648511753 2280098.316467939, ...",0.172862


In [21]:
filtered = intersection[intersection['normalized_ratio_x_y'] <= .7]

In [22]:
#Create a couple placeholder columns to store pixel coordinates

In [23]:
filtered['xmin'] = 0
filtered['ymin'] = 0
filtered['xmax'] = 0
filtered['ymax'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

In [24]:
#final step is to pull the affine transformation grid from each envelope's corresponding image 
#and then convert utm coordinates to pixel coordinates

In [25]:
for i, row in filtered.iterrows():
    #get the mins and the maxs
    xmin = row.geometry.bounds[0]
    xmax = row.geometry.bounds[2]
    ymin = row.geometry.bounds[1]
    ymax = row.geometry.bounds[3]
    
    #
    xs = (xmin, xmax)
    ys = (ymin, ymax)
    
    image_path = os.path.join(in_window_retile_dir, row['filename'] + '.jpg')
    with rasterio.open(image_path, 'r') as src:
        geotrans = src.transform
    
        pix_coords = rasterio.transform.rowcol(geotrans, xs, ys)
    
    filtered.loc[i, 'xmin'] = pix_coords[1][0]
    filtered.loc[i, 'xmax'] = pix_coords[1][1]
    filtered.loc[i, 'ymin'] = pix_coords[0][1]   
    filtered.loc[i, 'ymax'] = pix_coords[0][0]
     

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [26]:
#Write to a gpkg for GIS display and a csv for ml training (ml training format matters!)

In [27]:

filtered[['unique_pt_id', 'label', 'filename', 'xmin', 'ymin','xmax', 'ymax', 'normalized_ratio_x_y', 'geometry']].to_file(out_path_gpkg, driver="GPKG")
filtered[['filename', 'xmin', 'ymin','xmax', 'ymax', 'label']].to_csv(out_path_csv)                                                                                                