# Review DPS outputs

Make a mosaic of DPS outputs.

1. make a list of the DPS output paths with build_tindex.master.py
2. Identify duplicate tiles
3. Identify matching tiles; merge tindex.master with the original index tile file
3. convert the local paths to s3 paths
4. reproject the bounds to 4326 - convert to geojson (save file or in mem object)
5. make mosaicjson Mosaic.from_features
6. send mosaicjson to the tile server

In [4]:
import geopandas as gpd
import pandas as pd
import os
import json
import collections

def local_to_s3(url, user = 'nathanmthomas', type='public'):
    ''' A Function to convert local paths to s3 urls'''
    if type is 'public':
        replacement_str = f's3://maap-ops-workspace/shared/{user}'
    else:
        replacement_str = f's3://maap-ops-workspace/{user}'
    return url.replace(f'/projects/my-{type}-bucket', replacement_str)

In [5]:
if False:
    os.system("python /projects/icesat2_boreal/lib/build_tindex_master.py --type ATL08_filt")
    
tindex_master_fn = 's3://maap-ops-workspace/shared/lduncanson/DPS_tile_lists/ATL08_filt_tindex_master.csv'

In [6]:
# Build up a dataframe from the list of dps output files
tindex_master = pd.read_csv(tindex_master_fn)
tindex_master['s3'] = [local_to_s3(local_path, user='lduncanson') for local_path in tindex_master['local_path']]
print(tindex_master.head())

# Get boreal tiles
boreal_tile_index_path = '/projects/shared-buckets/nathanmthomas/boreal_grid_albers90k_gpkg.gpkg'
boreal_tile_index = geopandas.read_file(boreal_tile_index_path)
boreal_tile_index.astype({'layer':'int'})
boreal_tile_index.rename(columns={"layer":"tile_num"}, inplace=True)
boreal_tile_index["tile_num"] = boreal_tile_index["tile_num"].astype(int)

bad_tiles = [3540,3634,3728,3823,3916,4004] #Dropping the tiles near antimeridian that reproject poorly.

# For some reason, doing this causes 'MosaicJSON.from_features()' to fail...(below)
if True:
    # Remove bad tiles
    boreal_tile_index = boreal_tile_index[~boreal_tile_index['tile_num'].isin(bad_tiles)]
    
print(boreal_tile_index.head())

   Unnamed: 0                                         local_path  tile_num  \
0           0  /projects/my-private-bucket/dps_output/run_til...       986   
1           1  /projects/my-private-bucket/dps_output/run_til...       986   
2           2  /projects/my-private-bucket/dps_output/run_til...       979   
3           3  /projects/my-private-bucket/dps_output/run_til...       987   
4           4  /projects/my-private-bucket/dps_output/run_til...       982   

                                                  s3  
0  /projects/my-private-bucket/dps_output/run_til...  
1  /projects/my-private-bucket/dps_output/run_til...  
2  /projects/my-private-bucket/dps_output/run_til...  
3  /projects/my-private-bucket/dps_output/run_til...  
4  /projects/my-private-bucket/dps_output/run_til...  


NameError: name 'geopandas' is not defined

# Identify duplicate tiles

In [None]:
duplicate_tiles = [item for item, count in collections.Counter(tindex_master["tile_num"]).items() if count > 1]
print(duplicate_tiles)

# Identify matching tiles

In [None]:
# Select the rows we have results for
tile_matches = boreal_tile_index.merge(tindex_master[~tindex_master['tile_num'].isin(bad_tiles)], how='right', on='tile_num')
print(tile_matches.shape)

tile_matches_duplicates = boreal_tile_index.merge(tindex_master[tindex_master['tile_num'].isin(duplicate_tiles)], how='right', on='tile_num')
print(tile_matches_duplicates.shape)

tile_matches_geojson_string = tile_matches.to_crs("EPSG:4326").to_json()
tile_matches_geojson = json.loads(tile_matches_geojson_string)

tile_matches_duplicates_geojson_string = tile_matches_duplicates.to_crs("EPSG:4326").to_json()
tile_matches_duplicates_geojson = json.loads(tile_matches_duplicates_geojson_string)