# Demo of the `tile_atl08` procedure
Paul Montesano, PhD

This notebook performs spatial and quality filtering of ATL08 30 m granules, based on tile extents and our quality flags and thresholds.

In [8]:
from maap.maap import MAAP
# Instantiate the MAAP API client for this session.
maap = MAAP()

In [9]:
# Root of the icesat2_boreal checkout and its lib/ directory (added to sys.path below).
# Locations used previously: '/projects/icesat2_boreal', '/projects/Developer/icesat2_boreal/lib'
ICESAT2_BOREAL_REPO_PATH = '/projects/code/icesat2_boreal'
ICESAT2_BOREAL_LIB_PATH = f'{ICESAT2_BOREAL_REPO_PATH}/lib'

In [10]:
import geopandas
import pandas as pd
import os
import json
import collections
import numpy as np
import sys
import s3fs
import matplotlib.pyplot as plt
# Put the repo's lib/ directory on sys.path before importing the project-local
# modules (presumably where maplib_folium lives — the import must stay below this line).
sys.path.append(ICESAT2_BOREAL_LIB_PATH)
import maplib_folium
#import ExtractUtils
from folium import TileLayer
print("Importing packages complete.")

Importing packages complete.


In [11]:
# ID of the boreal tile processed throughout this demo.
TILE_NUM = 168

In [12]:
# Directory passed to tile_atl08 via -o; the filtered CSV is read back from here further down.
OUTDIR = '/projects/my-public-bucket/local_output/tile_atl08_demo'

In [13]:
# --- Tile index (vector file, layer, and the column holding the tile ID) ---
in_tile_fn = '/projects/shared-buckets/montesano/databank/boreal_tiles_v004.gpkg'
in_tile_layer = 'boreal_tiles_v004'
in_tile_id_col = 'tile_num'

# --- Master list of the per-granule ATL08 CSVs ---
csv_list_fn = '/projects/shared-buckets/lduncanson/DPS_tile_lists/ATL08_tindex_master.csv'

# --- Master lists of the covariate stacks (HLS, topography, Sentinel-1 SAR) ---
# Earlier HLS list: /projects/shared-buckets/nathanmthomas/DPS_tile_lists/HLS_tindex_master.csv
landsat_stack_list_fn = 's3://maap-ops-workspace/shared/montesano/DPS_tile_lists/HLS/HLS_stack_2023_v1/HLS_H30_2020/HLS_tindex_master.csv'
topo_stack_list_fn = 's3://maap-ops-workspace/shared/montesano/DPS_tile_lists/run_build_stack_topo/build_stack_v2023_2/CopernicusGLO30/Topo_tindex_master.csv'
sar_stack_list_fn = 's3://maap-ops-workspace/shared/montesano/DPS_tile_lists/run_build_stack/build_stack_v2023_2/build_stack_S1/SAR_S1_2020/S1_tindex_master.csv'

# --- First and last month (inclusive) handed to the month filter ---
MINMONTH = 4
MAXMONTH = 10

In [18]:
args = f"\
--extract_covars \
--do_30m \
--do_dps \
-years_list 2019 2020 2021 \
-o {OUTDIR} \
-in_tile_num {TILE_NUM} \
-in_tile_fn {in_tile_fn} \
-in_tile_layer {in_tile_layer} \
-in_tile_id_col {in_tile_id_col} \
-csv_list_fn {csv_list_fn} \
-topo_stack_list_fn {topo_stack_list_fn} \
-landsat_stack_list_fn {landsat_stack_list_fn} \
-landsat_cols_list Blue Green Red NIR SWIR SWIR2 NDVI SAVI MSAVI NDMI EVI NBR NBR2 TCB TCG TCW ValidMask Xgeo Ygeo JulianDate yearDate \
-user_atl08 lduncanson \
-thresh_sol_el 5 \
-v_ATL08 5 \
-minmonth {MINMONTH} \
-maxmonth {MAXMONTH} \
-LC_filter True"

#print(args)
!time /projects/env/above/bin/python tile_atl08.py $args

Traceback (most recent call last):
  File "/projects/code/icesat2_boreal/lib/tile_atl08.py", line 11, in <module>
    import pandas as pd
  File "/projects/env/above/lib/python3.9/site-packages/pandas/__init__.py", line 46, in <module>
    from pandas.core.api import (
  File "/projects/env/above/lib/python3.9/site-packages/pandas/core/api.py", line 47, in <module>
    from pandas.core.groupby import (
  File "/projects/env/above/lib/python3.9/site-packages/pandas/core/groupby/__init__.py", line 1, in <module>
    from pandas.core.groupby.generic import (
  File "/projects/env/above/lib/python3.9/site-packages/pandas/core/groupby/generic.py", line 67, in <module>
    from pandas.core.frame import DataFrame
  File "/projects/env/above/lib/python3.9/site-packages/pandas/core/frame.py", line 142, in <module>
    from pandas.core.generic import (
  File "/projects/env/above/lib/python3.9/site-packages/pandas/core/generic.py", line 187, in <module>
    from pandas.core.window import (
  Fil

# Results from `tile_atl08`: Map the filtered ATL08 for this tile 

In [19]:
# Load the tile index from the same file and layer configured for tile_atl08,
# instead of keeping a second hard-coded copy of the path.
boreal_tiles = geopandas.read_file(in_tile_fn, layer=in_tile_layer)

In [20]:
# Locate the filtered CSV for this tile instead of hard-coding one run's file name.
# NOTE(review): the pattern 'atl08_005_30m_filt_<YYYYMMDD>_<tile, 4 digits>.csv' is inferred
# from the file name this cell used before ('atl08_005_30m_filt_20240208_0168.csv') — confirm.
filt_csv_prefix = 'atl08_005_30m_filt_'
filt_csv_suffix = f'_{TILE_NUM:04d}.csv'
filt_csv_names = sorted(
    name for name in os.listdir(OUTDIR)
    if name.startswith(filt_csv_prefix) and name.endswith(filt_csv_suffix)
)
if not filt_csv_names:
    raise FileNotFoundError(
        f"No '{filt_csv_prefix}*{filt_csv_suffix}' file found in {OUTDIR}; run tile_atl08 for tile {TILE_NUM} first."
    )
# An 8-digit date stamp sorts chronologically as text, so the last name is the newest file.
atl08_pdf_filt = pd.read_csv(os.path.join(OUTDIR, filt_csv_names[-1]))
print("\nConverting to geopandas data frame in lat/lon ...")
# Build point geometries from the lon/lat columns, tagged as geographic coordinates (EPSG:4326).
atl08_gdf = geopandas.GeoDataFrame(
    atl08_pdf_filt,
    geometry=geopandas.points_from_xy(atl08_pdf_filt.lon, atl08_pdf_filt.lat),
    crs='epsg:4326',
)


Converting to geopandas data frame in lat/lon ...


In [21]:
# (rows, columns) of the filtered ATL08 points loaded for this tile.
atl08_gdf.shape

(25488, 20)

In [9]:
import maplib_folium
import branca.colormap as cm

# Linear colour ramp for canopy height, stretched over the fixed range 0-25.
pal_height_cmap = cm.LinearColormap(
    colors=[
        'black', '#636363', '#fc8d59', '#fee08b',
        '#ffffbf', '#d9ef8b', '#91cf60', '#1a9850',
    ],
    vmin=0,
    vmax=25,
)
pal_height_cmap.caption = 'Vegetation height from  ATL08 @ 30 m (h_can; rh98)'
# Display the ramp as the cell output.
pal_height_cmap

In [34]:
# Base map: outline of the selected tile in red over the imagery basemap.
m = boreal_tiles[boreal_tiles[in_tile_id_col] == TILE_NUM].boundary.explore(
    color='red', tiles=maplib_folium.tiler_basemap_image, attr=' '
)
# Overlay a 10% sample of the filtered ATL08 points coloured by h_can.
# The sample is seeded so the same points are drawn on every run.
atl08_gdf.sample(frac=0.1, random_state=42).explore(m=m, column='h_can', cmap=pal_height_cmap, legend=True)

##### [solved] ... problem: `tile_atl08` was not finding the ATL08 granules for a given tile
[solution]: `maap.searchGranule()` needed to be called using version 6

# Break down `tile_atl08` into its main steps

In [22]:
import ExtractUtils
import FilterUtils
import tile_atl08

NASA MAAP


## Search: get the ATL08 30m h5 granules that intersect this tile
 - PhoReal processed the ATL08 v5 granules at 30 m and stored them as '.h5' files (DPS 1)
 - We extracted dataframes (as CSVs) from each of these '.h5' granules


In [23]:
# Search MAAP for the ATL08 granules over this tile, 1 April - 31 October of each listed year.
# version=6 is intentional: per the note in this notebook, the search only found the
# tile's granules when run against version 6.
all_atl08_for_tile = ExtractUtils.maap_search_get_h5_list(
    tile_num=TILE_NUM,
    id_col=in_tile_id_col,
    tile_fn=in_tile_fn,
    layer=in_tile_layer,
    DATE_START='04-01',
    DATE_END='10-31',
    YEARS=[2019, 2020, 2021],
    version=6,
)

	TILE_NUM: 168 (24.077368138647326,61.706101843775286,26.56167100903783,62.829301723925255)
	Searching MAAP for granules using these parameters: 
	[{'short_name': 'ATL08', 'version': '006', 'bounding_box': '24.077368138647326,61.706101843775286,26.56167100903783,62.829301723925255', 'limit': 10000, 'temporal': '2019-04-01T00:00:00Z,2019-10-31T23:59:59Z'}, {'short_name': 'ATL08', 'version': '006', 'bounding_box': '24.077368138647326,61.706101843775286,26.56167100903783,62.829301723925255', 'limit': 10000, 'temporal': '2020-04-01T00:00:00Z,2020-10-31T23:59:59Z'}, {'short_name': 'ATL08', 'version': '006', 'bounding_box': '24.077368138647326,61.706101843775286,26.56167100903783,62.829301723925255', 'limit': 10000, 'temporal': '2021-04-01T00:00:00Z,2021-10-31T23:59:59Z'}]
		# ATL08 for tile 168: 308


In [24]:
print(f"\nReading existing list of ATL08 CSVs: {csv_list_fn}")
all_atl08_csvs_df = pd.read_csv(csv_list_fn)
# Add an 's3' column: the s3 location derived from each row's local_path in the master list.
all_atl08_csvs_df['s3'] = [
    tile_atl08.local_to_s3(csv_path, 'lduncanson')
    for csv_path in all_atl08_csvs_df['local_path']
]


Reading existing list of ATL08 CSVs: /projects/shared-buckets/lduncanson/DPS_tile_lists/ATL08_tindex_master.csv


In [25]:
# The search above was run against version 006, so swap the '_006_' token in each granule
# name for '_005_' before the names are matched against the list of existing CSVs.
all_atl08_for_tile = [granule.replace('_006_', '_005_') for granule in all_atl08_for_tile]

In [26]:
# Split the expected granules into those with a matching '_30m' CSV in the master
# list (matched on the 's3' column) and those without one.
seg_str = '_30m'
all_atl08_csvs_FOUND, all_atl08_csvs_NOT_FOUND = FilterUtils.find_atl08_csv_tile(
    all_atl08_for_tile,
    all_atl08_csvs_df,
    seg_str,
    col_name='s3',
)
print("\t# of ATL08 CSV found for tile {}: {}".format(TILE_NUM, len(all_atl08_csvs_FOUND)))
print("\t# of ATL08 CSV NOT found for tile {}: {}".format(TILE_NUM, len(all_atl08_csvs_NOT_FOUND)))

	Find ATL08 CSVs you expect for a tile based on the h5 granule search...
		# of all ATL08 granules for tile: 308
		# of all_atl08_csvs: 46166
	# of ATL08 CSV found for tile 168: 114
	# of ATL08 CSV NOT found for tile 168: 194


## Filter: spatial 
#### Get ATL08 gdf clipped to tile

In [27]:
# Look up the index record for this tile (no buffer applied).
# The result is indexed by key below; its 'geom_4326' entry is what the spatial clip uses.
# NOTE(review): the original comment described the result as bounds (xmin,xmax,ymin,ymax) — confirm what else it holds.
tile = ExtractUtils.get_index_tile(vector_path=in_tile_fn, id_col=in_tile_id_col, tile_id=TILE_NUM, buffer=0, layer=in_tile_layer)

In [28]:
# Development convenience: re-import FilterUtils so edits to the module on disk
# take effect without restarting the kernel.
import importlib
import FilterUtils
importlib.reload(FilterUtils)

<module 'FilterUtils' from '/projects/code/icesat2_boreal/lib/FilterUtils.py'>

In [29]:
import multiprocessing as mp
from multiprocessing import Pool
from functools import partial
# Number of CPUs reported for this machine (Pool and partial are only referenced by the
# commented-out multiprocessing attempt in the next cell).
mp.cpu_count()

32

In [24]:
# Multiprocessing this step does not work...
# %%time
# with Pool(processes=1) as pool:
#     atl08_list = pool.map(partial(FilterUtils.filter_atl08_bounds_clip, in_tile_geom_4326=tile['geom_4326']), all_atl08_csvs_FOUND[0:1])

In [23]:
# Clip each ATL08 CSV found for this tile to the tile geometry, then stack the
# clipped tables into one frame with a fresh index.
atl08 = pd.concat(
    [
        FilterUtils.filter_atl08_bounds_clip(csv_fn, tile['geom_4326'])
        for csv_fn in all_atl08_csvs_FOUND
    ],
    sort=False,
    ignore_index=True,
)

Bounds clipped 34258 obs. down to 0 obs.
Bounds clipped 34258 obs. down to 0 obs.
Bounds clipped 42606 obs. down to 6770 obs.
Bounds clipped 71832 obs. down to 1150 obs.
Bounds clipped 54272 obs. down to 40 obs.
Bounds clipped 146301 obs. down to 8286 obs.
Bounds clipped 76916 obs. down to 109 obs.
Bounds clipped 105311 obs. down to 6559 obs.
Bounds clipped 96260 obs. down to 128 obs.
Bounds clipped 54048 obs. down to 0 obs.
Bounds clipped 123588 obs. down to 6860 obs.
Bounds clipped 42512 obs. down to 0 obs.
Bounds clipped 25249 obs. down to 24 obs.
Bounds clipped 59329 obs. down to 6464 obs.
Bounds clipped 111787 obs. down to 3557 obs.
Bounds clipped 104162 obs. down to 9815 obs.
Bounds clipped 65120 obs. down to 2735 obs.
Bounds clipped 6484 obs. down to 62 obs.
Bounds clipped 36593 obs. down to 1222 obs.
Bounds clipped 48602 obs. down to 3753 obs.
Bounds clipped 56402 obs. down to 856 obs.
Bounds clipped 41075 obs. down to 10293 obs.
Bounds clipped 60252 obs. down to 0 obs.
Bounds 

In [65]:
# (rows, columns) of the spatially clipped ATL08 table, before quality filtering.
atl08.shape

(315308, 61)

## Filter: quality

Do the same thing we do in `tile_atl08`

In [62]:
# Pre-filter preparation of the clipped table.
# NOTE(review): this rebinds `atl08` to the prepared result, so re-running the cell feeds
# already-prepared data back in — confirm prep_filter_atl08_qual is safe to apply twice.
atl08 = FilterUtils.prep_filter_atl08_qual(atl08)


Pre-filter data cleaning...
Pandas version: 2.1.4
	Get beam type from orbit orientation and ground track: ['Weak' 'Strong']
	Cast some columns to:
		type float: ['lat', 'lon', 'h_can', 'h_te_best', 'ter_slp']
		type integer: ['n_ca_ph', 'n_seg_ph', 'n_toc_ph', 'msw_flg', 'seg_snow']


In [64]:
print('Quality filtering with aggressive land-cover based (v3) filters updated in Jan/Feb 2022 ...')
# DO_PREP=False because the prep step has already been run on `atl08` in the previous cell.
# An earlier version also listed 'granule_name' among the subset columns.
atl08_pdf_filt = FilterUtils.filter_atl08_qual_v3(
    atl08,
    SUBSET_COLS=True,
    DO_PREP=False,
    subset_cols_list=[
        'rh25', 'rh50', 'rh60', 'rh70', 'rh75', 'rh80', 'rh90',
        'h_can', 'h_max_can', 'ter_slp', 'h_te_best',
        'seg_landcov', 'sol_el', 'y', 'm', 'doy',
        'seg_cover',
    ],
    filt_cols=['h_can', 'h_dif_ref', 'm', 'msw_flg', 'beam_type', 'seg_snow', 'sig_topo'],
    list_lc_h_can_thresh=[
        0,
        60, 60, 60, 60, 60, 60,
        50, 50, 50, 50, 50, 50,
        20, 10, 10, 5, 5,
        0, 0, 0, 0, 0,
    ],
    thresh_h_can=100,
    thresh_h_dif=25,
    thresh_sig_topo=2.5,
    month_min=MINMONTH,
    month_max=MAXMONTH,
)

Quality filtering with aggressive land-cover based (v3) filters updated in Jan/Feb 2022 ...

Filtering by quality
	Before quality filtering: 		315308 observations in the input dataframe.
	After msw_flg=0: 		177894 observations in the dataframe.
	After beam_type=Strong: 		111840 observations in the dataframe.
	After seg_snow=1: 		110136 observations in the dataframe.
	Land cover threshold dictionary: 
{0: 0, 111: 60, 113: 60, 112: 60, 114: 60, 115: 60, 116: 60, 121: 50, 123: 50, 122: 50, 124: 50, 125: 50, 126: 50, 20: 20, 30: 10, 90: 10, 100: 5, 60: 5, 40: 0, 50: 0, 70: 0, 80: 0, 200: 0}
	After land-cover specific h_can thresholds: 		71634 observations in the dataframe.
	After h_can_unc <5, seg_cover<32767, sol_el<5, sig_topo<2.5, h_dif_ref<25: 		25488 observations in the dataframe.
	After month filters: 4-10
	After all quality filtering: 		25488 observations in the output dataframe.
	Returning a pandas data frame.
	Filtered obs. for columns: ['lon', 'lat', 'rh25', 'rh50', 'rh60', 'rh70

In [77]:
# Percentage of the clipped observations that survived quality filtering.
pct_obs_remaining = len(atl08_pdf_filt) / len(atl08) * 100
print(f'Quality filtering reduced # valid obs by {round(100 - pct_obs_remaining, 1)}%')

Quality filtering reduced # valid obs by 91.9%


## Test Covar Extraction to ATL08

In [54]:
# Development convenience: re-import ExtractUtils so edits to the module on disk
# take effect without restarting the kernel.
import importlib
import ExtractUtils
importlib.reload(ExtractUtils)

NASA MAAP


<module 'ExtractUtils' from '/projects/code/icesat2_boreal/lib/ExtractUtils.py'>

In [30]:
# Look up this tile's topography stack in the topo master list (read from the 'local_path' column).
topo_covar_fn = tile_atl08.get_stack_fn(
    topo_stack_list_fn,
    TILE_NUM,
    user=None,
    col_name='local_path',
)
topo_covar_fn


Getting stack fn from:  s3://maap-ops-workspace/shared/montesano/DPS_tile_lists/run_build_stack_topo/build_stack_v2023_2/CopernicusGLO30/Topo_tindex_master.csv
	 s3://maap-ops-workspace/montesano/dps_output/run_build_stack_topo/build_stack_v2023_2/CopernicusGLO30/2024/01/13/12/46/57/900208/CopernicusGLO30_168_cog_topo_stack.tif


's3://maap-ops-workspace/montesano/dps_output/run_build_stack_topo/build_stack_v2023_2/CopernicusGLO30/2024/01/13/12/46/57/900208/CopernicusGLO30_168_cog_topo_stack.tif'

In [55]:
# Smoke test on the first five ATL08 rows only: sample the topography stack at those points.
atl08_extract_topo = ExtractUtils.extract_value_gdf_s3(
    topo_covar_fn,
    atl08.head(),
    None,
    reproject=True,
)


Getting stack fn from:  s3://maap-ops-workspace/shared/montesano/DPS_tile_lists/run_build_stack_topo/build_stack_v2023_2/CopernicusGLO30/Topo_tindex_master.csv
	 s3://maap-ops-workspace/montesano/dps_output/run_build_stack_topo/build_stack_v2023_2/CopernicusGLO30/2024/01/13/12/46/57/900208/CopernicusGLO30_168_cog_topo_stack.tif
	Extracting raster values from:  s3://maap-ops-workspace/montesano/dps_output/run_build_stack_topo/build_stack_v2023_2/CopernicusGLO30/2024/01/13/12/46/57/900208/CopernicusGLO30_168_cog_topo_stack.tif
	Re-project points to match raster...
	Geting bandnames list from raster descriptions...
	Returning 5 points with 5 new raster value columns: ['elevation', 'slope', 'tsri', 'tpi', 'slopemask']


In [56]:
# Same smoke test for the SAR stack: find the tile's stack, then sample it at five points.
sar_covar_fn = tile_atl08.get_stack_fn(
    sar_stack_list_fn,
    TILE_NUM,
    user='montesano',
    col_name='local_path',
)
atl08_extract_sar = ExtractUtils.extract_value_gdf_s3(
    sar_covar_fn,
    atl08.head(),
    None,
    reproject=True,
)


Getting stack fn from:  s3://maap-ops-workspace/shared/montesano/DPS_tile_lists/run_build_stack/build_stack_v2023_2/build_stack_S1/SAR_S1_2020/S1_tindex_master.csv
	 s3://maap-ops-workspace/montesano/dps_output/run_build_stack/build_stack_v2023_2/build_stack_S1/SAR_S1_2020/2024/01/18/10/54/18/776985/SAR_S1_2020_168_cog.tif
	Extracting raster values from:  s3://maap-ops-workspace/montesano/dps_output/run_build_stack/build_stack_v2023_2/build_stack_S1/SAR_S1_2020/2024/01/18/10/54/18/776985/SAR_S1_2020_168_cog.tif
	Re-project points to match raster...
	Geting bandnames list from raster descriptions...
	Returning 5 points with 9 new raster value columns: ['vv_median_frozen', 'vh_median_frozen', 'vv_median_summer', 'vh_median_summer', 'vv_median_shoulder', 'vh_median_shoulder', 'n_frozen', 'n_summer', 'n_shoulder']


In [57]:
# Same smoke test for the HLS stack: find the tile's stack, then sample it at five points.
hls_covar_fn = tile_atl08.get_stack_fn(
    landsat_stack_list_fn,
    TILE_NUM,
    user='montesano',
    col_name='local_path',
)
atl08_extract_hls = ExtractUtils.extract_value_gdf_s3(
    hls_covar_fn,
    atl08.head(),
    None,
    reproject=True,
)


Getting stack fn from:  s3://maap-ops-workspace/shared/montesano/DPS_tile_lists/HLS/HLS_stack_2023_v1/HLS_H30_2020/HLS_tindex_master.csv
	 s3://maap-ops-workspace/montesano/dps_output/do_HLS_stack_3-1-2/HLS_stack_2023_v1/HLS_H30_2020/2023/04/11/01/53/49/840528/HLS_168_07-01_08-31_2020_2020.tif
	Extracting raster values from:  s3://maap-ops-workspace/montesano/dps_output/do_HLS_stack_3-1-2/HLS_stack_2023_v1/HLS_H30_2020/2023/04/11/01/53/49/840528/HLS_168_07-01_08-31_2020_2020.tif
	Re-project points to match raster...
	Geting bandnames list from raster descriptions...
	Returning 5 points with 21 new raster value columns: ['Blue', 'Green', 'Red', 'NIR', 'SWIR', 'SWIR2', 'NDVI', 'SAVI', 'MSAVI', 'NDMI', 'EVI', 'NBR', 'NBR2', 'TCB', 'TCG', 'TCW', 'ValidMask', 'Xgeo', 'Ygeo', 'JulianDate', 'yearDate']


In [58]:
# Show the five test rows with the extracted HLS columns appended.
atl08_extract_hls

Unnamed: 0,fid,lon,lat,dt,orb_orient,orb_num,rgt,gt,segid_beg,segid_end,...,NBR,NBR2,TCB,TCG,TCW,ValidMask,Xgeo,Ygeo,JulianDate,yearDate
0,34917,25.759275,61.91441,b'2020-05-04T10:33:32.000000Z',1,9117,594,b'gt2l',344241.532366,344245.532366,...,0.709852,0.423206,0.202452,0.199669,-0.020284,1.0,8163439.0,-2546683.0,231.0,2020.0
1,34918,25.759153,61.914944,b'2020-05-04T10:33:32.000000Z',1,9117,594,b'gt2l',344244.513733,344248.513733,...,0.664661,0.394203,0.173716,0.162022,-0.020495,1.0,8163499.0,-2546743.0,231.0,2020.0
2,56569,25.808488,61.97204,b'2020-05-04T10:33:32.000000Z',1,9117,594,b'gt3l',344546.963859,344550.963859,...,0.773686,0.463066,0.201657,0.208552,-0.010437,1.0,8166649.0,-2552923.0,231.0,2020.0
3,56570,25.808427,61.972308,b'2020-05-04T10:33:32.000000Z',1,9117,594,b'gt3l',344548.461297,344552.461297,...,0.764706,0.455108,0.201004,0.208075,-0.012182,1.0,8166679.0,-2552953.0,231.0,2020.0
4,56571,25.807892,61.974715,b'2020-05-04T10:33:32.000000Z',1,9117,594,b'gt3l',344561.929031,344565.929031,...,0.664865,0.443946,0.037004,0.021257,0.000672,1.0,8166889.0,-2553103.0,233.0,2020.0
