# Build a tile index (`tindex`) for a set of DPS outputs

In [21]:
# Instantiate MAAP (presumably the MAAP platform client — TODO confirm).
# NOTE(review): `maap` is not referenced by any other cell visible here; confirm it is still needed.
from maap.maap import MAAP
maap = MAAP()

In [22]:
# Root of the icesat2_boreal checkout, and the `lib` folder inside it.
ICESAT2_BOREAL_REPO_PATH = '/projects/code/icesat2_boreal'
ICESAT2_BOREAL_LIB_PATH = f'{ICESAT2_BOREAL_REPO_PATH}/lib'

In [23]:
import geopandas as gpd
import pandas as pd
import os

import sys
import s3fs

# Make the modules in the repo's `lib` folder importable from this notebook.
# Guarded so that re-running the cell does not append the same entry again.
if ICESAT2_BOREAL_LIB_PATH not in sys.path:
    sys.path.append(ICESAT2_BOREAL_LIB_PATH)

print("Importing packages complete.")

Importing packages complete.


In [24]:
# Reload the project helper modules so that edits made to them on disk are
# picked up without restarting the kernel.
import importlib
import mosaiclib
# A module must be imported before it can be reloaded; without this line the
# reload below raises NameError on a fresh kernel.
import maplib_folium
importlib.reload(mosaiclib)
importlib.reload(maplib_folium)

<module 'maplib_folium' from '/projects/code/icesat2_boreal/lib/maplib_folium.py'>

In [25]:
# NOTE(review): the wildcard import presumably supplies `boreal_tile_index_path`,
# which later cells use without defining — confirm, and consider importing the
# needed names explicitly so their origin is visible.
import mosaiclib
from mosaiclib import *

## Build new tindex and MosaicJSONs

In [29]:
# Settings that identify one set of DPS output and control how its tindex is built.
# Exactly one VAR / BATCH_NAME pair may be active: a dict literal silently keeps
# only the LAST value of a repeated key, so the alternatives are commented out
# instead of being listed as live entries that never take effect.
DICT_BUILD_TINDEX = {
    'SET' : 'BOREAL_MAP',
    'USER' : 'lduncanson',
    'ALG_NAME' : 'run_boreal_biomass_map',
    'ALG_VERSION' : 'boreal_agb_2024_v6',
    #'VAR' : 'AGB',
    'VAR' : 'HT',
    # In my bucket, this is ALWAYS used to identify output
    #'BATCH_NAME' : 'AGB_L30_2020/min_n_1000_funks', #min_n_1000_funks min_n_5000_funks
    #'BATCH_NAME' : 'AGB_H30_2020/outlier_remove_fullboreal',
    #'BATCH_NAME' : 'AGB_H30_2020/Version2_SD',
    'BATCH_NAME' : 'Ht_H30_2020/Version2_SD',
    'YEAR': 2024,
    # Space-separated months, passed as-is to --dps_month_list
    'DPS_MONTH_LIST' : '07 08 09 10',
    'DPS_DAY_MIN' : 1 ,
    'TILES_INDEX_PATH': boreal_tile_index_path
}

In [30]:
# Echo the dict to check which values are actually in effect.
DICT_BUILD_TINDEX

{'SET': 'BOREAL_MAP',
 'USER': 'lduncanson',
 'ALG_NAME': 'run_boreal_biomass_map',
 'ALG_VERSION': 'boreal_agb_2024_v6',
 'VAR': 'HT',
 'BATCH_NAME': 'Ht_H30_2020/Version2_SD',
 'YEAR': 2024,
 'DPS_MONTH_LIST': '07 08 09 10',
 'DPS_DAY_MIN': 1,
 'TILES_INDEX_PATH': '/projects/shared-buckets/montesano/databank/boreal_tiles_v004.gpkg'}

In [31]:
%%time
if True:
    DPS_IDENTIFIER = f"{DICT_BUILD_TINDEX['ALG_VERSION']}/{DICT_BUILD_TINDEX['BATCH_NAME']}"

    OUTDIR = f"/projects/my-public-bucket/DPS_tile_lists/{DICT_BUILD_TINDEX['SET']}/{DPS_IDENTIFIER}"
    !mkdir -p $OUTDIR

    # Build tindex
    args = f"--RETURN_DUPS --user {DICT_BUILD_TINDEX['USER']} --dps_identifier {DPS_IDENTIFIER} \
    -alg_name {DICT_BUILD_TINDEX['ALG_NAME']} -t {DICT_BUILD_TINDEX['VAR']} -y 2024 \
    --dps_month_list {DICT_BUILD_TINDEX['DPS_MONTH_LIST']} -d_min {DICT_BUILD_TINDEX['DPS_DAY_MIN']} --outdir {OUTDIR} \
    -boreal_tile_index_path {DICT_BUILD_TINDEX['TILES_INDEX_PATH']}"

    !time /projects/env/above/bin/python /projects/code/icesat2_boreal/lib/build_tindex_master.py $args --WRITE_TINDEX_MATCHES_GDF

NASA MAAP
INFO: Pandarallel will run on 25 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
NASA MAAP

Building a list of tiles:
DPS ID:		boreal_agb_2024_v6/Ht_H30_2020/Version2_SD
Type:		HT
Year:		['2024']
Month:		['07', '08', '09', '10']
Days:		1-31

Output dir:  /projects/my-public-bucket/DPS_tile_lists/BOREAL_MAP/boreal_agb_2024_v6/Ht_H30_2020/Version2_SD
Rows before NaN removal: 4875
Paths creating null tile_nums: ['s3://maap-ops-workspace/lduncanson/dps_output/run_boreal_biomass_map/boreal_agb_2024_v6/Ht_H30_2020/Version2_SD/2024/07/25/19/40/42/974792/boreal_ht_202407251721961551_4180700.tif', 's3://maap-ops-workspace/lduncanson/dps_output/run_boreal_biomass_map/boreal_agb_2024_v6/Ht_H30_2020/Version2_SD/2024/07/25/19/43/07/546546/boreal_ht_202407251721961690_4199500.tif']
Rows after NaN removal: 4873

No duplicates found.

# of duplicate tiles: 0
Final # of tiles: 4873
Writing duplicates csv: /projects/my-public-bucket

### Check the tindex

In [32]:
# Path of the CSV tindex for the selected variable inside OUTDIR.
tindex_csv_name = f"{DICT_BUILD_TINDEX['VAR']}_tindex_master.csv"
fn = os.path.join(OUTDIR, tindex_csv_name)
fn

'/projects/my-public-bucket/DPS_tile_lists/BOREAL_MAP/boreal_agb_2024_v6/Ht_H30_2020/Version2_SD/HT_tindex_master.csv'

In [33]:
# Load the CSV tindex into a DataFrame.
tindex_df = pd.read_csv(fn)

In [34]:
# The row count should match the "Final # of tiles" reported by the build step.
tindex_df.shape

(4873, 5)

In [43]:
# Peek at the last rows to check the paths and tile numbers.
tindex_df.tail(2)

Unnamed: 0,index,s3_path,local_path,file,tile_num
4871,4871,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202408281724875729_0023778.tif,23778
4872,4872,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202408281724879767_0023217.tif,23217


In [40]:
# GeoPackage version of the tindex, and the '_model_ready' variant of the boreal tile grid.
tindex_gpkg_path = os.path.join(OUTDIR, f"{DICT_BUILD_TINDEX['VAR']}_tindex_master.gpkg")
model_ready_tiles_path = boreal_tile_index_path.replace('.gpkg', '_model_ready.gpkg')

tindex_gdf = gpd.read_file(tindex_gpkg_path)
boreal_tiles = gpd.read_file(model_ready_tiles_path)

In [42]:
# Tile numbers present in the reference grid but absent from the tindex.
missing_mask = ~boreal_tiles.tile_num.isin(tindex_gdf.tile_num.to_list())
NEED_LIST = boreal_tiles[missing_mask].tile_num.to_list()
len(NEED_LIST)

83

In [None]:
# Interactive map of the reference tiles that have no entry in the tindex.
boreal_tiles.loc[~boreal_tiles.tile_num.isin(tindex_gdf.tile_num.to_list())].explore()

In [17]:
# S3 filesystem handle opened in anonymous mode (anon=True), i.e. without credentials.
s3 = s3fs.S3FileSystem(anon=True)

In [20]:
# Download the first file listed for tile 2996 into the tmp folder of the public bucket.
tile_matches = tindex_df[tindex_df.tile_num == 2996]
tile_s3_path = tile_matches.s3_path.to_list()[0]
s3.download(tile_s3_path, '/projects/my-public-bucket/tmp')

[None]