# Build a tile index (`tindex`) for a set of DPS outputs

In [1]:
from maap.maap import MAAP
# Create the MAAP client with no arguments (library defaults).
# The commented-out line is an alternative that passes an explicit host string
# (presumably the API endpoint — TODO confirm before re-enabling).
#maap = MAAP("api.maap-project.org")
maap = MAAP()

In [2]:
# Root of the icesat2_boreal checkout; the lib/ folder beneath it holds the
# project-local modules that are appended to sys.path and imported later.
ICESAT2_BOREAL_REPO_PATH = '/projects/code/icesat2_boreal'
ICESAT2_BOREAL_LIB_PATH = f"{ICESAT2_BOREAL_REPO_PATH}/lib"

In [3]:
import geopandas as gpd
import pandas as pd
import os
import json
import collections
import numpy as np
import sys
import s3fs
import matplotlib.pyplot as plt

# Put the repo's lib/ directory on the import path first; the project-local
# module imported on the next line is loaded from there.
sys.path.append(ICESAT2_BOREAL_LIB_PATH)
import maplib_folium
from folium import TileLayer
print("Importing packages complete.")

Importing packages complete.


In [4]:
import importlib
import mosaiclib
# Re-execute the project-local modules so that edits to their source files are
# picked up in this kernel without a restart. The last reload is also the cell's
# displayed value (the module repr).
importlib.reload(mosaiclib)
importlib.reload(maplib_folium)

<module 'maplib_folium' from '/projects/code/icesat2_boreal/lib/maplib_folium.py'>

In [96]:
import mosaiclib
# NOTE(review): wildcard import. `boreal_tile_index_path`, used by the
# DICT_BUILD_TINDEX config cell, is not defined in any visible cell, so it
# presumably comes from here — TODO confirm and switch to explicit imports.
from mosaiclib import *

## Build new tindex and MosaicJSONs

In [78]:
# Settings for one tile-index build. Every key must appear exactly once: in a
# dict literal a repeated key silently overwrites the earlier value, so the
# alternatives are kept as comments and only the active choice is live.
DICT_BUILD_TINDEX = {
    'SET' : 'BOREAL_MAP_TEST',
    'USER' : 'lduncanson',
    'ALG_NAME' : 'run_boreal_biomass_map',
    'ALG_VERSION' : 'boreal_agb_2024_v6',
    # Variable to index: uncomment exactly ONE of the following.
    #'VAR' : 'AGB',
    'VAR' : 'HT',
    # In my bucket, this is ALWAYS used to identify output.
    # Uncomment exactly ONE batch name (it should correspond to VAR above).
    #'BATCH_NAME' : 'AGB_L30_2020/min_n_1000_funks', #min_n_1000_funks min_n_5000_funks
    #'BATCH_NAME' : 'AGB_H30_2020/outlier_remove_fullboreal',
    #'BATCH_NAME' : 'AGB_H30_2020/Version2_SD',
    'BATCH_NAME' : 'Ht_H30_2020/Version2_SD',
    'YEAR': 2024,
    # Space-separated month numbers, passed as-is to --dps_month_list.
    'DPS_MONTH_LIST' : '07 08 09',
    'DPS_DAY_MIN' : 1,
    'TILES_INDEX_PATH': boreal_tile_index_path
}

In [79]:
# Display the effective settings to confirm which VAR / BATCH_NAME are active.
DICT_BUILD_TINDEX

{'SET': 'BOREAL_MAP_TEST',
 'USER': 'lduncanson',
 'ALG_NAME': 'run_boreal_biomass_map',
 'ALG_VERSION': 'boreal_agb_2024_v6',
 'VAR': 'HT',
 'BATCH_NAME': 'Ht_H30_2020/Version2_SD',
 'YEAR': 2024,
 'DPS_MONTH_LIST': '07 08 09',
 'DPS_DAY_MIN': 1,
 'TILES_INDEX_PATH': '/projects/shared-buckets/montesano/databank/boreal_tiles_v004.gpkg'}

In [102]:
%%time
if True:
    DPS_IDENTIFIER = f"{DICT_BUILD_TINDEX['ALG_VERSION']}/{DICT_BUILD_TINDEX['BATCH_NAME']}"

    OUTDIR = f"/projects/my-public-bucket/DPS_tile_lists/{DICT_BUILD_TINDEX['SET']}/{DPS_IDENTIFIER}"
    !mkdir -p $OUTDIR

    # Build tindex
    args = f"--RETURN_DUPS --user {DICT_BUILD_TINDEX['USER']} --dps_identifier {DPS_IDENTIFIER} \
    -alg_name {DICT_BUILD_TINDEX['ALG_NAME']} -t {DICT_BUILD_TINDEX['VAR']} -y 2024 \
    --dps_month_list {DICT_BUILD_TINDEX['DPS_MONTH_LIST']} -d_min {DICT_BUILD_TINDEX['DPS_DAY_MIN']} --outdir {OUTDIR} \
    -boreal_tile_index_path {DICT_BUILD_TINDEX['TILES_INDEX_PATH']}"

    !time /projects/env/above/bin/python /projects/code/icesat2_boreal/lib/build_tindex_master.py $args --WRITE_TINDEX_MATCHES_GDF

NASA MAAP
INFO: Pandarallel will run on 25 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
NASA MAAP

Building a list of tiles:
DPS ID:		boreal_agb_2024_v6/Ht_H30_2020/Version2_SD
Type:		HT
Year:		['2024']
Month:		['07', '08', '09']
Days:		1-31

Output dir:  /projects/my-public-bucket/DPS_tile_lists/BOREAL_MAP_TEST/boreal_agb_2024_v6/Ht_H30_2020/Version2_SD

No duplicates found.

# of duplicate tiles: 0
Final # of tiles: 4873
Writing duplicates csv: /projects/my-public-bucket/DPS_tile_lists/BOREAL_MAP_TEST/boreal_agb_2024_v6/Ht_H30_2020/Version2_SD/HT_tindex_master_duplicates.csv
Writing tindex master csv: /projects/my-public-bucket/DPS_tile_lists/BOREAL_MAP_TEST/boreal_agb_2024_v6/Ht_H30_2020/Version2_SD/HT_tindex_master.csv
Building geodataframe of matches, tindex master json, and mosaic json...
Tile index matches geodataframe for json: (4873, 7)
Building /projects/my-public-bucket/DPS_tile_lists/BOREAL_MAP_TEST/boreal_agb

### Check the tindex

In [97]:
# Location of the master tile-index CSV for the selected variable inside OUTDIR
# (the same file name the build step reports writing).
tindex_csv_name = DICT_BUILD_TINDEX['VAR'] + "_tindex_master.csv"
fn = os.path.join(OUTDIR, tindex_csv_name)
fn

'/projects/my-public-bucket/DPS_tile_lists/BOREAL_MAP_TEST/boreal_agb_2024_v6/Ht_H30_2020/Version2_SD/HT_tindex_master.csv'

In [98]:
# Load the master tile-index CSV into a DataFrame for inspection.
tindex_df = pd.read_csv(fn)

In [99]:
# (rows, columns) of the loaded tile-index table.
tindex_df.shape

(2504, 5)

In [86]:
# Preview the last rows of the tile-index table.
tindex_df.tail()

Unnamed: 0,index,s3_path,local_path,file,tile_num
1627,1627,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202408281724859081_0004107.tif,4107
1628,1628,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202408281724859219_0004344.tif,4344
1629,1629,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202408281724859222_0003840.tif,3840
1630,1630,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202408281724859622_0001796.tif,1796
1631,1631,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,boreal_ht_2020_202408281724860455_0004176.tif,4176


In [54]:
# Read the GeoPackage version of the tile index from OUTDIR.
# NOTE(review): presumably written by the build step when
# --WRITE_TINDEX_MATCHES_GDF is passed — TODO confirm.
tindex_gpkg_path = os.path.join(OUTDIR, DICT_BUILD_TINDEX['VAR'] + "_tindex_master.gpkg")
tindex_gdf = gpd.read_file(tindex_gpkg_path)

In [91]:
#tindex_gdf.explore()