# Project-level bbox and tindex

## Create intersection of various selections of tiles  
### Used to identify the tile set for ORNL DAAC delivery
Updated: Oct 2024 | PMM

In [None]:
# Root of the icesat2_boreal checkout. Alternatives kept from the original note:
#   '/projects/icesat2_boreal'
#   /projects/Developer/icesat2_boreal/lib
ICESAT2_BOREAL_REPO_PATH = '/projects/code/icesat2_boreal'
# Library directory inside the checkout.
ICESAT2_BOREAL_LIB_PATH = f"{ICESAT2_BOREAL_REPO_PATH}/lib"
# Dependency install, left disabled:
#!pip install -U -r $ICESAT2_BOREAL_REPO_PATH/dps/requirements_main.txt

In [None]:
!pip install contextily

In [None]:
!pip install cogeo_mosaic

In [None]:
from shapely.geometry import box
import geopandas as gpd
from geopandas import GeoDataFrame
import pandas as pd
import contextily as ctx
import os
import sys
sys.path.append('/projects/code/icesat2_boreal/lib')
#import maplib_folium
import ExtractUtils

#### Helper functions

In [None]:
def build_mosaic_json(input_tindex_gdf, input_json_fn, out_masic_json_fn):
    """Build a MosaicJSON from a tile-index GeoDataFrame and write it out.

    Parameters
    ----------
    input_tindex_gdf
        Tile-index GeoDataFrame handed to ``ExtractUtils.build_tiles_json``.
    input_json_fn
        Path handed to ``ExtractUtils.build_tiles_json`` as its second
        argument (presumably where the tiles GeoJSON is written -- confirm
        in ExtractUtils).
    out_masic_json_fn
        Destination path opened with ``MosaicBackend`` for the mosaic json.

    Returns
    -------
    None
    """
    from typing import Dict

    from cogeo_mosaic.backends import MosaicBackend
    from cogeo_mosaic.mosaic import MosaicJSON

    def s3_path_of(feature: Dict):
        """Return the ``s3_path`` property that identifies a feature."""
        properties = feature["properties"]
        return properties["s3_path"]

    # The tile index is expected to come from the tindex master csv
    # (build_tindex_master.py run on the dps_output); matching happens upstream.

    # Turn the matched-tiles GeoDataFrame into a tiles GeoJSON mapping.
    tiles_geojson = ExtractUtils.build_tiles_json(
        input_tindex_gdf,
        input_json_fn,
        SHOW_MAP=True,
    )

    print(f"Building {out_masic_json_fn}")
    features = tiles_geojson.get('features')
    mosaic_definition = MosaicJSON.from_features(
        features,
        minzoom=6,
        maxzoom=18,
        accessor=s3_path_of,
    )

    # Write the mosaic definition, replacing any existing file.
    with MosaicBackend(out_masic_json_fn, mosaic_def=mosaic_definition) as backend:
        backend.write(overwrite=True)

In [None]:
def select_gdf_subdomain(tindex_json_fn, SUB_GEOMETRY, DESC='min50N', CRS_SUB_GEOMETRY=4326):
    """Subset a tile index to the tiles intersecting a geometry and build its mosaic json.

    Parameters
    ----------
    tindex_json_fn
        Path of the tile-index file read with ``gpd.read_file``.
    SUB_GEOMETRY
        Geometry used for the ``intersects`` test; it must be expressed in
        ``CRS_SUB_GEOMETRY`` because the tile index is reprojected to that CRS
        before the test.
    DESC
        Label appended to the output file names
        (``<base>_<DESC>.json`` and ``<base>_mosaic_<DESC>.json``).
    CRS_SUB_GEOMETRY
        CRS the tile index is reprojected to before selection.

    Returns
    -------
    The rows of the reprojected tile index that intersect ``SUB_GEOMETRY``.
    """
    print(tindex_json_fn)
    tindex_base = os.path.splitext(tindex_json_fn)[0]
    tindex_json_SUBSET_fn = f"{tindex_base}_{DESC}.json"
    tindex_mosaic_json_SUBSET_fn = f"{tindex_base}_mosaic_{DESC}.json"

    tindex = gpd.read_file(tindex_json_fn)

    print("Subset by SUB_GEOMETRY...")
    # Reproject once and reuse the result for both the selector and the subset
    # (previously the full index was reprojected twice).
    tindex_reprojected = tindex.to_crs(CRS_SUB_GEOMETRY)
    tindex_sub = tindex_reprojected[tindex_reprojected.intersects(SUB_GEOMETRY)]

    print("Build mosaic json...")
    build_mosaic_json(tindex_sub, tindex_json_SUBSET_fn, tindex_mosaic_json_SUBSET_fn)

    # A permanently disabled branch that wrote tindex_sub to GeoJSON was removed:
    # it referenced an undefined name and could never run.
    # NOTE(review): the subset json is presumably written by
    # ExtractUtils.build_tiles_json via build_mosaic_json -- confirm.

    return tindex_sub

In [None]:
# Get boreal domain from wwf: load the circumboreal polygon layer.
# It is dissolved into a single geometry (boreal_geometry) further down.
boreal_gdf = gpd.read_file('/projects/shared-buckets/nathanmthomas/analyze_agb/input_zones/wwf_circumboreal_Dissolve.geojson')

# Define a basic project bbox with a southern limit near 50N (set to 51.6 in the next cell)

In [None]:
# Load the model-ready boreal tile grid and take its overall extent in EPSG:4326.
boreal_tiles = gpd.read_file('/projects/shared-buckets/montesano/databank/boreal_tiles_v004_model_ready.gpkg')
bbox_project_min50 = boreal_tiles.to_crs(4326).total_bounds
# Overwrite element 1 of the bounds with 51.6. The array is later unpacked into
# shapely's box(), so this is presumably miny (the southern edge) -- TODO confirm.
bbox_project_min50[1] = 51.6
bbox_project_min50

In [None]:
# Turn the edited bounds array into a rectangular shapely geometry.
geom_project_min50=box(*bbox_project_min50)
geom_project_min50

In [None]:
# Persist the project bounding box as a single-feature GeoPackage.
# (A stray `boreal_tiles.to_crs(4326).total_bounds` expression used to open this
# cell; its value was discarded, so it only cost a full reprojection.)
bbox_fn = '/projects/my-public-bucket/databank/project_bbox_min50.gpkg'

# One-row GeoDataFrame in EPSG:4326 holding the bbox polygon plus a label column.
gdf_min50 = gpd.GeoDataFrame(index=[0], crs='epsg:4326', geometry=[geom_project_min50])
gdf_min50['extent'] = 'boreal biomass domain'
gdf_min50.plot()
gdf_min50.to_file(bbox_fn, driver='GPKG')

In [None]:
# Geometry used to select tiles for the min-50N domain: the single feature of
# gdf_min50, i.e. the same polygon as geom_project_min50.
min50N_SUB_DOMAIN_selection_geometry = gdf_min50.iloc[0].geometry # this is the same as 'geom'
min50N_SUB_DOMAIN_selection_geometry

# Select tiles for domain: `BBOX min 50N`

In [None]:
# Tile-index master json for the AGB product; subset file names are derived from this path.
tindex_json_fn_AGB = '/projects/my-public-bucket/DPS_tile_lists/BOREAL_MAP/boreal_agb_2024_v6/AGB_H30_2020/Version2_SD/AGB_tindex_master.json'

In [None]:
# Tile-index master json for the HT product; subset file names are derived from this path.
tindex_json_fn_HT  = '/projects/my-public-bucket/DPS_tile_lists/BOREAL_MAP/boreal_agb_2024_v6/Ht_H30_2020/Version2_SD/HT_tindex_master.json'

In [None]:
%%time
# Subset the AGB and HT tile indexes to the min-50N bbox geometry.
# Results are stored in input order: index 0 is AGB, index 1 is HT.
# NOTE: a Python for-loop does not scope its variable, so `tindex_json_fn`
# remains bound to the last path (HT) after this cell has run.
min50_tindex_gdf_list = []
for tindex_json_fn in [tindex_json_fn_AGB , tindex_json_fn_HT]:
    min50_tindex_gdf_list.append(select_gdf_subdomain(tindex_json_fn, min50N_SUB_DOMAIN_selection_geometry, DESC='min50N'))

In [None]:
# Interactive map: the HT subset (index 1) is drawn in red first, then the AGB
# subset (index 0) is added to the same map with default styling.
min50_tindex_gdf_list[0].explore(m=min50_tindex_gdf_list[1].explore(color='red'))

# Select tiles for domain: `BOREAL`

In [None]:
# Output names for the boreal subset, derived from the HT tile-index path.
# The path is now named explicitly. Previously it came from `tindex_json_fn`,
# a variable left over from a for-loop whose last value is the HT path, so the
# result depended on hidden state from another cell.
# NOTE(review): confirm HT (and not AGB) is the intended base for these names.
tindex_json_SUBSETBOREAL_fn = f"{os.path.splitext(tindex_json_fn_HT)[0]}_boreal.json"
tindex_mosaic_json_SUBSETBOREAL_fn = f"{os.path.splitext(tindex_json_fn_HT)[0]}_mosaic_boreal.json"

### Subset covariate tiles by broad domain (boreal)
* Build the spatial selector with 'intersects'
* Do the subset according to region (where region is identified with the spatial selector)

In [None]:
%time
#tile_group_region_string = 'wwf_boreal'
boreal_gdf['dissolve_field'] = 'for intersect'
boreal_geometry = boreal_gdf.dissolve(by='dissolve_field').iloc[0].geometry
boreal_geometry

In [None]:
# Preview the boreal geometry simplified with tolerance 1 (in the geometry's
# coordinate units). The same simplification is used for the tile selection below.
boreal_geometry.simplify(1)

In [None]:
%%time
boreal_tindex_gdf_list = []
for tindex_json_fn in [tindex_json_fn_AGB , tindex_json_fn_HT]:
    boreal_tindex_gdf_list.append(select_gdf_subdomain(tindex_json_fn, boreal_geometry.simplify(1), DESC='boreal'))

In [None]:
# Disabled (never runs): legacy export of a boreal subset and its mosaic json.
# NOTE(review): AGB_c2020_noground_tindex_BOREAL, tindex_master_json_SUBSETBOREAL_fn
# and tindex_master_mosaic_json_SUBSETBOREAL_fn are not defined anywhere in the
# visible notebook, so enabling this branch as-is would raise NameError.
if False:
    AGB_c2020_noground_tindex_BOREAL.to_file(tindex_master_json_SUBSETBOREAL_fn, driver='GeoJSON')
    build_mosaic_json(AGB_c2020_noground_tindex_BOREAL, tindex_master_json_SUBSETBOREAL_fn, tindex_master_mosaic_json_SUBSETBOREAL_fn)

# For DAAC upload: build list of the union of both domains
`Note`: this is where tile_num field got messed up.  
##### Fixed: June 2023  
`re-delivered to ORNL DAAC`

In [None]:
# Re-load the per-domain subset tile indexes from disk. The file names follow the
# f"{base}_{DESC}.json" pattern that select_gdf_subdomain hands to build_mosaic_json
# (DESC = 'min50N' / 'boreal'). Presumably ExtractUtils.build_tiles_json is what
# writes these files -- TODO confirm.
tindex_agb_min50N = gpd.read_file(f"{os.path.splitext(tindex_json_fn_AGB)[0]}_min50N.json", driver='GeoJSON')
tindex_agb_BOREAL = gpd.read_file(f"{os.path.splitext(tindex_json_fn_AGB)[0]}_boreal.json", driver='GeoJSON')

tindex_ht_min50N = gpd.read_file(f"{os.path.splitext(tindex_json_fn_HT)[0]}_min50N.json", driver='GeoJSON')
tindex_ht_BOREAL = gpd.read_file(f"{os.path.splitext(tindex_json_fn_HT)[0]}_boreal.json", driver='GeoJSON')

### Get tiles of AGB

In [None]:
def get_union_of_gdfs(tindex_1, tindex_2, OUT_COLS_LIST=('tile_num', 'tile_group', 's3_path', 'geometry')):
    """Return the union of two tile indexes.

    The UNION is the symmetric_difference + intersection: both overlays are
    computed with ``gpd.overlay`` and concatenated (intersection rows first).
    The suffixed ``*_1`` / ``*_2`` columns are then merged back into single
    ``tile_num``, ``tile_group`` and ``s3_path`` columns.

    Parameters
    ----------
    tindex_1, tindex_2
        Tile-index GeoDataFrames. The clean-up step reads the columns
        ``tile_num_1/_2``, ``tile_group_1/_2`` and ``s3_path_1/_2`` from the
        overlay results, so both inputs are expected to carry ``tile_num``,
        ``tile_group`` and ``s3_path``.
    OUT_COLS_LIST
        Columns kept in the returned frame (any sequence of column names).

    Returns
    -------
    The concatenated overlay rows restricted to ``OUT_COLS_LIST``.

    Side effects: draws three diagnostic plots and prints the row counts.
    """
    import numpy as np

    # Tiles that are in one OR the other. Missing values are filled with 0 here
    # and turned back into NaN after concatenation.
    only_in_one = gpd.overlay(tindex_1, tindex_2, how='symmetric_difference').fillna(0)
    only_in_one.plot(alpha=0.5)
    print(len(only_in_one))

    # Tiles that are in both.
    in_both = gpd.overlay(tindex_1, tindex_2, how='intersection').fillna(0)
    in_both.plot(color='red', alpha=0.5)
    print(len(in_both))

    gdf_union = pd.concat([in_both, only_in_one])  # gpd.overlay(tindex_1, tindex_2, how='union')

    # Combined view: intersection in red with the symmetric difference on top.
    ax = in_both.plot(color='red', alpha=0.5)
    only_in_one.plot(ax=ax, alpha=0.5)

    print(len(gdf_union))
    print(f"{len(only_in_one)} + {len(in_both)} = {len(only_in_one)+len(in_both)}")

    ## Clean up
    # NOTE: this turns every 0 in the frame into NaN, not only the fill values.
    gdf_union.replace(0, np.nan, inplace=True)
    # Row-wise max ignores NaN, so this picks whichever side has a tile number.
    gdf_union['tile_num'] = gdf_union[["tile_num_1", "tile_num_2"]].max(axis=1).astype(int)
    gdf_union["tile_group"] = gdf_union["tile_group_1"].fillna(gdf_union["tile_group_2"])
    gdf_union["s3_path"] = gdf_union["s3_path_1"].fillna(gdf_union["s3_path_2"])

    # list() keeps column selection working for any sequence; a bare tuple
    # would be treated as a single key.
    return gdf_union[list(OUT_COLS_LIST)]

In [None]:
# AGB tiles for the DAAC: union of the min-50N and boreal AGB subsets.
tiles_agb_daac = get_union_of_gdfs(tindex_agb_min50N, tindex_agb_BOREAL)

In [None]:
# HT tiles for the DAAC: union of the min-50N and boreal HT subsets.
tiles_ht_daac = get_union_of_gdfs(tindex_ht_min50N, tindex_ht_BOREAL)

### Check for missing tiles

In [None]:
tiles_agb_daac.shape

In [None]:
tiles_ht_daac.shape

In [None]:
# larger - smaller: tile numbers present in the HT DAAC set but absent from the
# AGB DAAC set (a set difference, so the order of the result is arbitrary).
list(set(tiles_ht_daac.tile_num.to_list()) - set(tiles_agb_daac.tile_num.to_list()))

In [None]:
# Load ONE tindex master csv for inspection; un-comment the one to look at.
# Previously the AGB csv was read and then immediately overwritten by the HT
# read, so only HT was ever inspected and the AGB read was wasted.
#tindex = pd.read_csv('/projects/my-public-bucket/DPS_tile_lists/run_build_stack_topo/build_stack_v2024_2/CopernicusGLO30/Topo_tindex_master.csv')
#tindex = pd.read_csv('/projects/my-public-bucket/DPS_tile_lists/BOREAL_MAP/boreal_agb_2024_v6/AGB_H30_2020/Version2_SD/AGB_tindex_master.csv')
tindex = pd.read_csv('/projects/my-public-bucket/DPS_tile_lists/BOREAL_MAP/boreal_agb_2024_v6/Ht_H30_2020/Version2_SD/HT_tindex_master.csv')

In [None]:
# Parse a creation timestamp for each row: split the `file` column on '_' and
# read field index 3 with the format '%Y%m%d%H%M%S%f'.
tindex['creation_time'] = pd.to_datetime(tindex.file.str.split('_', expand=True)[3], format='%Y%m%d%H%M%S%f')

In [None]:
tindex[tindex.tile_num.isin([22937])]

In [None]:
tindex.sort_values('creation_time').tail(92).head(20)

In [None]:
# Rows whose creation_time is later than '2024-08-27' and earlier than
# '2024-10-07' (both bounds exclusive).
# NOTE(review): why this window is considered "bad" is not stated here -- confirm.
bad_subset = tindex[(tindex.creation_time > '2024-08-27') &
                    (tindex.creation_time < '2024-10-07')
                   ]
bad_subset.shape

In [None]:
bad_subset

In [None]:
print(bad_subset.tile_num.to_list())

In [None]:
# s3_path of one tile of interest (first matching row). The commented lines are
# alternative tile numbers to inspect.
fn = tindex[tindex.tile_num == 23827].s3_path.to_list()[0]
#fn = tindex[tindex.tile_num == 24108].s3_path.to_list()[0]
#fn = tindex[tindex.tile_num == 24389].s3_path.to_list()[0]
fn

# Export a boreal tiles DAAC subset 

In [None]:
# Quick-look map coloured by tile_group, then export only tile_num + geometry
# of the AGB DAAC tile set to a GeoPackage.
tiles_agb_daac.plot(column='tile_group', cmap='viridis')
tiles_agb_daac[['tile_num', 'geometry']].to_file('/projects/my-public-bucket/databank/boreal_tiles_v004_AGB_H30_2020_ORNLDAAC.gpkg', driver='GPKG')

In [None]:
# Export only tile_num + geometry of the HT DAAC tile set to a GeoPackage.
tiles_ht_daac[['tile_num', 'geometry']].to_file('/projects/my-public-bucket/databank/boreal_tiles_v004_HT_H30_2020_ORNLDAAC.gpkg', driver='GPKG')

In [None]:
# boreal_agb_files[0:5]

In [None]:
# with open("/projects/my-public-bucket/boreal_agb_tiles_DAAC.txt", 'w') as file:
#         for row in boreal_agb_files:
#             s = "".join(map(str, row))
#             file.write(s+'\n')