# Build a reduced resolution version of the Boreal Biomass COG mosaic
This reduced-resolution version can be used in ggplot to produce maps for publications and presentations.  
reference:   
https://gist.github.com/wildintellect/  
https://gist.github.com/wildintellect/2efd429da476df46def6af2aa0d1c039  
https://github.com/MAAP-Project/maap-documentation-examples/blob/feat/oviews/visualization/OverviewResampling.ipynb

In [1]:
!conda install mamba
!mamba create -y -n gdal34 -c conda-forge 'gdal>=3.4.3' geopandas ipykernel
!conda activate gdal34
#After this runs switch the kernel of the notebook, may require stopping and closing the notebook

Collecting package metadata (current_repodata.json): done
Solving environment: failed with initial frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): done
Solving environment: failed with initial frozen solve. Retrying with flexible solve.

PackagesNotFoundError: The following packages are not available from current channels:

  - mamba

Current channels:

  - https://repo.anaconda.com/pkgs/main/linux-64
  - https://repo.anaconda.com/pkgs/main/noarch
  - https://repo.anaconda.com/pkgs/r/linux-64
  - https://repo.anaconda.com/pkgs/r/noarch

To search for alternate channels that may provide the conda package you're
looking for, navigate to

    https://anaconda.org

and use the search bar at the top of the page.


/bin/bash: mamba: command not found

CommandNotFoundError: Your shell has not been properly configured to use 'conda activate'.
To initialize your shell, run

    $ conda init <SHELL_NAME>

Currently supported shells are:
  - bash
  - fish


In [16]:
import geopandas as gpd
import os
import subprocess

from multiprocessing import Pool
from functools import partial

# GDAL configuration, set before any dataset is opened: EMPTY_DIR asks GDAL not to
# list the containing directory for sibling files when it opens a file.
os.environ.update({'GDAL_DISABLE_READDIR_ON_OPEN': 'EMPTY_DIR'})

## Get the MosaicJson which has the s3 path to the COGs

In [17]:
# Load the tile index: one row per AGB tile, including the s3 path of its COG.
# Path used by an earlier run, kept for reference only:
#   /projects/shared-buckets/nathanmthomas/DPS_tile_lists/07/AGB_tindex_master.json
tile_index_json_fn = '/projects/shared-buckets/nathanmthomas/DPS_tile_lists/AGB/fall2022/map_boreal_2022_v3/11/AGB_tindex_master.json'
tiles = gpd.read_file(tile_index_json_fn)
tiles.head()

Unnamed: 0,tile_num,tile_version,tile_group,map_version,s3_path,local_path,geometry
0,3407,version 1,eurasia east,,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,"POLYGON ((116.49754 50.47596, 117.25220 51.127..."
1,1590,version 1,eurasia central,,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,"POLYGON ((86.41692 61.19673, 86.72497 61.97926..."
2,2227,version 1,north america centra,,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,"POLYGON ((-93.13096 46.08335, -92.82043 45.295..."
3,1669,version 1,north america east,,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,"POLYGON ((-84.51029 49.85542, -84.32755 49.053..."
4,3633,version 1,eurasia east,,s3://maap-ops-workspace/lduncanson/dps_output/...,/projects/my-private-bucket/dps_output/run_bor...,"POLYGON ((176.12423 66.36034, 178.15684 66.394..."


## Build the list of COGs needed for the VRT

In [18]:
# The VRT, and the text file listing its inputs, are written next to the tile index.
outdir = os.path.dirname(tile_index_json_fn)
vrt_list_fn, vrt_fn = (
    os.path.join(outdir, name) for name in ("AGB_tindex.txt", "AGB_tindex.vrt")
)

In [11]:
# One input per line for `gdalbuildvrt -input_file_list`; each s3:// URI is rewritten
# to the /vsis3/ form before it is written.
with open(vrt_list_fn, 'w') as list_file:
    list_file.writelines(
        s3_uri.replace("s3:/", "/vsis3") + '\n'
        for s3_uri in tiles.s3_path.tolist()
    )

## Build the VRT mosaic needed

In [12]:
#get the bbox as part of the gdalbuildvrt and define it, to save time calculating
boreal_tiles = gpd.read_file('/projects/shared-buckets/nathanmthomas/boreal_tiles_v003.gpkg')
boreal_tiles.crs
tiles.to_crs(boreal_tiles.crs).total_bounds

# time gdalbuildvrt -te -5391478 1323304 5408522 9243304 -input_file_list shared-buckets/alexdevseed/AGB_tindex.txt AGB_tindex.vrt
!time gdalbuildvrt -allow_projection_difference -te -5391478 1323304 5408522 10323304 -input_file_list $vrt_list_fn $vrt_fn

0...10...20...30...40...50...60...70...80...90...100 - done.

real	8m22.283s
user	0m15.520s
sys	0m1.063s


### Build the function to handle the res reduction

In [35]:
def generate_reduced_res(in_vrt_fn, method, tr=900, outdir=None):
    '''
    Build a reduced resolution, Deflate-compressed GeoTIFF from the input raster.

    Runs `gdal_translate -of GTiff -tr <tr> <tr> -r <method> -co COMPRESS=Deflate`.
    The output file is named `<input basename>_<method>_<tr>m.tif`.

    in_vrt_fn = input filename (a VRT, or a single tile such as a /vsis3/ path)
    method = the resampling method, passed to gdal_translate as -r
    tr = target resolution in meters (used for both x and y)
    outdir = folder to write the output to; defaults to the folder of in_vrt_fn
    return the path to the new reduced resolution GeoTIFF

    The path is returned even when gdal_translate exits with an error; a warning is
    issued in that case so the failed input can be found and re-run.
    '''
    import warnings

    # Writing directly to a 'bucket' can be slow since it's using s3fuse; /tmp is preferred.
    out_basename = f'{os.path.splitext(os.path.basename(in_vrt_fn))[0]}_{method}_{tr}m.tif'

    if outdir is None:
        outdir = os.path.dirname(in_vrt_fn)
    # os.path.join copes with an empty folder (input given as a bare filename) and with
    # a trailing separator on outdir; plain string concatenation would yield '/name.tif'
    # (the filesystem root) or a doubled '//' in those cases.
    out_cog_fn = os.path.join(outdir, out_basename)

    process = subprocess.run(['gdal_translate', '-of', 'GTiff',
                              '-tr', str(tr), str(tr),
                              '-r', str(method),
                              '-co', 'COMPRESS=Deflate',
                              str(in_vrt_fn), out_cog_fn],
                             stdout=subprocess.PIPE,
                             universal_newlines=True)

    # Do not raise: when mapped over thousands of tiles a single bad tile should not
    # abort the whole batch. Report it instead of failing silently.
    if process.returncode != 0:
        warnings.warn(f'gdal_translate exited with code {process.returncode} for {in_vrt_fn}; '
                      f'{out_cog_fn} may be missing or incomplete')

    return out_cog_fn

In [14]:
# time gdal_translate -of COG -tr 900 900 -co BIGTIFF=IF_NEEDED -co COMPRESS=DEFLATE -co PREDICTOR=2 GDAL_DISABLE_READDIR_ON_OPEN=EMPTY_DIR AGB_tindex.vrt icesat2_boreal_biomass_900m.tif
#!time gdal_translate -of COG -tr 3600 3600 -co BIGTIFF=IF_NEEDED -co COMPRESS=DEFLATE -co PREDICTOR=2 -r {method} -co OVERVIEW_RESAMPLING={method} /projects/my-public-bucket/DPS_tile_lists/07/AGB_tindex.vrt /projects/tmp/AGB_tindex_avg_3600m.tif

# This subsetted window works, but it takes 30 minutes
#time gdal_translate -of COG -tr 900 900 -srcwin 0 0 60000 60000 -co BIGTIFF=IF_NEEDED -co COMPRESS=DEFLATE -co PREDICTOR=2 -r average -co OVERVIEW_RESAMPLING=average /projects/my-public-bucket/DPS_tile_lists/07/AGB_tindex.vrt /projects/tmp/AGB_tindex_avg_900m_sub.tif

In [None]:
#generate_reduced_res(vrt_fn, 'average', tr=3600)

## Current working solutions (Feb 2023)

Loop over all the input tiles and reduce each one, in parallel. This ensures that only 1 file needs to be read at a time and should be more memory efficient. A VRT of the reduced tiles is then built and converted into a single GTiff or COG with no additional resampling required.

In [23]:
# Rewrite every tile's s3:// URI to the /vsis3/ form, then preview the first 13.
vsi_list = [
    s3_uri.replace("s3:/", "/vsis3")
    for s3_uri in tiles.s3_path.tolist()
]
vsi_list[:13]

['/vsis3/maap-ops-workspace/lduncanson/dps_output/run_boreal_biomass_quick_v2_ubuntu/map_boreal_2022_v3/2022/11/21/19/49/35/452366/boreal_agb_202211211669060101_3407.tif',
 '/vsis3/maap-ops-workspace/lduncanson/dps_output/run_boreal_biomass_quick_v2_ubuntu/map_boreal_2022_v3/2022/11/21/19/09/56/603958/boreal_agb_202211211669057723_1590.tif',
 '/vsis3/maap-ops-workspace/lduncanson/dps_output/run_boreal_biomass_quick_v2_ubuntu/map_boreal_2022_v3/2022/11/21/18/46/19/085056/boreal_agb_202211211669056308_2227.tif',
 '/vsis3/maap-ops-workspace/lduncanson/dps_output/run_boreal_biomass_quick_v2_ubuntu/map_boreal_2022_v3/2022/11/21/18/33/22/074064/boreal_agb_202211211669055535_1669.tif',
 '/vsis3/maap-ops-workspace/lduncanson/dps_output/run_boreal_biomass_quick_v2_ubuntu/map_boreal_2022_v3/2022/11/21/18/24/14/398161/boreal_agb_202211211669054977_3633.tif',
 '/vsis3/maap-ops-workspace/lduncanson/dps_output/run_boreal_biomass_quick_v2_ubuntu/map_boreal_2022_v3/2022/11/21/18/19/40/772820/boreal_ag

In [21]:
#testing 1
# there was a bug in the gdal43 env where AWS permissions failed, switched back to base
#generate_reduced_res(vsi_list[2], 'average', tr=3000)

In [None]:
# with Pool(processes=10) as pool:
#     pool.map(partial(generate_reduced_res, method='average', tr=6000), vsi_list[13:])

#### Choose an output dir for the resampled tiles
Sometimes you can't write these next to their originals because they are in someone else's private bucket.

In [None]:
# Resampled tiles go in a 'resample' subfolder next to the tile index.
# The final '' keeps the trailing separator, so the value is the same string as before
# for any tile index that has a folder component.
output_location = os.path.join(os.path.dirname(tile_index_json_fn), 'resample', '')
# Create the folder from Python rather than shelling out to `mkdir -p`, so a path
# containing spaces or shell metacharacters is handled correctly.
os.makedirs(output_location, exist_ok=True)

## Do individual tile resampling 
##### (Find the missing tiles and fix)

1. Load up the list of files that were produced.
1. Compare against list of inputs, make a list of missing
1. Run the missing tiles through resampler.

In [36]:
# Everything currently sitting in the resample folder ...
resampled = os.listdir(output_location)

# ... of which only the .tif files count as finished tiles.
resampled_tif = [entry for entry in resampled if entry.endswith('.tif')]
len(resampled_tif)

0

In [31]:
# The tile number is the 4th underscore-separated field of each resampled filename.
resampled_tilenum = [
    int(os.path.basename(tif_name).split("_")[3])
    for tif_name in resampled_tif
]
resampled_tilenum[10:20]

[]

In [34]:
# Tiles from the index with no resampled file yet, as /vsis3/ paths ready for GDAL.
missing = [
    s3_uri.replace("s3:/", "/vsis3")
    for s3_uri in tiles.loc[~tiles['tile_num'].isin(resampled_tilenum)].s3_path.tolist()
]
print(len(missing))
# Show a sample path. Once every tile has been resampled the list is empty, and
# indexing it unconditionally would raise IndexError on what is the success case.
missing[0] if missing else None

4714


'/vsis3/maap-ops-workspace/lduncanson/dps_output/run_boreal_biomass_quick_v2_ubuntu/map_boreal_2022_v3/2022/11/21/19/49/35/452366/boreal_agb_202211211669060101_3407.tif'

In [39]:
# Resample the missing tiles to 3000 m with 10 worker processes, one tile per task.
with Pool(processes=10) as pool:
    pool.map(
        partial(generate_reduced_res, method='average', tr=3000, outdir=output_location),
        missing,
    )
# Serial equivalent, useful for debugging a single failing tile:
#[generate_reduced_res(fix, 'average', tr=3000, outdir=output_location) for fix in missing]

### Build a VRT of the coarsened individual tiles

In [41]:
# Mosaic every .tif in the resample folder into a VRT, using the same hard-coded
# extent (-te) as the full resolution VRT built earlier in this notebook.
# NOTE(review): the output is vrt_fn, the same path the full resolution VRT was
# written to, so that file is presumably replaced here; confirm this is intended.
!time gdalbuildvrt -allow_projection_difference -te -5391478 1323304 5408522 10323304 $vrt_fn $output_location/*.tif

0...10...20...30...40...50...60...70...80...90...100 - done.

real	1m17.469s
user	0m10.405s
sys	0m0.727s


### Make a COG of the coarsened VRT for plotting, mapping, etc

In [42]:
%%time
# Convert the VRT at vrt_fn into a single compressed GeoTIFF written next to it.
# The %%time magic has to stay on the first line of the cell.
# NOTE(review): tr=3000 presumably matches the resolution the tiles were already
# resampled to, so no further coarsening is expected here; confirm.
generate_reduced_res(vrt_fn, 'average', tr=3000)

CPU times: user 2.19 ms, sys: 10 ms, total: 12.2 ms
Wall time: 1min 47s


'/projects/shared-buckets/nathanmthomas/DPS_tile_lists/AGB/fall2022/map_boreal_2022_v3/11/AGB_tindex_average_3000m.tif'