In [None]:
''' 
Having issues with multiprocessing doing multiple concurrent requests?

<p>CMR Search rate exceeded. Please refer to the following for guidance: https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#request-moderation

many STAC queries - per month/year, per product, incrementing the max_cloud

add time.sleep() between requests ?

Reduce request count?
- Instead of incrementally increasing the cloud threshold, use a single threshold?

'''

In [7]:
import os

from pathlib import Path
from datetime import datetime
from calendar import monthrange

import fiona

import multiprocessing as mp

import geopandas as gpd

import copy

In [8]:
import sys
sys.path.append('/projects/my-private-bucket/code-git-shared/icesat2_boreal/lib')
import plotlib
from mosaiclib import *

In [9]:
import rasterio
from rasterio.plot import show_hist, show
import numpy as np
import matplotlib.pyplot as plt

In [10]:
# Hardware summary for the machine running this kernel
import multiprocessing as mp
import psutil

# Collect both figures first, then print the report in one place
num_cores = mp.cpu_count()
mem = psutil.virtual_memory()

# Memory is reported in decimal gigabytes (bytes / 1e9)
print(f"Number of CPU cores: {num_cores}")
print(f"Total memory: {mem.total / 1e9:.2f} GB")
print(f"Available memory: {mem.available / 1e9:.2f} GB")

Number of CPU cores: 32
Total memory: 267.33 GB
Available memory: 256.46 GB


In [11]:
# Functions 
def wrapper_composite(params):
    """Run the composite-building script for one tile/season and return the expected output path.

    The script is started as a child process from an argument list (no shell
    evaluation, so values containing spaces are passed through intact). Its
    combined stdout/stderr is echoed line by line. A warning is printed when the
    script cannot be launched, exits with a non-zero status, or the expected
    GeoTIFF is not found afterwards. These failures are reported rather than
    raised, so a single bad run does not abort a whole ``Pool.map`` batch.

    Parameters
    ----------
    params : dict
        Run settings, read with ``params.get`` (a missing key is forwarded as the
        text ``None``). Keys used: ``FOCAL_TILE``, ``SAT_API``, ``MS_COMP_TYPE``,
        ``YEAR``, ``MIN_N_FILT_RESULTS``, ``SEASON_START``, ``SEASON_STOP``,
        ``STAT``, ``STAT_PCT``, ``TARGET_SPECTRAL``, ``INDEX_FN``, ``INDEX_LYR``,
        ``HLS_PRODUCT``, ``MAX_CLOUDS``, ``OUTDIR``.

    Returns
    -------
    str
        Path at which the composite is expected. ``STAT_PCT`` is part of the
        file name only when ``STAT == 'percentile'``. The path is returned even
        if the run failed; check the printed warnings.
    """
    import subprocess  # local import: the notebook has no single import cell to extend

    FOCAL_TILE = params.get('FOCAL_TILE')
    SAT_API = params.get('SAT_API')
    MS_COMP_TYPE = params.get('MS_COMP_TYPE')
    YEAR = params.get('YEAR')
    MIN_N_FILT_RESULTS = params.get('MIN_N_FILT_RESULTS')
    SEASON_START = params.get('SEASON_START')
    SEASON_STOP = params.get('SEASON_STOP')

    STAT = params.get('STAT')
    STAT_PCT = params.get('STAT_PCT')
    TARGET_SPECTRAL = params.get('TARGET_SPECTRAL')

    INDEX_FN = params.get('INDEX_FN')  # e.g. 'https://maap-ops-workspace.s3.amazonaws.com/shared/montesano/databank/boreal_tiles_v004.gpkg'
    INDEX_LYR = params.get('INDEX_LYR')  # e.g. 'boreal_tiles_v004'

    # A single year is composited per call, so start and stop year coincide.
    YEAR_START, YEAR_STOP = (YEAR, YEAR)
    HLS_PRODUCT = params.get('HLS_PRODUCT')  # e.g. 'H30'
    MAX_CLOUDS = params.get('MAX_CLOUDS')  # e.g. 0

    OUTDIR = params.get('OUTDIR')  # e.g. '/projects/my-private-bucket/tmp/mask_test_keep_snow'

    # Alternative scripts used previously:
    #   /projects/my-private-bucket/code-git-shared/icesat2_boreal/lib/build_ms_composite.py
    #   /projects/my-private-bucket/code-git-shared/icesat2_boreal/lib/build_ms_composite_multip.py
    script_fn = '/projects/my-private-bucket/code-git-shared/icesat2_boreal/lib/build_ms_composite_addEVI2.py'

    cmd = [
        'python', script_fn,
        '--in_tile_fn', INDEX_FN,
        '--in_tile_layer', INDEX_LYR,
        '--sat_api', SAT_API,
        '--tile_buffer_m', 0,
        '--in_tile_num', FOCAL_TILE,
        '--output_dir', OUTDIR,
        '-sy', YEAR_START, '-ey', YEAR_STOP,
        '-smd', SEASON_START, '-emd', SEASON_STOP,
        '-mc', MAX_CLOUDS,
        '--composite_type', MS_COMP_TYPE,
        '--hls_product', HLS_PRODUCT,
        '--thresh_min_ndvi', -1,
        '--min_n_filt_results', MIN_N_FILT_RESULTS,
        '--stat', STAT,
        '--stat_pct', STAT_PCT,
        '--target_spectral_index', TARGET_SPECTRAL,
        '--do_indices',
        # '--search_only',
        # '--rangelims_red', 0.01, 0.1,  # the default now effectively no limit [-1e9, 1e9]
        '--rangelims_red', 0.01, 1,  # the default now effectively no limit [-1e9, 1e9]
    ]
    # Every token must be a string for subprocess; str() gives the same text the
    # original f-string interpolation produced.
    cmd = [str(token) for token in cmd]

    returncode = None
    try:
        # stderr is merged into stdout so the script's messages keep their order.
        with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                              text=True, errors='replace') as proc:
            for line in proc.stdout:
                print(line, end='', flush=True)
        # Leaving the `with` block waits for the child, so the status is final here.
        returncode = proc.returncode
    except OSError as err:
        print(f'WARNING: could not launch composite script for tile {FOCAL_TILE} '
              f'({SEASON_START} to {SEASON_STOP}, {YEAR}): {err}', flush=True)

    # The percentile value is part of the file name only for the percentile statistic.
    stat_tag = f'{STAT}{STAT_PCT}' if STAT == 'percentile' else f'{STAT}'
    fn = f'{OUTDIR}/{MS_COMP_TYPE}_{FOCAL_TILE}_{SEASON_START}_{SEASON_STOP}_{YEAR_START}_{YEAR_STOP}_{stat_tag}{TARGET_SPECTRAL}.tif'
    #rescaled_multiband_fn = os.path.join(os.path.dirname(fn), os.path.basename(fn).replace('.tif','_rescaled_3band_temp.tif'))
    # plotlib.rescale_multiband_for_plot(fn, rescaled_multiband_fn, bandlist = [5,7,3], pct=[20,90], nodata=-9999.0)

    # Surface silent failures instead of returning a path that may not exist.
    if returncode is None:
        pass  # launch failure was already reported above
    elif returncode != 0:
        print(f'WARNING: composite script exited with status {returncode} for tile {FOCAL_TILE} '
              f'({SEASON_START} to {SEASON_STOP}, {YEAR})', flush=True)
    elif not os.path.isfile(fn):
        print(f'WARNING: composite script finished but the expected output was not found: {fn}', flush=True)

    return fn

In [16]:
# Define input and output paths
# INDEX_FN: GeoPackage with the tile grid; its layer names and "tile_num" values are read in later cells.
INDEX_FN = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg' 
# BASE_OUTDIR: root folder under which the per-tile, per-year output folders are created.
BASE_OUTDIR = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/tif/HLS_composites/monthly/br_af_grid30km_prj_evi2_p95'

In [17]:
# Set default parameters
# Values forwarded to the compositing script by wrapper_composite
SAT_API = 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD'  # passed as --sat_api
MS_COMP_TYPE = 'HLS'  # passed as --composite_type; also the prefix of the output file name
HLS_PRODUCT = 'H30'  # passed as --hls_product

# Statistic passed as --stat
# STAT = 'max'
STAT = 'percentile'

# Spectral index passed as --target_spectral_index
# TARGET_SPECTRAL = 'ndvi'
TARGET_SPECTRAL = 'evi2'
STAT_PCT = 95.0  # passed as --stat_pct; appears in the output file name only when STAT == 'percentile'

MIN_N_FILT_RESULTS = 10  # passed as --min_n_filt_results
MAX_CLOUDS = 0 # Starting max-cloud threshold for the search, e.g. when set to 0 it starts at 0% cloud cover and goes up to 90%


In [18]:
# Get first Layer name (First layer is used as default)
# The first layer listed in the index file is used as the tile layer
layer_names = fiona.listlayers(INDEX_FN)
INDEX_LYR = first_layer_name = layer_names[0]

In [19]:
# Define parameters
# Settings shared by every run; the per-run keys (tile, year, season, output
# directory) are added when the run list is built.
params = dict(
    SAT_API=SAT_API,
    HLS_PRODUCT=HLS_PRODUCT,
    MS_COMP_TYPE=MS_COMP_TYPE,
    MAX_CLOUDS=MAX_CLOUDS,
    MIN_N_FILT_RESULTS=MIN_N_FILT_RESULTS,
    STAT=STAT,
    STAT_PCT=STAT_PCT,
    TARGET_SPECTRAL=TARGET_SPECTRAL,
    INDEX_FN=INDEX_FN,
    INDEX_LYR=INDEX_LYR,
)

In [20]:
# Define year
years = [2022]

# Get all tile IDs from the "tile_num" column of the index file.
# The file is read once: geopandas loads its default (first) layer, and the
# resulting frame is kept in tiles_gpkg for later inspection.
tiles_gpkg = gpd.read_file(INDEX_FN)
tiles = tiles_gpkg["tile_num"].tolist()
# # tiles

In [21]:
# Number of tile IDs read from the index file
len(tiles)

1561

In [22]:
# Select tiles to run
# tiles_run = tiles  # use this line instead to run every tile

# Single-tile test run: the slice keeps only the 4th entry of `tiles`
tiles_run = tiles[3:4]
tiles_run


[4]

In [24]:
# Years to process. This replaces the single-year list assigned in an earlier cell;
# the commented lines are alternative selections.
# years = [2022]
years = [2018,2019,2020,2021,2022]
# years = [2015,2016,2017]

# type(years)

In [25]:
%%time
# Create parameter list and create output directories
# This is set up to do monthly composites
params_list = []
for tile in tiles_run: 
    for year in years:
        # for month in range(2, 4):
        for month in range(1, 13):
            start_day = f"{month:02d}-01"
            end_day = f"{month:02d}-{monthrange(year, month)[1]:02d}"
    
            # output directory for specific run
            # outdir = f'{BASE_OUTDIR}/tile_{tile:03d}/{year}/{month:02d}'
            outdir = f'{BASE_OUTDIR}/tile_{tile:03d}/{year}/'
            
            os.makedirs(outdir, exist_ok=True)
    
            run_params = copy.deepcopy(params)
            run_params.update({
                'FOCAL_TILE': tile,
                'YEAR': year,
                'SEASON_START': start_day,
                'SEASON_STOP': end_day,
                'OUTDIR': str(outdir)
            })
    
            params_list.append(run_params)



CPU times: user 0 ns, sys: 3.93 ms, total: 3.93 ms
Wall time: 571 ms


In [26]:
# Number of runs queued (one per tile, year and month)
len(params_list)

60

In [27]:
%%time
# mp.cpu_count() - 1
# with mp.Pool(processes= 5) as pool:
#     fn_list = pool.map(wrapper_composite, params_list)
with mp.Pool(processes= 12) as pool:
    fn_list = pool.map(wrapper_composite, params_list)


Tiles path:		 /projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg
Tile number:		 4
Output res (m):		 30

Tiles path:		 /projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg
Tile number:		 4
Output res (m):		 30

Tiles path:		 /projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg
Tile number:		 4
Output res (m):		 30

Tiles path:		 /projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg
Tile number:		 4
Output res (m):		 30

Tiles path:		 /projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg
Tile number:		 4
Output res (m):		 30
in_bbox:		 [4839419.049164498, 7374073.29125729, 4869419.049164498, 7404073.29125729]
bbox 4326:		 [-55.574889898123715, -23.729593329414808, -55.278057362641555, -23.456105892179057]
Getting output dims from buffered (buffer=0.0) original tile geome

In [None]:
# # Plot a false-color composite as a quick visual check
# fn = fn_list[0]
# fn
# rescaled_multiband_fn = fn.replace('.tif', '_rescaled_3band_temp.tif')
# plotlib.rescale_multiband_for_plot(fn, rescaled_multiband_fn, bandlist = [6,4,3], pct=[20,98], nodata=-9999.0) 

# fig,ax=plt.subplots(figsize=(10,10))
# with rasterio.open(rescaled_multiband_fn) as src:
#     #print(src.profile)
#     show(src.read(),transform=src.transform, ax=ax, title=os.path.basename(rescaled_multiband_fn))

In [None]:
### Notes

# Time to run 1 tile 1 year 12 processors



In [None]:
## CHECK ERRORS

## Sometimes no output is created when running multiple jobs