In [1]:
import os

from pathlib import Path
from datetime import datetime
from calendar import monthrange

import fiona

import multiprocessing as mp

import geopandas as gpd

import copy

In [2]:
import sys
sys.path.append('/projects/my-private-bucket/code-git-shared/icesat2_boreal/lib')
import plotlib
from mosaiclib import *

In [3]:
import rasterio
from rasterio.plot import show_hist, show
import numpy as np
import matplotlib.pyplot as plt

In [4]:
import multiprocessing as mp

import psutil

# Snapshot the resources available to this kernel: CPU core count and the
# system memory figures reported by psutil.
num_cores = mp.cpu_count()
mem = psutil.virtual_memory()

print(f"Number of CPU cores: {num_cores}")
print(f"Total memory: {mem.total / 1e9:.2f} GB")
print(f"Available memory: {mem.available / 1e9:.2f} GB")

Number of CPU cores: 32
Total memory: 267.33 GB
Available memory: 193.47 GB


In [9]:
# Functions 
def wrapper_composite(params):
    """Build one multispectral composite by running the composite script as a subprocess.

    The script is launched with an explicit argument list (no shell), its output
    is streamed to stdout line by line, and a warning is printed when the script
    cannot be launched, exits with a non-zero status, or leaves no output file.

    Parameters
    ----------
    params : dict
        Run configuration. Keys read here: 'FOCAL_TILE', 'SAT_API',
        'MS_COMP_TYPE', 'YEAR', 'MIN_N_FILT_RESULTS', 'SEASON_START',
        'SEASON_STOP', 'STAT', 'STAT_PCT', 'TARGET_SPECTRAL', 'INDEX_FN',
        'INDEX_LYR', 'HLS_PRODUCT', 'MAX_CLOUDS', 'OUTDIR'.
        A missing key is read as None and reaches the script as the text 'None'.

    Returns
    -------
    str
        Expected path of the composite GeoTIFF. The path is returned even when
        the run failed (a warning is printed in that case), so callers that map
        this function over many parameter sets keep getting one entry per run.
    """
    # Local import: keeps the function self-contained wherever it is executed.
    import subprocess

    FOCAL_TILE = params.get('FOCAL_TILE')
    SAT_API = params.get('SAT_API')
    MS_COMP_TYPE = params.get('MS_COMP_TYPE')
    YEAR = params.get('YEAR')
    MIN_N_FILT_RESULTS = params.get('MIN_N_FILT_RESULTS')
    SEASON_START = params.get('SEASON_START')
    SEASON_STOP = params.get('SEASON_STOP')

    STAT = params.get('STAT')
    STAT_PCT = params.get('STAT_PCT')
    TARGET_SPECTRAL = params.get('TARGET_SPECTRAL')

    INDEX_FN = params.get('INDEX_FN')    # e.g. 'https://maap-ops-workspace.s3.amazonaws.com/shared/montesano/databank/boreal_tiles_v004.gpkg'
    INDEX_LYR = params.get('INDEX_LYR')  # e.g. 'boreal_tiles_v004'

    # Single-year composite: the start and stop year are the same.
    YEAR_START, YEAR_STOP = (YEAR, YEAR)
    HLS_PRODUCT = params.get('HLS_PRODUCT')  # e.g. 'H30'
    MAX_CLOUDS = params.get('MAX_CLOUDS')    # e.g. 0

    OUTDIR = params.get('OUTDIR')  # e.g. '/projects/my-private-bucket/tmp/mask_test_keep_snow'

    # Alternative scripts used previously: build_ms_composite.py, build_ms_composite_multip.py
    script = '/projects/my-private-bucket/code-git-shared/icesat2_boreal/lib/build_ms_composite_addEVI2.py'

    # One list element per command-line token, so values are never re-parsed by a shell.
    cmd = [
        'python', script,
        '--in_tile_fn', INDEX_FN,
        '--in_tile_layer', INDEX_LYR,
        '--sat_api', SAT_API,
        '--tile_buffer_m', 0,
        '--in_tile_num', FOCAL_TILE,
        '--output_dir', OUTDIR,
        '-sy', YEAR_START,
        '-ey', YEAR_STOP,
        '-smd', SEASON_START,
        '-emd', SEASON_STOP,
        '-mc', MAX_CLOUDS,
        '--composite_type', MS_COMP_TYPE,
        '--hls_product', HLS_PRODUCT,
        '--thresh_min_ndvi', -1,
        '--min_n_filt_results', MIN_N_FILT_RESULTS,
        '--stat', STAT,
        '--stat_pct', STAT_PCT,
        '--target_spectral_index', TARGET_SPECTRAL,
        '--do_indices',
        # Explicit red-band range limits (previously tried: 0.01 0.1).
        '--rangelims_red', 0.01, 1,
        # '--search_only',
    ]
    cmd = [str(token) for token in cmd]

    failure = None
    try:
        with subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors='replace',
        ) as proc:
            # Relay the script's log as it is produced.
            for line in proc.stdout:
                print(line, end='')
        # Leaving the `with` block waits for the process, so returncode is set.
        if proc.returncode != 0:
            failure = f"exited with status {proc.returncode}"
    except OSError as err:
        # E.g. no `python` executable on PATH; warn instead of raising.
        failure = f"could not be launched ({err})"

    fn = f'{OUTDIR}/{MS_COMP_TYPE}_{FOCAL_TILE}_{SEASON_START}_{SEASON_STOP}_{YEAR_START}_{YEAR_STOP}_{STAT}{TARGET_SPECTRAL}.tif'
    if STAT == 'percentile':
        # Percentile composites carry the percentile value in the file name.
        fn = f'{OUTDIR}/{MS_COMP_TYPE}_{FOCAL_TILE}_{SEASON_START}_{SEASON_STOP}_{YEAR_START}_{YEAR_STOP}_{STAT}{STAT_PCT}{TARGET_SPECTRAL}.tif'

    if failure is not None:
        print(f"WARNING: composite script {failure} (tile {FOCAL_TILE}, year {YEAR})")
    elif not os.path.isfile(fn):
        print(f"WARNING: composite script finished but the expected output is missing: {fn}")

    return fn

In [10]:
# Define input and output paths
# INDEX_FN: tile index GeoPackage; later cells list its layers and read its `tile_num` column.
INDEX_FN = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid90km_prj.gpkg' 
# BASE_OUTDIR: root output directory; a `tile_NNN/` subdirectory is created under it for each tile.
BASE_OUTDIR = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/tif/HLS_composites/yearly/br_af_grid90km_evi2_p95'
# Alternative output root for the NDVI-based run:
# BASE_OUTDIR = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/tif/HLS_composites/yearly/br_af_grid90km_ndvi_p95'


In [12]:
# Set default parameters
# These values are shared by every run and forwarded to the composite script by wrapper_composite.
SAT_API = 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD'
MS_COMP_TYPE = 'HLS'
HLS_PRODUCT = 'H30'

# Compositing statistic; wrapper_composite adds STAT_PCT to the output file name only for 'percentile'.
# STAT = 'max'
STAT = 'percentile'

# Spectral index passed to the script as --target_spectral_index.
# TARGET_SPECTRAL = 'ndvi'
TARGET_SPECTRAL = 'evi2'
STAT_PCT = 95.0

MIN_N_FILT_RESULTS = 10
MAX_CLOUDS = 0 # Initial max cloud-cover threshold for the image search, e.g. 0 starts the search at 0% cloud cover and goes up to 90%


In [13]:
# The tile index layer defaults to the first layer stored in INDEX_FN.
layer_names = fiona.listlayers(INDEX_FN)
INDEX_LYR = first_layer_name = layer_names[0]

In [14]:
# Template of the settings shared by every run; per-run keys
# (tile, year, season, output directory) are added to copies of it later.
params = dict(
    SAT_API=SAT_API,
    HLS_PRODUCT=HLS_PRODUCT,
    MS_COMP_TYPE=MS_COMP_TYPE,
    MAX_CLOUDS=MAX_CLOUDS,
    MIN_N_FILT_RESULTS=MIN_N_FILT_RESULTS,
    STAT=STAT,
    STAT_PCT=STAT_PCT,
    TARGET_SPECTRAL=TARGET_SPECTRAL,
    INDEX_FN=INDEX_FN,
    INDEX_LYR=INDEX_LYR,
)

In [15]:
# Get all tile IDs from the tile index.
# The index is read once with geopandas; the earlier fiona pass produced the same
# list and was immediately overwritten, so it only cost an extra read of the file.
tiles_gpkg = gpd.read_file(INDEX_FN)
tiles = tiles_gpkg["tile_num"].tolist()

In [16]:
# Sanity check: number of tile IDs read from the index.
len(tiles)

220

In [17]:
# Select tiles to run
# Option: every tile in the index
# tiles_run = tiles

# Option: a slice of the index list
# tiles_run = tiles[3:4]
# tiles_run
# Current selection: explicit tile numbers, passed to the script as --in_tile_num.
tiles_run = [89,99]
# tiles_run = [99]


In [18]:
# Years to process; one parameter set is built per (tile, year) pair.
# years = [2018,2019,2020,2021,2022]
years = [2020,2022,2024]

# type(years)

In [19]:
%%time
# Create parameter list and create output directories
# This is set up to do monthly composites
params_list = []
start_day = "01-01"
end_day = "12-31"

for tile in tiles_run: 
    for year in years:

        # output directory for specific run
        outdir = f'{BASE_OUTDIR}/tile_{tile:03d}/'
        
        os.makedirs(outdir, exist_ok=True)

        run_params = copy.deepcopy(params)
        # run_params = params.copy()
        run_params.update({
            'FOCAL_TILE': tile,
            'YEAR': year,
            'SEASON_START': start_day,
            'SEASON_STOP': end_day,
            'OUTDIR': str(outdir)
        })

        params_list.append(run_params)



CPU times: user 2.09 ms, sys: 69 µs, total: 2.16 ms
Wall time: 118 ms


In [20]:
# len(params_list)
# params_list

In [21]:
# Spot check: show the first parameter set that will be passed to wrapper_composite.
params_list[0]

{'SAT_API': 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD',
 'HLS_PRODUCT': 'H30',
 'MS_COMP_TYPE': 'HLS',
 'MAX_CLOUDS': 0,
 'MIN_N_FILT_RESULTS': 10,
 'STAT': 'percentile',
 'STAT_PCT': 95.0,
 'TARGET_SPECTRAL': 'evi2',
 'INDEX_FN': '/projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid90km_prj.gpkg',
 'INDEX_LYR': 'br_af_grid90km_prj',
 'FOCAL_TILE': 89,
 'YEAR': 2020,
 'SEASON_START': '01-01',
 'SEASON_STOP': '12-31',
 'OUTDIR': '/projects/my-private-bucket/HLS-1DCNN-AGB/data/tif/HLS_composites/yearly/br_af_grid90km_evi2_p95/tile_089/'}

In [22]:
# params_list[0]#['SEASON_START'].split('-')[0]
# params_list[10:13]

In [23]:
%%time
for params in params_list:
    try:
        print(f"Running tile {params['FOCAL_TILE']}, year {params['YEAR']}")
        output_file = wrapper_composite(params)
        print(f"Output saved to: {output_file}")
    except Exception as e:
        print(f"Error processing tile {params['FOCAL_TILE']}, year {params['YEAR']}: {e}")

Running tile 89, year 2020

Tiles path:		 /projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid90km_prj.gpkg
Tile number:		 89
Output res (m):		 30
in_bbox:		 [5469419.049164498, 7224073.29125729, 5559419.049164498, 7314073.29125729]
bbox 4326:		 [-49.378378833496065, -25.01637488893418, -48.45754764772649, -24.17719601338586]
Getting output dims from buffered (buffer=0.0) original tile geometry...
Output dims:		3000 x 3000
Composite type:		HLS

Getting HLS Surface Reflectance H30 data...
bbox: [-49.378378833496065, -25.01637488893418, -48.45754764772649, -24.17719601338586]

Querying STAC for multispectral imagery...
Catalog title: LPCLOUD STAC Catalog
start date, end date:		 2020-01-01T00:00:00Z 2020-12-31T23:59:59Z

Conducting multispectral image search now ...
Searching for:			['HLSL30_2.0', 'HLSS30_2.0']
Max cloudcover threshold starts at: 0% and won't exceed 90%
Min number of filtered results: 10
partial results (HLSL30_2.0):				15
partial results (H

In [None]:
## NOTES

# 1 tile 2 years
# Wall time: 1h 5min 14s
# 1 tile 1 year br_af_grid30km_prj.gpkg
# Wall time: ~36min

In [None]:
# Path returned by the most recent wrapper_composite call in the sequential loop.
output_file

## MULTIPROCESSING - Error with multiple requests

In [None]:
%%time
# mp.cpu_count() - 1
# with mp.Pool(processes= 5) as pool:
#     fn_list = pool.map(wrapper_composite, params_list)
with mp.Pool(processes= 12) as pool:
    fn_list = pool.map(wrapper_composite, params_list)

In [None]:
# # Plotting to check false composite to check
# fn = fn_list[0]
# fn
# rescaled_multiband_fn = fn.replace('.tif', '_rescaled_3band_temp.tif')
# plotlib.rescale_multiband_for_plot(fn, rescaled_multiband_fn, bandlist = [6,4,3], pct=[20,98], nodata=-9999.0) 

# fig,ax=plt.subplots(figsize=(10,10))
# with rasterio.open(rescaled_multiband_fn) as src:
#     #print(src.profile)
#     show(src.read(),transform=src.transform, ax=ax, title=os.path.basename(rescaled_multiband_fn))

In [None]:
### Notes

# Time to run 1 tile 1 year 12 processors



In [None]:
## CHECK ERRORS

## Sometimes no output is created when running multiple jobs