In [28]:
import os

from pathlib import Path
from datetime import datetime
from calendar import monthrange

import fiona

import multiprocessing as mp

import geopandas as gpd


In [29]:
import sys
sys.path.append('/projects/my-private-bucket/code-git-shared/icesat2_boreal/lib')
import plotlib
from mosaiclib import *

In [30]:
import rasterio
from rasterio.plot import show_hist, show
import numpy as np
import matplotlib.pyplot as plt

In [31]:
# Functions 
def wrapper_composite(params):
    """Build one multispectral composite by running the external composite script.

    The script is launched as a child process from an explicit argument list
    (no shell), its combined stdout/stderr is echoed line by line, and the
    path where the composite is expected to be written is returned. That path
    is derived from the parameters only; this function does not check that
    the file was actually produced.

    Parameters
    ----------
    params : dict
        Run settings, read with ``params.get`` (a missing key becomes ``None``
        and is passed to the script as the text ``None``). Keys used:
        ``FOCAL_TILE``, ``SAT_API``, ``MS_COMP_TYPE``, ``YEAR``,
        ``MIN_N_FILT_RESULTS``, ``SEASON_START``, ``SEASON_STOP``, ``STAT``,
        ``STAT_PCT``, ``TARGET_SPECTRAL``, ``INDEX_FN``, ``INDEX_LYR``,
        ``HLS_PRODUCT``, ``MAX_CLOUDS`` and ``OUTDIR``.

    Returns
    -------
    str
        ``{OUTDIR}/{MS_COMP_TYPE}_{FOCAL_TILE}_{SEASON_START}_{SEASON_STOP}_{YEAR}_{YEAR}_{STAT}{TARGET_SPECTRAL}.tif``,
        with ``STAT_PCT`` inserted right after ``STAT`` when
        ``STAT == 'percentile'``.

    Notes
    -----
    * A non-zero exit status, or a failure to launch the interpreter, is
      reported with a printed warning instead of an exception, and the
      expected path is still returned.
    * Argument values are handed to the script verbatim: values containing
      spaces stay intact, and shell expansions (``~``, ``$VAR``) are not applied.
    """
    # Local import so the function does not depend on notebook-level imports.
    import subprocess

    FOCAL_TILE = params.get('FOCAL_TILE')
    SAT_API = params.get('SAT_API')
    MS_COMP_TYPE = params.get('MS_COMP_TYPE')
    YEAR = params.get('YEAR')
    MIN_N_FILT_RESULTS = params.get('MIN_N_FILT_RESULTS')
    SEASON_START = params.get('SEASON_START')
    SEASON_STOP = params.get('SEASON_STOP')

    STAT = params.get('STAT')
    STAT_PCT = params.get('STAT_PCT')
    TARGET_SPECTRAL = params.get('TARGET_SPECTRAL')

    INDEX_FN = params.get('INDEX_FN')    # e.g. 'https://maap-ops-workspace.s3.amazonaws.com/shared/montesano/databank/boreal_tiles_v004.gpkg'
    INDEX_LYR = params.get('INDEX_LYR')  # e.g. 'boreal_tiles_v004'

    # Start and stop year are both set to YEAR.
    YEAR_START, YEAR_STOP = (YEAR, YEAR)
    HLS_PRODUCT = params.get('HLS_PRODUCT')  # e.g. 'H30'
    MAX_CLOUDS = params.get('MAX_CLOUDS')    # e.g. 0

    OUTDIR = params.get('OUTDIR')  # e.g. '/projects/my-private-bucket/tmp/mask_test_keep_snow'

    # Script variants previously used from the same directory:
    #   build_ms_composite.py, build_ms_composite_multip.py
    script = '/projects/my-private-bucket/code-git-shared/icesat2_boreal/lib/build_ms_composite_addEVI2.py'

    argv = [
        'python', script,
        '--in_tile_fn', INDEX_FN,
        '--in_tile_layer', INDEX_LYR,
        '--sat_api', SAT_API,
        '--tile_buffer_m', 0,
        '--in_tile_num', FOCAL_TILE,
        '--output_dir', OUTDIR,
        '-sy', YEAR_START,
        '-ey', YEAR_STOP,
        '-smd', SEASON_START,
        '-emd', SEASON_STOP,
        '-mc', MAX_CLOUDS,
        '--composite_type', MS_COMP_TYPE,
        '--hls_product', HLS_PRODUCT,
        '--thresh_min_ndvi', -1,
        '--min_n_filt_results', MIN_N_FILT_RESULTS,
        '--stat', STAT,
        '--stat_pct', STAT_PCT,
        '--target_spectral_index', TARGET_SPECTRAL,
        '--do_indices',
        # Optional extra flag: '--search_only'
        # The script default is effectively no limit [-1e9, 1e9]; 0.01-0.1 was also tried.
        '--rangelims_red', 0.01, 1,
    ]
    argv = [str(token) for token in argv]

    try:
        # Merge stderr into stdout and echo each line as it arrives so long
        # runs still show progress in the notebook.
        with subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                              universal_newlines=True) as proc:
            for line in proc.stdout:
                print(line, end='')
        # Leaving the ``with`` block waits for the child, so returncode is set here.
        if proc.returncode != 0:
            print(f"WARNING: composite command exited with status {proc.returncode}: {' '.join(argv)}")
    except OSError as err:
        print(f"WARNING: could not launch composite command: {err}")

    # Only percentile composites carry the percentile value in the file name.
    stat_tag = f'{STAT}{STAT_PCT}' if STAT == 'percentile' else f'{STAT}'
    fn = f'{OUTDIR}/{MS_COMP_TYPE}_{FOCAL_TILE}_{SEASON_START}_{SEASON_STOP}_{YEAR_START}_{YEAR_STOP}_{stat_tag}{TARGET_SPECTRAL}.tif'

    return fn

In [32]:
# Default parameters shared by every composite run in this notebook

# --- Imagery source ---
SAT_API = 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD'
MS_COMP_TYPE, HLS_PRODUCT = 'HLS', 'H30'

# --- Compositing rule ---
# Alternatives used before: STAT = 'max', TARGET_SPECTRAL = 'ndvi'
STAT, STAT_PCT = 'percentile', 95.0
TARGET_SPECTRAL = 'evi2'

# --- Scene search ---
MIN_N_FILT_RESULTS = 10
# Cloud-cover threshold the search starts from, e.g. 0 starts at 0% cloud cover and goes up to 90%
MAX_CLOUDS = 0

# --- Tile index ---
# The index needs a tile_num column holding the tile id and must be in a projected coordinate system.
# Other grids used before:
#   INDEX_FN = '/projects/HLS/data/shp/atlantic_forest/tiles/ls_unit50km_utm23s.gpkg'
#   INDEX_FN = '/projects/my-private-bucket/HLS/data/shp/atlantic_forest/tiles/br_af_grid90km_prj.gpkg'
#   INDEX_LYR = 'br_af_grid90km_prj'
#   INDEX_FN = '/projects/HLS/data/shp/atlantic_forest/tiles/ls_unit100km_prj.gpkg'
# To take the layer name from the file instead of hard-coding it:
#   layer_names = fiona.listlayers(INDEX_FN)
#   first_layer_name = layer_names[0]
#   INDEX_LYR = first_layer_name
INDEX_LYR = 'br_af_grid30km_prj'
INDEX_FN = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg'

# --- Output root ---
BASE_OUTDIR = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/tif/tmp/tests_ndvi'

In [None]:
# INDEX_FN = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid90km_prj.gpkg' 
# INDEX_LYR = 'br_af_grid30km_prj'

# tiles_gpkg = gpd.read_file(INDEX_FN)


In [54]:
# import fiona
# with fiona.open(INDEX_FN) as src:
#     print(src.crs)
# import rasterio
# rast_path = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/tif/tmp/tests_evi2_p95_2/tile_004/2022/HLS_4_12-01_12-31_2022_2022_percentile95.0evi2.tif'
# with rasterio.open(rast_path) as src:
#     print(src.crs)


EPSG:5880


### Testing composite run

In [None]:
# Tiles and years for the test run

# Read the tile index and collect every value of its tile_num column.
tiles_gpkg = gpd.read_file(INDEX_FN)
tiles = tiles_gpkg.loc[:, "tile_num"].tolist()
# tiles

# Test selection: a single year and only the fourth entry of the tile list.
years = [2022]  # [2018, 2024]
tiles_run = tiles[3:4]

# Reading the tile numbers with fiona instead:
# with fiona.open(INDEX_FN, layer=0) as src:
#     tiles = [feature["properties"]["tile_num"] for feature in src]
tiles_run

In [None]:
# Shared run parameters; tile, year, season and output directory are added per run
params = dict(
    SAT_API=SAT_API,
    HLS_PRODUCT=HLS_PRODUCT,
    MS_COMP_TYPE=MS_COMP_TYPE,
    MAX_CLOUDS=MAX_CLOUDS,
    MIN_N_FILT_RESULTS=MIN_N_FILT_RESULTS,
    STAT=STAT,
    STAT_PCT=STAT_PCT,
    TARGET_SPECTRAL=TARGET_SPECTRAL,
    INDEX_FN=INDEX_FN,
    INDEX_LYR=INDEX_LYR,
)

In [None]:
# Display the shared parameter dictionary before the run-specific keys are added.
params

In [None]:
# Single test run: tile 4, year 2022, season 01-01 to 12-01, written directly into BASE_OUTDIR.
params.update(
    FOCAL_TILE=4,
    YEAR=2022,
    SEASON_START='01-01',
    SEASON_STOP='12-01',
    OUTDIR=str(BASE_OUTDIR),
)

# fn is the path wrapper_composite expects the composite to be written to.
fn = wrapper_composite(params)

In [None]:
# Display the parameter dictionary again, now including the run-specific keys set for the test run.
params

In [None]:
# Check image bands
# Opens the composite returned by the single-tile test run (`fn`). `output_file`
# is only assigned in later cells, so referring to it here raises NameError
# when the notebook is run top to bottom on a fresh kernel.
with rasterio.open(fn) as src:
    # print(dir(src))
    # print(src.descriptions.index('count')) 
    print(src.descriptions) 
    
#     band_index = src.descriptions.index('count') + 1  # +1 because src.read is 1-based
    
#     # Read the count band as a numpy array
#     count_array = src.read(band_index)
    
# print(count_array.shape)
# print(count_array)   # if you want to see values
# import numpy
# print(numpy.unique(count_array))
# print(numpy.histogram(count_array))
    

In [None]:
## Plotting count
# fig,ax=plt.subplots(figsize=(10,10))
# with rasterio.open(output_file) as src:
#     count_band = src.read(src.descriptions.index('count')+1, masked=True)   # read the count band
#     # print(dir(plt))
#     # plt.legend()
#     # print(count_band)
#     img = show(count_band, transform=src.transform, ax=ax,
#          cmap="viridis") 
#     cbar = plt.colorbar(img.get_images()[0], ax=ax, label="Count values")
 
    

In [None]:
# import os
# os.system('gdalinfo -mm ' + output_file)

In [None]:
## Plotting true color
# with rasterio.open(output_file) as src:
#     print("Band descriptions:", src.descriptions)

#     # Define which bands correspond to R, G, B (adjust names/indexes as needed)
#     r_index = src.descriptions.index("Red") + 1
#     g_index = src.descriptions.index("Green") + 1
#     b_index = src.descriptions.index("Blue") + 1

#     # Read the 3 bands as masked arrays
#     rgb = src.read([r_index, g_index, b_index], masked=True)

#     # Reshape to H x W x 3 for plotting
#     rgb_img = rasterio.plot.reshape_as_image(rgb)

#     # Clip negative values to 0
#     rgb[rgb < 0] = 0

#     # Normalize to [0, 1] by percent stretch (optional: 2–98%)
#     p2, p98 = np.percentile(rgb.compressed(), (2, 98))
#     rgb = np.clip((rgb - p2) / (p98 - p2), 0, 1)

#     # Scale to [0, 255] and convert to uint8
#     rgb = (rgb * 255).astype(np.uint8)

#     # Reshape to H x W x 3 for plotting
#     rgb_img = rasterio.plot.reshape_as_image(rgb)

# fig, ax = plt.subplots(figsize=(10, 10))
# ax.imshow(rgb_img)
# ax.set_title("RGB Composite")
# ax.axis("off")
# plt.show()

In [None]:
## Plotting false composite
# Rescale three bands of the test composite into a temporary file next to it, then display that file.
output_file = fn
rescaled_multiband_fn = output_file.replace('.tif', '_rescaled_3band_temp.tif')

# Alternative band combination: bandlist = [6,4,3]
plotlib.rescale_multiband_for_plot(
    output_file,
    rescaled_multiband_fn,
    bandlist=[4, 3, 2],
    pct=[20, 98],
    nodata=-9999.0,
)

fig, ax = plt.subplots(figsize=(10, 10))
with rasterio.open(rescaled_multiband_fn) as src:
    # print(src.profile)
    show(
        src.read(),
        transform=src.transform,
        ax=ax,
        title=os.path.basename(rescaled_multiband_fn),
    )

In [None]:
# INDEX_FN = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg' 

# tiles_gpkg = gpd.read_file(INDEX_FN)

In [None]:
# tiles_gpkg.iloc[3].geometry.bounds

In [None]:
# tiles_gpkg

In [None]:
# for tile in tiles:
#     for year in years:
#         for month in range(2, 4):
#             # print(tile,year,month)
#             outdir = f'{BASE_OUTDIR}/tile_{tile}/{year}/{month:02d}'
#             print(outdir)



In [None]:
# BASE_OUTDIR

In [None]:
# Display the tiles the monthly loop below will iterate over.
tiles_run

In [None]:
# Display the years the monthly loop below will iterate over.
years

In [None]:
%%time
# Sequential run: one composite per tile, year and calendar month,
# each written to its own tile/year/month directory.
for tile in tiles_run:
    for year in years:
        for month in range(1, 13):  # range(2, 4) was used for a shorter trial
            # Season limits are the first and last day of the month, as MM-DD.
            start_day = f"{month:02d}-01"
            end_day = f"{month:02d}-{monthrange(year, month)[1]:02d}"

            # Output directory for this specific run
            outdir = f'{BASE_OUTDIR}/tile_{tile:03d}/{year}/{month:02d}'
            os.makedirs(outdir, exist_ok=True)

            # Overwrite the run-specific keys of the shared dictionary in place.
            params.update(
                FOCAL_TILE=tile,
                YEAR=year,
                SEASON_START=start_day,
                SEASON_STOP=end_day,
                OUTDIR=str(outdir),
            )

            try:
                print(f"Running tile {tile}, year {year}, month {month:02d}")
                output_file = wrapper_composite(params)
                print(f"Output saved to: {output_file}")
            except Exception as err:
                # Report the failure and continue with the next month.
                print(f"Error processing tile {tile}, year {year}, month {month:02d}: {err}")

In [None]:
# Point output_file at one specific composite, replacing the value left by the loop above.
# NOTE(review): this path lies outside BASE_OUTDIR (monthly_composites_evi tree) and uses the
# 'max' naming rather than the percentile naming configured above; presumably left over from
# an earlier run - confirm it is still wanted.
output_file = '/projects/my-private-bucket/HLS/data/tif/monthly_composites_evi/tile_001/2022/03/HLS_1_03-01_03-31_2022_2022_maxevi2.tif'

In [None]:
# rescaled_multiband_fn = output_file.replace('.tif', '_rescaled_3band_temp.tif')

# plotlib.rescale_multiband_for_plot(output_file, rescaled_multiband_fn, bandlist = [6,4,3], pct=[20,98], nodata=-9999.0) 


# fig,ax=plt.subplots(figsize=(10,10))
# with rasterio.open(rescaled_multiband_fn) as src:
#     #print(src.profile)
    # show(src.read(),transform=src.transform, ax=ax, title=os.path.basename(rescaled_multiband_fn))

### Testing multiprocessing parallel

In [35]:
# Report the CPU count and memory of this machine before choosing a pool size.
import multiprocessing as mp

num_cores = mp.cpu_count()
print(f"Number of CPU cores: {num_cores}")

import psutil

# virtual_memory() values are divided by 1e9 so the figures print in GB.
mem = psutil.virtual_memory()
print(f"Total memory: {mem.total / 1e9:.2f} GB")
print(f"Available memory: {mem.available / 1e9:.2f} GB")

Number of CPU cores: 32
Total memory: 267.33 GB
Available memory: 256.80 GB


In [36]:
# Rebuild the shared parameters from the current defaults, discarding the
# run-specific keys added during the sequential test.
params = dict(
    SAT_API=SAT_API,
    HLS_PRODUCT=HLS_PRODUCT,
    MS_COMP_TYPE=MS_COMP_TYPE,
    MAX_CLOUDS=MAX_CLOUDS,
    MIN_N_FILT_RESULTS=MIN_N_FILT_RESULTS,
    STAT=STAT,
    STAT_PCT=STAT_PCT,
    TARGET_SPECTRAL=TARGET_SPECTRAL,
    INDEX_FN=INDEX_FN,
    INDEX_LYR=INDEX_LYR,
)

In [41]:
INDEX_FN = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg'  # Needs a tile_num column with the index and be in a projected coordinate system
# Use the first layer found in the file as the tile layer
layer_names = fiona.listlayers(INDEX_FN)
first_layer_name = layer_names[0]
INDEX_LYR = first_layer_name

# `params` is built in an earlier cell, before the index is redefined here.
# Push the new values into it so the runs below cannot use a stale index.
params.update({'INDEX_FN': INDEX_FN, 'INDEX_LYR': INDEX_LYR})

# Output root for the parallel run
BASE_OUTDIR = '/projects/my-private-bucket/HLS-1DCNN-AGB/data/tif/tmp/tests_evi2_p95_2'
# INDEX_LYR

In [42]:
# tiles = [1]#[5,10]  
years = [2022]

# All tiles: read the index once with geopandas and take its tile_num column.
# A separate fiona pass over the same file is not needed, since its result
# would be replaced by this list anyway.
tiles_gpkg = gpd.read_file(INDEX_FN)
tiles = tiles_gpkg["tile_num"].tolist()
# # tiles

In [43]:
# Number of tile numbers read from the index.
len(tiles)

1561

In [44]:
# TEST (earlier trial with the first tile only)
# tiles_run = tiles[0:1]
# tiles_run
# CPU times: user 560 ms, sys: 555 ms, total: 1.11 s
# Wall time: 5min 41s
# Only months 9, 10 and 11 gave valid results

# Run 1: select only the fourth entry of the tile list
tiles_run = tiles[3:4]
tiles_run


[4]

## Running all tiles 1 year multiprocess

In [45]:
# Printing output directory
# Preview of one directory path per tile, year and month (nothing is created here).
# NOTE(review): the parameter-building cell further down writes to
# tile_XXX/<year>/ without the month subfolder - confirm which layout is intended.
outdirs = [
    f'{BASE_OUTDIR}/tile_{tile:03d}/{year}/{month:02d}'
    for tile in tiles_run
    for year in years
    for month in range(1, 13)
]

# Show the last path; None when the tile or year selection is empty.
outdirs[-1] if outdirs else None

'/projects/my-private-bucket/HLS-1DCNN-AGB/data/tif/tmp/tests_evi2_p95_2/tile_004/2022/12'

In [46]:
import copy

In [47]:
# Years to process in the parallel run; the commented lists are alternative selections.
years = [2022]
# years = [2018,2019,2020,2021]
# years = [2015,2016,2017]

# type(years)

In [48]:
%%time
# Build one independent parameter dictionary per tile, year and month,
# creating the output directory each run writes to.
params_list = []
for tile in tiles_run:
    for year in years:
        for month in range(1, 13):  # range(2, 4) was used for a shorter trial
            # Season limits are the first and last day of the month, as MM-DD.
            start_day = f"{month:02d}-01"
            end_day = f"{month:02d}-{monthrange(year, month)[1]:02d}"

            # All months of a tile/year share one directory
            # (per-month layout used before: f'{BASE_OUTDIR}/tile_{tile:03d}/{year}/{month:02d}')
            outdir = f'{BASE_OUTDIR}/tile_{tile:03d}/{year}/'
            os.makedirs(outdir, exist_ok=True)

            # Copy the shared settings so each run owns its dictionary,
            # then add the run-specific keys.
            run_params = {
                **copy.deepcopy(params),
                'FOCAL_TILE': tile,
                'YEAR': year,
                'SEASON_START': start_day,
                'SEASON_STOP': end_day,
                'OUTDIR': str(outdir),
            }

            params_list.append(run_params)



CPU times: user 2.03 ms, sys: 94 µs, total: 2.12 ms
Wall time: 161 ms


In [49]:
# n_process = mp.cpu_count() - 1
# n_process

In [50]:
# Number of runs queued for the pool (one per tile, year and month).
len(params_list)

12

In [51]:
%%time
# Run wrapper_composite over every entry of params_list using a pool of 5 worker processes.
# (mp.cpu_count() - 1 was considered as the pool size.)
# pool.map returns the results in the same order as params_list, so fn_list[i]
# is the expected output path for params_list[i].
with mp.Pool(processes= 5) as pool:
    fn_list = pool.map(wrapper_composite, params_list)


Tiles path:		 /projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg
Tile number:		 4
Output res (m):		 30

Tiles path:		 /projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg
Tile number:		 4
Output res (m):		 30

Tiles path:		 /projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg
Tile number:		 4
Output res (m):		 30

Tiles path:		 /projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg
Tile number:		 4
Output res (m):		 30

Tiles path:		 /projects/my-private-bucket/HLS-1DCNN-AGB/data/shp/atlantic_forest/tiles/br_af_grid30km_prj.gpkg
Tile number:		 4
Output res (m):		 30
in_bbox:		 [4839419.049164498, 7374073.29125729, 4869419.049164498, 7404073.29125729]
bbox 4326:		 [-55.574889898123715, -23.729593329414808, -55.278057362641555, -23.456105892179057]
Getting output dims from buffered (buffer=0.0) original tile geome

In [None]:
# Take the first returned path (the run built from params_list[0]) and display it.
fn = fn_list[0]
fn

In [None]:
# Rescale bands 6, 4 and 3 of the selected composite into a temporary file next to it, then display that file.
rescaled_multiband_fn = fn.replace('.tif', '_rescaled_3band_temp.tif')

plotlib.rescale_multiband_for_plot(
    fn,
    rescaled_multiband_fn,
    bandlist=[6, 4, 3],
    pct=[20, 98],
    nodata=-9999.0,
)

fig, ax = plt.subplots(figsize=(10, 10))
with rasterio.open(rescaled_multiband_fn) as src:
    # print(src.profile)
    show(
        src.read(),
        transform=src.transform,
        ax=ax,
        title=os.path.basename(rescaled_multiband_fn),
    )

In [None]:
### Notes

# Time to run 1 tile 1 year
# CPU times: user 487 ms, sys: 411 ms, total: 898 ms
# Wall time: 4min 15s

# Time to run 1 tile 2018 - 2022
# CPU times: user 1.93 s, sys: 1.7 s, total: 3.62 s
# Wall time: 18min 22s

In [None]:
## CHECK ERRORS