# Example API search for Landsat TM and HLS data

#### Using 2 separate STAC api urls

#### API for HLS
https://lpdaac.usgs.gov/products/hlss30v002/

In [None]:
hls_api_url = 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD'

#### API for USGS LandsatLook
https://landsatlook.usgs.gov/

In [5]:
#landsat_api_url = "https://landsatlook.usgs.gov/sat-api" # old one
landsat_api_url = "https://landsatlook.usgs.gov/stac-server"

#### Use this example update `query_stac()` in `fetch_HLS.py`

In [78]:
sat_api_url = landsat_api_url

In [77]:
import pystac_client
pystac_client.__version__

'0.6.1'

In [7]:
from pystac_client import Client

cat = Client.open(sat_api_url)

for collection in cat.get_all_collections():
    print(collection)
#... take the collection name and use below

# Filter by platform
query = {
    #"platform": {"eq": "LANDSAT_4"}
    "platform": {"eq": "LANDSAT_5"}
    #"platform": {"eq": "LANDSAT_7"}
    #"platform": {"eq": "LANDSAT_8"}
    #"platform": {"eq": "LANDSAT_9"}
}

# can also add bbox, and datetime ranges here
results = cat.search(
    collections='landsat-c2l2-sr',
    max_items=10,
    query=query
)

for item in results.items():
    print(item)

<CollectionClient id=landsat-c2l2-sr>
<CollectionClient id=landsat-c2l2-st>
<CollectionClient id=landsat-c2ard-st>
<CollectionClient id=landsat-c2l2alb-bt>
<CollectionClient id=landsat-c2l3-fsca>
<CollectionClient id=landsat-c2ard-bt>
<CollectionClient id=landsat-c2l1>
<CollectionClient id=landsat-c2l3-ba>
<CollectionClient id=landsat-c2l2alb-st>
<CollectionClient id=landsat-c2ard-sr>
<CollectionClient id=landsat-c2l2alb-sr>
<CollectionClient id=landsat-c2l2alb-ta>
<CollectionClient id=landsat-c2l3-dswe>
<CollectionClient id=landsat-c2ard-ta>
<Item id=LT05_L2SP_038037_20120505_20200820_02_T1_SR>
<Item id=LT05_L2SP_031036_20120504_20200820_02_T1_SR>
<Item id=LT05_L2SP_031035_20120504_20200820_02_T1_SR>
<Item id=LT05_L2SP_031034_20120504_20200820_02_T1_SR>
<Item id=LT05_L2SP_031033_20120504_20200820_02_T1_SR>
<Item id=LT05_L2SP_039038_20120426_20200820_02_T1_SR>
<Item id=LT05_L2SP_039037_20120426_20200820_02_T1_SR>
<Item id=LT05_L2SP_039036_20120426_20200820_02_T1_SR>
<Item id=LT05_L2SP_

In [None]:
#item

### Search MAAP

In [9]:
# import the MAAP package
from maap.maap import MAAP

# import printing package to help display outputs
from pprint import pprint

# invoke the MAAP constructor using the maap_host argument
maap = MAAP(maap_host='api.maap-project.org')

In [10]:
MAX_RESULTS = 500

In [11]:
collections = maap.searchCollection(
    #short_name='LC01_Landsat_1187',
    #version='006',
    cmr_host='cmr.earthdata.nasa.gov',
    cloud_hosted="true",
    limit=5000
)
len(collections)

5000

In [12]:
[print(collections[i]['Collection']['ShortName']) for i in range(0,len(collections)) if 'Landsat' in collections[i]['Collection']['ShortName']]

CD34_Amazon_Landsat_1176
CD37_Biomass_Landsat_Glas_1145
LC01_Landsat_1187
LC09_Landsat_987
LC10_Landsat_TM_852
LC10_Landsat_ETM_846
LC35_Landsat7_Fire_Masks_1071
ND02_Landsat_TM_MSS_Para_1156
Landsat8_Sentinel2_Phenocam_2248


[None, None, None, None, None, None, None, None, None]

#### The ABoVE Boreal AGB c2020 data

In [117]:
[print(collections[i]['Collection']['ShortName']) for i in range(0,len(collections)) if 'ICESat' in collections[i]['Collection']['ShortName']]

Boreal_AGB_Density_ICESat2_2186


[None]

In [16]:
import requests
import datetime
import geopandas as gpd
import json
import os
import numpy as np
import rasterio as rio
from rasterio.warp import *
#from CovariateUtils import get_index_tile, get_creds
import itertools
import botocore
import boto3

# TODO: update 3.1.2_dps.py with new LANDSAT 4/5/7 functionality
### This will allow us to build composites of Landsat back to 1984
Need to think about:  
a.  band names are different in the Landsat Collection 2 SR dataset `('blue', 'green', 'red', 'nir08', 'swir16', 'swir22')`  
b. `HLS_product` variable name should be changed  
c. create `get_LANDSAT_data()` inside `fetch_LANDSAT.py`

### Update and test this function before - put this into `fetch_LANDSAT.py`

In [74]:
def query_stac(year, bbox, max_cloud, api, start_month_day, end_month_day, MS_product='L30', MS_product_version='2.0', MAX_N_RESULTS=500):
    
    print('\nQuerying STAC for multispectral imagery...')
    catalog = Client.open(api)
    
    date_min = str(year) + '-' + start_month_day

    date_max = str(year) + '-' + end_month_day
    start_date = datetime.datetime.strptime(date_min, "%Y-%m-%d")
    end_date = datetime.datetime.strptime(date_max, "%Y-%m-%d") 
    start = start_date.strftime("%Y-%m-%dT00:00:00Z")
    end = end_date.strftime("%Y-%m-%dT23:59:59Z")
    
    print('start date, end date:\t\t', start, end)
    
    # Note: H30 this is our name for a HARMONIZED 30m composite with S30 and L30
    if MS_product == 'L30' or MS_product == 'S30':
        MS_product_list = [f"HLS{MS_product}.v{MS_product_version}"]
    if MS_product == 'H30':
        MS_product_list = [f"HLSL30.v{MS_product_version}", f"HLSS30.v{MS_product_version}"]
    if MS_product == 'landsat-c2l2-sr':
        MS_product_list = [MS_product]
        
    print(f'\nConducting multispectral image search now...')
    print(f'Searching for:\t\t\t{MS_product_list}')
    
    search = catalog.search(
        collections=MS_product_list,
        datetime=[start,end],
        bbox=bbox,
        limit=MAX_N_RESULTS,
        max_items=MAX_N_RESULTS, # for testing, and keep it from hanging
        # query={"eo:cloud_cover":{"lt":20}} #doesn't work
    )
    results = search.get_all_items_as_dict()
    
    print("initial results:\t\t", len(results['features']))
    
    filtered_results = []
    for i in results['features']:
        if int(i['properties']['eo:cloud_cover']) <= max_cloud:
            filtered_results.append(i)
    
    results['features'] = filtered_results

    print("filtered results:\t\t", len(results['features']))
    print('\nSearch complete.\n')
    return results

def get_LANDSAT_data(in_tile_fn, in_tile_layer, in_tile_id_col, in_tile_num, out_dir, sat_api, start_year, end_year, start_month_day, end_month_day, max_cloud, local=False, hls_product='L30', hls_product_version='2.0'):

    # Need a dict that used HLS product to specify band names
    HLS_bands_dict = dict({
                            'L30':  ['B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'Fmask'], 
                            'S30':  ['B02', 'B03', 'B04', 'B8A', 'B11', 'B12', 'Fmask'],
                            'landsat-c2l2-sr': ['blue', 'green', 'nir08' , 'swir16', 'swir22', 'cloud_qa'] #TODO check if 'cloud_qa' is same format as 'Fmask'
                          })
    
    if hls_product != 'L30' and hls_product != 'S30' and hls_product != 'H30' and hls_product != 'landsat-c2l2-sr':
        print("Landsat or HLS product type not recognized: Must be landsat-c2l2-sr, L30, S30, or both [H30].")
        os._exit(1)
        
    geojson_path_albers = in_tile_fn
    layer = in_tile_layer
    tile_n = int(in_tile_num)
    
    print('\nGetting Landsat or HLS data...')
    
    tile_id = get_index_tile(geojson_path_albers, in_tile_id_col, tile_n, buffer=0, layer = layer)
    #print(tile_id)
    # Accessing imagery
    # Select an area of interest
    bbox_list = [tile_id['bbox_4326']]
    max_cloud = max_cloud
    years = range(int(start_year), int(end_year)+1)
    api = sat_api
    
    #
    # Query the STAC
    #
    for bbox in bbox_list:
        # Geojson of total scenes - Change to list of scenes
        print(f'bbox: {bbox}')
        response_by_year = [query_stac(year, bbox, max_cloud, api, start_month_day, end_month_day, HLS_product=hls_product, HLS_product_version=hls_product_version) for year in years]
        
        print(len(response_by_year[0]['features']))
    
    # Take the search over several years, write the geojson response for each
    ## TODO: need unique catalog names that indicate bbox tile, and time range used.
    save_path = out_dir
    if (not os.path.isdir(save_path)): os.mkdir(save_path)

    merge_catalogs = {
        "type": "FeatureCollection",
        "features": list(itertools.chain.from_iterable([f["features"] for f in response_by_year])),
    }

    
    #
    # Write local JSON that catalogs the HLS data retrieved from query
    #
    master_json = os.path.join(save_path, f'master_{tile_n}_{np.min(years)}-{start_month_day}_{np.max(years)}-{end_month_day}_HLS.json')
    with open(master_json, 'w') as outfile:
            json.dump(merge_catalogs, outfile)

    master_json = write_local_data_and_catalog_s3(master_json, HLS_bands_dict, save_path, local, s3_path="s3://")
    
    return master_json

### Query Landsat - this works as expected

In [67]:
results_landsat = query_stac(2013, [-122.4969372889167, 59.282959601314104, -120.20175133769581, 60.40424594544261], 
               100, # max cloud 
               landsat_api_url, 
               #hls_api_url,
               '06-01', '10-15',
              MS_product = 'landsat-c2l2-sr')


Querying STAC for multispectral imagery...
start date, end date:		 2013-06-01T00:00:00Z 2013-10-15T23:59:59Z

Conducting multispectral image search now...
Searching for:			['landsat-c2l2-sr']
initial results:		 123
filtered results:		 123

Search complete.



In [68]:
list(results_landsat.get('features')[0].get('assets').keys())

['thumbnail',
 'reduced_resolution_browse',
 'index',
 'MTL.json',
 'blue',
 'green',
 'red',
 'nir08',
 'swir16',
 'swir22',
 'atmos_opacity',
 'cloud_qa',
 'qa_pixel',
 'qa_radsat',
 'ANG.txt',
 'MTL.txt',
 'MTL.xml']

In [69]:
file_href = results_landsat.get('features')[0].get('assets').get('blue').get('href')
file_href

'https://landsatlook.usgs.gov/data/collection02/level-2/standard/etm/2013/048/019/LE07_L2SP_048019_20131013_20200907_02_T1/LE07_L2SP_048019_20131013_20200907_02_T1_SR_B1.TIF'

### Query HLS - being limited to max 100 results

In [75]:
results = query_stac(2024, 
                     [178.3104249050895, 63.20910529144648, 179.99999, 64.01360592554103], # tile 3916
                10, 
               #landsat_api_url, 
               hls_api_url,
               '07-01', '08-31',
                MS_product='H30')


Querying STAC for multispectral imagery...
start date, end date:		 2024-07-01T00:00:00Z 2024-08-31T23:59:59Z

Conducting multispectral image search now...
Searching for:			['HLSL30.v2.0', 'HLSS30.v2.0']
initial results:		 100
filtered results:		 19

Search complete.



In [155]:
file_href = results.get('features')[0].get('assets').get('B04').get('href')
file_s3 = file_href.replace('https://data.lpdaac.earthdatacloud.nasa.gov/', 's3://')
file_s3

's3://lp-prod-protected/HLSL30.020/HLS.L30.T10VEN.2013205T191211.v2.0/HLS.L30.T10VEN.2013205T191211.v2.0.B04.tif'

# Local test: does an HLS composite run locally

In [428]:
#!python /projects/code/icesat2_boreal/lib/3.1.2_dps.py --in_tile_fn /projects/shared-buckets/montesano/databank/boreal_tiles_v004.gpkg --in_tile_num 37149 --in_tile_layer boreal_tiles_v004 --sat_api https://cmr.earthdata.nasa.gov/stac/LPCLOUD --tile_buffer_m 0 --start_year 2021 --end_year 2021 --start_month_day 07-01 --end_month_day 08-31 --max_cloud 50 --composite_type HLS --shape 3000 --hls_product H30 -o /projects/local_output --search_only

In [460]:
DPS_INPUT_TILE_NUM_LIST = [37149, 2422]

In [65]:
INDEX_FN =  'https://maap-ops-workspace.s3.amazonaws.com/shared/montesano/databank/boreal_tiles_v004.gpkg'
INDEX_LYR = 'boreal_tiles_v004'
SAT_API = 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD'
OUTDIR = '/projects/my-private-bucket/tmp'
YEAR = 2021
MS_DATA_TYPE = 'HLS'

args = f"-i {INDEX_FN} \
-lyr {INDEX_LYR} \
-a {SAT_API} \
--tile_buffer_m 0 \
--in_tile_num {2422} \
-o {OUTDIR} \
-sy {YEAR} -ey {YEAR} -smd {SEASON_START} -emd {SEASON_STOP} -mc {10} \
--composite_type {MS_DATA_TYPE} \
--hls_product H30 \
--rangelims_red 0 1 \
--thresh_min_ndvi -1 \
--search_only"

cmd = f'python /projects/code/icesat2_boreal/lib/3.1.2_dps.py {args}'
!echo $cmd
!eval $cmd
#!echo
#!eval $cmd

NameError: name 'SEASON_START' is not defined