In [42]:
'''

change for git test
another test

Query CMR STAC for HLS data given a point location and date range
Return a list of asset filenames for AWS or HTTPS access
'''
from datetime import datetime, timedelta
import json
import os
import requests
import boto3
from botocore.exceptions import ClientError
from pystac_client import Client


In [2]:
# which collections to search
collections = ['HLSL30.v2.0', 'HLSS30.v2.0']

# define the point location/centroid for the HLS tile we want
pt = json.loads('{"type":"Point", "coordinates":[-105.530017, 40.15442]}')

# define the dates we want to query
#date_range = "2021-05-01T00:00:00Z/2021-08-30T23:59:59Z"    # closed interval
#date_range = "2021-05-01T00:00:00Z/.."                      # open interval - does not currently work with the CMR-STAC API
#date_range = "2021-05/2021-11"
start_date = datetime(year=2021, day=1, month=1)
end_date = datetime(year=2021, day=31, month=12)
step_date = timedelta(days=28)
search_dates = []

while start_date <= end_date:
    temp_date = start_date.strftime("%Y-%m-%d")
    search_dates.append(temp_date)
    start_date += step_date

if start_date > end_date:
    search_dates.append(end_date.strftime("%Y-%m-%d"))

print(search_dates)

['2021-01-01', '2021-01-29', '2021-02-26', '2021-03-26', '2021-04-23', '2021-05-21', '2021-06-18', '2021-07-16', '2021-08-13', '2021-09-10', '2021-10-08', '2021-11-05', '2021-12-03', '2021-12-31', '2021-12-31']


In [3]:
def search_stac_for_HLS(pt, dt_min, dt_max, cloudcover_max=80, lim=100, url='https://cmr.earthdata.nasa.gov/stac/LPCLOUD', collections=['HLSL30.v2.0', 'HLSS30.v2.0']):
    # open the catalog
    catalog = Client.open(f'{url}')
    
    # perform the search
    search = catalog.search(
        collections=collections,
        intersects=pt,
        datetime=dt_min + '/' + dt_max,
        limit=lim
    )

    links = []

    if search.matched() == 0:
        print('No granules found at point', pt, 'from', dt_min, 'to', dt_max)
    else:
        print('Found', search.matched(), 'granules at point', pt, 'from', dt_min, 'to', dt_max)
        item_collection = search.get_all_items()
        
        for i in item_collection:
            if i.properties['eo:cloud_cover'] <= cloudcover_max:
                if len(links) == 0:
                    print(i.properties)
                for a in i.assets:
                    links.append(i.assets[a].href)

    return(links)

In [4]:
hls_links = search_stac_for_HLS(pt, search_dates[0], search_dates[-1])

Found 77 granules at point {'type': 'Point', 'coordinates': [-105.530017, 40.15442]} from 2021-01-01 to 2021-12-31
{'datetime': '2021-01-01T18:02:55.690Z', 'start_datetime': '2021-01-01T18:02:55.690Z', 'end_datetime': '2021-01-01T18:02:55.690Z', 'eo:cloud_cover': 50}


In [6]:
'''
hls_links = []

for i in range(1, len(search_dates)):
    hls_links += search_stac_for_HLS(pt, search_dates[i-1], search_dates[i])
'''

Found 7 granules at point {'type': 'Point', 'coordinates': [-105.530017, 40.15442]} from 2021-01-01 to 2021-01-29
Found 7 granules at point {'type': 'Point', 'coordinates': [-105.530017, 40.15442]} from 2021-01-29 to 2021-02-26
Found 5 granules at point {'type': 'Point', 'coordinates': [-105.530017, 40.15442]} from 2021-02-26 to 2021-03-26
Found 8 granules at point {'type': 'Point', 'coordinates': [-105.530017, 40.15442]} from 2021-03-26 to 2021-04-23
Found 7 granules at point {'type': 'Point', 'coordinates': [-105.530017, 40.15442]} from 2021-04-23 to 2021-05-21
Found 7 granules at point {'type': 'Point', 'coordinates': [-105.530017, 40.15442]} from 2021-05-21 to 2021-06-18
Found 8 granules at point {'type': 'Point', 'coordinates': [-105.530017, 40.15442]} from 2021-06-18 to 2021-07-16
Found 7 granules at point {'type': 'Point', 'coordinates': [-105.530017, 40.15442]} from 2021-07-16 to 2021-08-13
Found 7 granules at point {'type': 'Point', 'coordinates': [-105.530017, 40.15442]} from

In [49]:
print(hls_links[0:10])

['https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B12.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B02.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B06.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B8A.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B08.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.SAA.tif', 'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/H

In [72]:
# convert https links to s3 links
s3_links = [l.replace('https://data.lpdaac.earthdatacloud.nasa.gov/', 's3://') for l in hls_links]
s3_links = [l.replace('https://cmr.earthdata.nasa.gov/', 's3://') for l in s3_links]
print(s3_links[0:20])

['s3://lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B12.tif', 's3://lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B02.tif', 's3://lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B06.tif', 's3://lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B8A.tif', 's3://lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B08.tif', 's3://lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.SAA.tif', 's3://lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B05.tif', 's3://lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B03.tif', 's3://lp-prod-protected/HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T

In [73]:
def fix_links(src_link, src_dirs, dst_dir, meta_dir, add_tile_dir=True):
    dst_link = src_link

    if '.xml' in dst_link:
        dst_link2 = os.path.join(meta_dir, os.path.basename(dst_link))
    else:
        for src_dir in src_dirs:
            dst_link = dst_link.replace(src_dir, dst_dir)
        dst_splits = dst_link.split('/')
        dst_link2 = '/'.join(dst_splits[0:2]) + \
            '/' + dst_splits[3].split('.')[2] + \
            '/' + '/'.join(dst_splits[3:])
    
    return(dst_link2)

#fix_links(s3_links[0], 's3://lp-prod-protected', './HLS_data')
local_links = [fix_links(src_link=l, src_dirs=['s3://lp-prod-protected', 's3://lp-prod-public'], dst_dir='./HLS_data', meta_dir='./HLS_metadata') for l in s3_links]
print(local_links[0:10])

['./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B12.tif', './HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B02.tif', './HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B06.tif', './HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B8A.tif', './HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B08.tif', './HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.SAA.tif', './HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B05.tif', './HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B03.tif', './HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B11.tif', './HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B01.tif']


In [74]:
for i in range(0, 30):
    print(local_links[i])

./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B12.tif
./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B02.tif
./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B06.tif
./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B8A.tif
./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B08.tif
./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.SAA.tif
./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B05.tif
./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B03.tif
./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B11.tif
./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B01.tif
./HLS_data/T13TDE/HLS.S30.T13TDE.2021001

In [37]:
# get credentials
s3_cred_endpoint = 'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials'
def get_temp_creds():
    temp_creds_url = s3_cred_endpoint
    return requests.get(temp_creds_url).json()

temp_creds_req = get_temp_creds()

session = boto3.Session(aws_access_key_id=temp_creds_req['accessKeyId'], 
                        aws_secret_access_key=temp_creds_req['secretAccessKey'],
                        aws_session_token=temp_creds_req['sessionToken'],
                        region_name='us-west-2')

In [41]:
def make_dirs(dst_links):
    for dst_link in dst_links:
        os.makedirs(os.path.dirname(dst_link), exist_ok=True)
        
make_dirs(local_links)

In [43]:
bckt_toks = src_dir.split('/')
print(bckt_toks)
bckt = '/'.join(bckt_toks[2:3]) #[1:]
print(bckt)
try:
    #session = boto3.session.Session()
    session = boto3.Session(aws_access_key_id=temp_creds_req['accessKeyId'], 
                        aws_secret_access_key=temp_creds_req['secretAccessKey'],
                        aws_session_token=temp_creds_req['sessionToken'],
                        region_name='us-west-2')
    s3_resource = session.resource('s3')
    s3_resource.meta.client.head_bucket(Bucket=bckt)
    print(bckt, 'is a bucket!')
except ClientError as error:
      error_code = int(error.response['Error']['Code'])
      if error_code == 403:
         print("Private Bucket. Forbidden Access! ", bckt)
      elif error_code == 404:
         print("Bucket Does Not Exist!", bckt)

['s3:', '', 'lp-prod-protected', 'HLSS30.020', 'HLS.S30.T13TDE.2021001T175741.v2.0']
lp-prod-protected
Private Bucket. Forbidden Access!  lp-prod-protected


In [47]:
bucket = 'lp-prod-protected'
src_remove = 's3://' + bucket + '/'
objs = [l.replace(src_remove, '') for l in s3_links]

dst_file = local_links[0]

#s3 = boto3.client('s3')
s3 = session.client('s3')
with open(dst_file, 'wb') as f:
    print(bucket, objs[0])
    s3.download_fileobj(bucket, objs[0], f)

lp-prod-protected HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B12.tif


In [None]:
L30_BAND_NAMES = {
    'coastal_aerosol': 'B01',
    'blue',
    'green',
    'red',
    'nir_narrow',
    'swir_1',
    'swir_2',
    'cirrus',
    'thermal_infrared_1',
    'thermal_infrared_2',
    'qa'
}

S30_BAND_NAMES = {
    'coastal_aerosol',
    'blue',
    'green',
    'red',
    'red-edge_1',
    'red-edge_2',
    'red-edge_3',
    'nir_broad',
    'nir_narrow',
    'water_vapor',
    'cirrus',
    'swir_1',
    'swir_2',
    'qa'
}

def stack_bands(s3_links, S30_bands, L30_bands):
    

In [76]:
s3 = session.client('s3')

def download_data(s3_links, local_links, s3_session):
    s3_links = [l.replace('s3://', '') for l in s3_links]
    
    for i in range(0, len(s3_links)):
        s3_link = s3_links[i]
        s3_bucket = s3_link.split('/')[0]
        s3_link = s3_link.replace(s3_bucket +'/', '')        
        local_link = local_links[i]
        
        # ignore XML files for now, figure out how to get them later because they contain useful information
        if not '.xml' in local_link:
            with open(local_link, 'wb') as f:
                print(i, s3_bucket, s3_link, local_link)
                s3.download_fileobj(s3_bucket, s3_link, f)

download_data(s3_links, local_links, s3)

0 lp-prod-protected HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B12.tif ./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B12.tif
1 lp-prod-protected HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B02.tif ./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B02.tif
2 lp-prod-protected HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B06.tif ./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B06.tif
3 lp-prod-protected HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B8A.tif ./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B8A.tif
4 lp-prod-protected HLSS30.020/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.2021001T175741.v2.0.B08.tif ./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T175741.v2.0/HLS.S30.T13TDE.202

In [69]:
s3_links2[19]
#local_links[19]

'https://cmr.earthdata.nasa.gov/search/concepts/G2163338554-LPCLOUD.xml'