In [None]:
from satsearch import Search
import stackstac, os, requests
from netrc import netrc
from subprocess import Popen
from getpass import getpass
import rasterio
from distributed import LocalCluster,Client

In [None]:
# Dataset to benchmark; the search cell recognises 'hls' and 'sentinel_2a'.
data = 'hls'
#data = 'sentinel_2a'
# When True, temporary NASA AWS credentials are requested and asset hrefs under
# https://lpdaac.earthdata.nasa.gov/ are rewritten to /vsis3/ paths.
s3 = False

In [None]:
if data == 'hls':
    # Make sure ~/.netrc holds a NASA Earthdata Login entry; if none is found,
    # prompt for the username and password and append an entry.
    urs = 'urs.earthdata.nasa.gov'    # Earthdata URL to call for authentication
    prompts = ['Enter NASA Earthdata Login Username \n(or create an account at urs.earthdata.nasa.gov): ',
               'Enter NASA Earthdata Login Password: ']
    netrcDir = os.path.expanduser("~/.netrc")
    try:
        # Raises FileNotFoundError when ~/.netrc does not exist, and TypeError
        # when it exists but has no entry for `urs` (authenticators() returns
        # None, which cannot be indexed).
        netrc(netrcDir).authenticators(urs)[0]

    # Below, create/extend the netrc file and prompt user for NASA Earthdata Login Username and Password
    except (FileNotFoundError, TypeError):
        login = getpass(prompt=prompts[0])
        password = getpass(prompt=prompts[1])
        # Write the entry from Python rather than through un-awaited shell
        # commands: the three lines are guaranteed to land in order, the
        # credentials never appear on a shell command line, and the file is
        # created readable/writable by the owner only.
        fd = os.open(netrcDir, os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o600)
        with os.fdopen(fd, 'a') as netrcFile:
            # Leading newline keeps the entry separate from any existing last
            # line that lacks a trailing newline.
            netrcFile.write('\nmachine {}\nlogin {}\npassword {}\n'.format(urs, login, password))
        # Also tighten permissions when the file already existed.
        os.chmod(netrcDir, 0o600)
        # Do not keep the secrets around in the kernel namespace.
        del login, password

In [None]:
if s3:
    # Get NASA temporary AWS credentials. Fail fast on an HTTP error instead
    # of trying to parse an error page as JSON, and do not hang indefinitely.
    s3_response = requests.get('https://lpdaac.earthdata.nasa.gov/s3credentials', timeout=60)
    s3_response.raise_for_status()
    s3_cred = s3_response.json()

    # NOTE(review): GDAL_HTTP_UNSAFESSL='YES' turns off SSL certificate
    # verification for GDAL's HTTP requests -- confirm this is really needed.
    env = dict(GDAL_DISABLE_READDIR_ON_OPEN='YES',
               GDAL_MAX_RAW_BLOCK_CACHE_SIZE='200000000',
               GDAL_SWATH_SIZE='200000000',
               VSI_CURL_CACHE_SIZE='200000000',
               CPL_VSIL_CURL_ALLOWED_EXTENSIONS='TIF',
               GDAL_HTTP_UNSAFESSL='YES',
               GDAL_HTTP_COOKIEFILE=os.path.expanduser('~/cookies.txt'),
               GDAL_HTTP_COOKIEJAR=os.path.expanduser('~/cookies.txt'),
               AWS_REGION='us-west-2',
               AWS_SECRET_ACCESS_KEY=s3_cred['secretAccessKey'],
               AWS_ACCESS_KEY_ID=s3_cred['accessKeyId'],
               AWS_SESSION_TOKEN=s3_cred['sessionToken'])
    # If this cell was run earlier with s3 = False in the same kernel,
    # AWS_NO_SIGN_REQUEST=YES is still set and would make GDAL skip request
    # signing; remove it so the credentials above are actually used.
    os.environ.pop('AWS_NO_SIGN_REQUEST', None)
    os.environ.update(env)

# Otherwise read over HTTPS with unsigned (anonymous) AWS requests
else:
    env = dict(GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR',
               AWS_NO_SIGN_REQUEST='YES',
               GDAL_MAX_RAW_BLOCK_CACHE_SIZE='200000000',
               GDAL_SWATH_SIZE='200000000',
               VSI_CURL_CACHE_SIZE='200000000',
               GDAL_HTTP_COOKIEFILE=os.path.expanduser('~/cookies.txt'),
               GDAL_HTTP_COOKIEJAR=os.path.expanduser('~/cookies.txt'))
    os.environ.update(env)

In [None]:
def get_STAC_items(url, collection, dates, bbox):
    """Run a STAC search through ``Search.search`` and return its result.

    Parameters
    ----------
    url : endpoint forwarded as the ``url`` keyword.
    collection : value forwarded as the ``collections`` keyword.
    dates : value forwarded as the ``datetime`` keyword.
    bbox : value forwarded as the ``bbox`` keyword.

    Returns
    -------
    Whatever ``Search.search`` returns for these arguments.
    """
    query = dict(url=url, collections=collection, datetime=dates, bbox=bbox)
    return Search.search(**query)

def remap_s3_url(stac):
    """Point LP DAAC HTTPS asset links at ``/vsis3/`` paths, in place.

    For every entry in ``stac`` and every asset in ``entry['assets']``, the
    prefix ``https://lpdaac.earthdata.nasa.gov/`` inside ``href`` is replaced
    by ``/vsis3/``. Hrefs without that prefix are left as they are.

    Returns the same ``stac`` object that was passed in (it is mutated).
    """
    https_root = 'https://lpdaac.earthdata.nasa.gov/'
    for feature in stac:
        for asset_info in feature['assets'].values():
            asset_info['href'] = asset_info['href'].replace(https_root, '/vsis3/')
    return stac
# Search the STAC catalogue for the dataset chosen in `data` and collect the
# matching items as GeoJSON features in `s_col`.
# Both datasets use the same area of interest
# (presumably [west, south, east, north] in degrees -- confirm).
bbox=[-104.79107047,   40.78311181, -104.67687336,   40.87008987]
if data == 'hls':
    url = 'https://cmr.earthdata.nasa.gov/stac/LPCLOUD/'
    collection = ['HLSS30.v1.5']#'C1711924822-LPCLOUD' #HLS
    dates = '2020-01-01/2021-02-10'
    # The HLS search is given the bbox as a comma-separated string.
    search_bbox = ','.join(map(str, bbox))
elif data == 'sentinel_2a':
    url = 'https://earth-search.aws.element84.com/v0'
    collection = ['sentinel-s2-l2a-cogs']
    dates = '2020-11-01/2022-02-10'
    # The Sentinel-2 search is given the bbox as a list.
    search_bbox = bbox
else:
    # Fail loudly rather than hitting a NameError on `s_col` below, or
    # silently reusing a stale `s_col` left over from an earlier run.
    raise ValueError("Unsupported data value {!r}; expected 'hls' or 'sentinel_2a'".format(data))

stac_items = get_STAC_items(url,collection,dates,search_bbox).items()
s_col = stac_items.geojson()['features']
if s3:
    # Rewrite LP DAAC HTTPS hrefs to /vsis3/ paths for direct S3 access.
    s_col = remap_s3_url(s_col)
print('Number of Items: ',len(s_col))

In [None]:
# Open the B01 asset of the first search result to capture its bounds and
# profile; `b`, `dtype` and `nodata` are reused by the stackstac.stack cell.
with rasterio.open(s_col[0]['assets']['B01']['href']) as src:
    b= src.bounds
    prof = src.profile
    dtype = prof['dtype']
    nodata = prof['nodata']
    print(prof)

In [None]:
# Path to enter in the JupyterLab extension (presumably the Dask dashboard --
# confirm): /user/<username>/proxy/8787/status
# Start a local Dask cluster with one thread per worker and connect a client to it.
cluster = LocalCluster(threads_per_worker=1)
cl = Client(cluster)
cl

In [None]:
# Build the stack for the B01 asset of every item in `s_col`, reusing the
# dtype, nodata value and bounds read from the first item's B01 raster.
# NOTE(review): chunksize=-1 presumably asks for a single chunk per spatial
# dimension -- confirm against the stackstac documentation.
da = stackstac.stack(s_col,dtype=dtype,
                     fill_value=nodata,
                     resolution=30,
                     epsg=32613,
                     chunksize=-1,
                     assets=['B01'],
                     bounds=list(b))#,gdal_env=stackstac.LayeredEnv(env))
da

In [None]:
# Time the computation of `da`; earlier results are tabulated in the cell below.
%timeit da.compute()

<center>
<h3>Results from Previous Runs</h3>
</center><br>

| Data      |  Size | file method | LocalCluster Specs | timeit results |
|-----------|-------|-------------|-------|----------------|
| HLS       |1.21 GB|/vsis3/|workers=4;threadsperworker=1;mem=8.59GB|32.5 s ± 984 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)|
| HLS       |1.21 GB|/vsicurl/|workers=4;threadsperworker=1;mem=8.59GB| Does Not Complete |
|Sentinel_2a|1.61 GB|/vsis3/|workers=4;threadsperworker=1;mem=8.59GB|10.9 s ± 273 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)|
|Sentinel_2a|1.61 GB|/vsicurl/|workers=4;threadsperworker=1;mem=8.59GB|11 s ± 375 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)|