In [1]:
from satsearch import Search
import stackstac, os, requests
from netrc import netrc
from subprocess import Popen
from getpass import getpass
import rasterio
from distributed import LocalCluster,Client

In [2]:
# Dataset to benchmark. Must be one of the keys of BAND_BY_DATASET below
# ('hls', 'sentinel_2a' or 'landsat8').
data = 'hls'
# When True, asset hrefs are rewritten to GDAL /vsis3/ paths further down in the
# notebook; when False the hrefs returned by the STAC API are used unchanged.
s3 = False

# Asset key of the single band that is read for each supported dataset.
BAND_BY_DATASET = {
    'sentinel_2a': 'B05',
    'hls': 'B01',
    'landsat8': 'B1',
}

# Fail here, with a clear message, instead of with a NameError on `band` in a later cell.
if data not in BAND_BY_DATASET:
    raise ValueError('Unsupported data {!r}; expected one of {}'.format(data, sorted(BAND_BY_DATASET)))
band = BAND_BY_DATASET[data]

In [3]:
if data == 'hls':
    # Make sure ~/.netrc contains NASA Earthdata Login credentials; prompt for them
    # and append an entry when the file is missing or has no entry for the host.
    urs = 'urs.earthdata.nasa.gov'    # Earthdata URL to call for authentication
    prompts = ['Enter NASA Earthdata Login Username \n(or create an account at urs.earthdata.nasa.gov): ',
               'Enter NASA Earthdata Login Password: ']
    netrc_path = os.path.expanduser("~/.netrc")

    try:
        # authenticators() returns None when the file exists but holds no entry
        # for this host, so test for that instead of indexing the result.
        has_login = netrc(netrc_path).authenticators(urs) is not None
    except FileNotFoundError:
        has_login = False

    if not has_login:
        login = getpass(prompt=prompts[0])
        password = getpass(prompt=prompts[1])

        # Write the entry with plain file I/O rather than through shell commands:
        # the credentials are never interpolated into a command line, and the three
        # lines are guaranteed to be written in order before the cell finishes.
        # The file is created owner-read/write only if it does not exist yet.
        fd = os.open(netrc_path, os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o600)
        with os.fdopen(fd, 'a') as netrc_file:
            # Leading newline keeps the new entry on its own line even when an
            # existing file does not end with a newline.
            netrc_file.write('\nmachine {}\nlogin {}\npassword {}\n'.format(urs, login, password))

        # Equivalent of `chmod og-rw` for a file that already existed with wider permissions.
        os.chmod(netrc_path, os.stat(netrc_path).st_mode & 0o7711)

        # Do not keep the secrets in the kernel namespace.
        del login, password

In [4]:
# Configure GDAL / AWS through process environment variables. Exactly one of the
# branches below applies for a given (s3, data) combination; for the remaining
# combinations nothing is changed.
# NOTE(review): os.environ.update() persists for the life of the kernel, so values set by
# an earlier run with a different `data`/`s3` are not removed when this cell is re-run.
if s3 and data == 'hls':
    # Get NASA temporary AWS credentials.
    # NOTE(review): presumably authenticated through the ~/.netrc entry created above — confirm.
    response = requests.get('https://lpdaac.earthdata.nasa.gov/s3credentials')
    # Surface an HTTP error here instead of a confusing JSON/KeyError further down.
    response.raise_for_status()
    s3_cred = response.json()  # contains secrets: do not display or print

    env = dict(GDAL_DISABLE_READDIR_ON_OPEN='YES',
               GDAL_MAX_RAW_BLOCK_CACHE_SIZE='200000000',
               GDAL_SWATH_SIZE='200000000',
               VSI_CURL_CACHE_SIZE='200000000',
               CPL_VSIL_CURL_ALLOWED_EXTENSIONS='TIF',
               GDAL_HTTP_UNSAFESSL='YES',
               GDAL_HTTP_COOKIEFILE=os.path.expanduser('~/cookies.txt'),
               GDAL_HTTP_COOKIEJAR=os.path.expanduser('~/cookies.txt'),
               AWS_REGION='us-west-2',
               AWS_NO_SIGN_REQUEST='NO',
               AWS_REQUEST_PAYER='REQUESTER',
               # NOTE(review): `region_name` is kept from the original cell; verify that
               # anything actually reads an environment variable with this name.
               region_name='us-west-2',
               AWS_SECRET_ACCESS_KEY=s3_cred['secretAccessKey'],
               AWS_ACCESS_KEY_ID=s3_cred['accessKeyId'],
               AWS_SESSION_TOKEN=s3_cred['sessionToken'])
    os.environ.update(env)

elif not s3 and data == 'hls':
    # HLS over HTTPS: unsigned requests plus a cookie file/jar shared between requests.
    env = dict(GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR',
               AWS_NO_SIGN_REQUEST='YES',
               GDAL_MAX_RAW_BLOCK_CACHE_SIZE='200000000',
               GDAL_SWATH_SIZE='200000000',
               VSI_CURL_CACHE_SIZE='200000000',
               GDAL_HTTP_COOKIEFILE=os.path.expanduser('~/cookies.txt'),
               GDAL_HTTP_COOKIEJAR=os.path.expanduser('~/cookies.txt'))
    os.environ.update(env)

elif s3 and data in ('landsat8', 'sentinel_2a'):
    # Landsat 8 / Sentinel-2 through /vsis3/.
    # NOTE(review): AWS_NO_SIGN_REQUEST='YES' together with AWS_REQUEST_PAYER='REQUESTER'
    # is kept exactly as in the original cell — confirm both are intended.
    env = dict(GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR',
               AWS_REGION='us-west-2',
               AWS_REQUEST_PAYER='REQUESTER',
               region_name='us-west-2',
               AWS_NO_SIGN_REQUEST='YES',
               GDAL_MAX_RAW_BLOCK_CACHE_SIZE='200000000',
               GDAL_SWATH_SIZE='200000000',
               VSI_CURL_CACHE_SIZE='200000000')
    os.environ.update(env)

In [5]:
def get_STAC_items(url, collection, dates, bbox):
    """Run a sat-search query against a STAC API and return the search object.

    Parameters
    ----------
    url : STAC API endpoint, forwarded as ``url``.
    collection : collection id(s), forwarded as ``collections``.
    dates : date or date range, forwarded as ``datetime``.
    bbox : bounding box, forwarded unchanged as ``bbox``.

    Returns
    -------
    Whatever ``Search.search`` returns; callers in this notebook call
    ``.items()`` on it to fetch the matching items.
    """
    return Search.search(
        url=url,
        collections=collection,
        datetime=dates,
        bbox=bbox,
    )

def remap_s3_url(stac):
    """Rewrite known HTTPS asset hrefs to GDAL ``/vsis3/`` paths, in place.

    ``stac`` is a sequence of item dicts, each with an ``'assets'`` mapping whose
    values carry an ``'href'`` string. Every href has the three known URL prefixes
    replaced (in the order listed below); hrefs that match none are left as they
    are. The same ``stac`` object is returned after being modified.
    """
    # (HTTPS prefix, /vsis3/ replacement), applied in this order to every href.
    prefix_rewrites = (
        ('https://lpdaac.earthdata.nasa.gov/', '/vsis3/'),
        ('https://sentinel-cogs.s3.us-west-2.amazonaws.com/', '/vsis3/sentinel-cogs/'),
        ('https://landsat-pds.s3.us-west-2.amazonaws.com/', '/vsis3/landsat-pds/'),
    )
    for item in stac:
        for asset in item['assets'].values():
            href = asset['href']
            for https_prefix, vsis3_prefix in prefix_rewrites:
                href = href.replace(https_prefix, vsis3_prefix)
            asset['href'] = href
    return stac

# Area of interest passed to the STAC search as `bbox`.
bbox = [-104.79107047,   40.78311181, -104.67687336,   40.87008987]

# Per-dataset search settings. Only the values differ between datasets; the search
# itself is identical, so it is written once below instead of once per dataset.
search_config = {
    'hls': dict(
        url='https://cmr.earthdata.nasa.gov/stac/LPCLOUD/',
        # The original cell also noted 'C1711924822-LPCLOUD' next to this id.
        collection=['HLSS30.v1.5'],
        dates='2020-01-01/2021-02-10',
        # This endpoint is queried with the bbox as one comma-separated string.
        bbox=','.join(map(str, bbox)),
    ),
    'sentinel_2a': dict(
        url='https://earth-search.aws.element84.com/v0',
        collection=['sentinel-s2-l2a-cogs'],
        dates='2021-01-20/2022-02-10',
        bbox=bbox,
    ),
    'landsat8': dict(
        url='https://earth-search.aws.element84.com/v0',
        collection=['landsat-8-l1-c1'],
        dates='2019-11-01/2022-02-10',
        bbox=bbox,
    ),
}

# An unknown dataset used to skip every branch and fail later with a NameError on s_col.
if data not in search_config:
    raise ValueError('Unsupported data {!r}; expected one of {}'.format(data, sorted(search_config)))

selected = search_config[data]
url = selected['url']
collection = selected['collection']
dates = selected['dates']

stac_items = get_STAC_items(url, collection, dates, selected['bbox']).items()
s_col = stac_items.geojson()['features']
if s3:
    # Read through /vsis3/ instead of the HTTPS hrefs returned by the API.
    s_col = remap_s3_url(s_col)
print('Number of Items: ',len(s_col))

Number of Items:  45


In [6]:
# Use the selected band of the first item as the template for the stack:
# its bounds, resolution, dtype and nodata value are reused when stacking.
first_href = s_col[0]['assets'][band]['href']
with rasterio.open(first_href) as src:
    prof = src.profile
    b = src.bounds
    res = src.res

dtype, nodata = prof['dtype'], prof['nodata']
print(prof)

{'driver': 'GTiff', 'dtype': 'int16', 'nodata': -9999.0, 'width': 3660, 'height': 3660, 'count': 1, 'crs': CRS.from_wkt('PROJCS["UTM Zone 13, Northern Hemisphere",GEOGCS["Unknown datum based upon the WGS 84 ellipsoid",DATUM["Not_specified_based_on_WGS_84_spheroid",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-105],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]'), 'transform': Affine(30.0, 0.0, 499980.0,
       0.0, -30.0, 4600020.0), 'blockxsize': 256, 'blockysize': 256, 'tiled': True, 'compress': 'deflate', 'interleave': 'band'}


In [7]:
# Dashboard path for the JupyterLab extension: /user/<username>/proxy/8787/status
# Start a local Dask cluster with one thread per worker and connect a client to it.
cluster = LocalCluster(threads_per_worker=1)
cl = Client(address=cluster)
cl

0,1
Client  Scheduler: tcp://127.0.0.1:45179  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 4  Cores: 4  Memory: 8.59 GB


In [8]:
# Build the lazy stack of every item found by the search, restricted to the single
# selected band and to the footprint/resolution/dtype of the template raster.
stack_options = dict(
    assets=[band],
    bounds=list(b),
    snap_bounds=True,
    resolution=res[0],
    epsg=32613,
    dtype=dtype,
    fill_value=nodata,
    properties=None,
    chunksize=-1,
)
# The original call carried a commented-out extra argument:
#   gdal_env=stackstac.LayeredEnv(env)
da = stackstac.stack(s_col, **stack_options)
da

Unnamed: 0,Array,Chunk
Bytes,1.21 GB,26.79 MB
Shape,"(45, 1, 3660, 3660)","(1, 1, 3660, 3660)"
Count,136 Tasks,45 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.21 GB 26.79 MB Shape (45, 1, 3660, 3660) (1, 1, 3660, 3660) Count 136 Tasks 45 Chunks Type int16 numpy.ndarray",45  1  3660  3660  1,

Unnamed: 0,Array,Chunk
Bytes,1.21 GB,26.79 MB
Shape,"(45, 1, 3660, 3660)","(1, 1, 3660, 3660)"
Count,136 Tasks,45 Chunks
Type,int16,numpy.ndarray


In [None]:
# Benchmark: time how long it takes to materialise the stacked array with .compute().
if data == 'landsat8':
    # Landsat 8 times only a subset along x: the first 2000 and last 2000 columns are dropped.
    # NOTE(review): the reason for the subset is not stated here — presumably to limit
    # the amount of data read; confirm.
    %timeit da.isel(x=slice(2000,-2000)).compute()
else:
    # All other datasets time the full array.
    %timeit da.compute()

| Data      |  Size | Time Steps| Prefix | LocalCluster Specs | %timeit Results |
|-----------|-------|------|-------|-------|----------------|
| HLS       |1.21 GB|45|/vsis3/|workers=4;threadsperworker=1;mem=8.59GB|33.1 s ± 1.97 s per loop (mean ± std. dev. of 7 runs, 1 loop each)|
| HLS       |1.21 GB|45|/vsicurl/|workers=4;threadsperworker=1;mem=8.59GB| Fails |
|Sentinel_2a|1.75 GB|29|/vsis3/|workers=4;threadsperworker=1;mem=8.59GB|12.5 s ± 127 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)|
|Sentinel_2a|1.75 GB|29|/vsicurl/|workers=4;threadsperworker=1;mem=8.59GB|12.8 s ± 623 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)|
|Landsat 8|1.62 GB|28|/vsis3/|workers=4;threadsperworker=1;mem=8.59GB|25.6 s ± 997 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)|
|Landsat 8|1.62 GB|28|/vsicurl/|workers=4;threadsperworker=1;mem=8.59GB|26.1 s ± 1.1 s per loop (mean ± std. dev. of 7 runs, 1 loop each)|