In [84]:
import json
import os
import rasterio as rio
import rioxarray
import hvplot.xarray

In [86]:
%%bash 
# Set up temporary credentials for GDAL
# https://github.com/OSGeo/gdal/issues/4058
# NOTE: these last for 1 hour (see --duration-seconds below)

# Fail fast on an unset variable (e.g. AWS_ROLE_ARN) or a failing command,
# so the problem is reported by this cell rather than by a later one.
set -euo pipefail

# The output file contains secrets: recreate it so that it is readable by the
# current user only (an existing file would otherwise keep its old mode).
umask 077
rm -f /tmp/irp-cred.json

# Variables are quoted so paths/ARNs are passed as single arguments.
aws sts assume-role-with-web-identity \
 --role-arn "$AWS_ROLE_ARN" \
 --role-session-name snowex \
 --web-identity-token "file://$AWS_WEB_IDENTITY_TOKEN_FILE" \
 --duration-seconds 3600 > /tmp/irp-cred.json

# Print out creds (debugging only: uncommenting these lines writes the secrets
# into the saved notebook output)
#echo AWS_REGION="us-west-2"
#echo AWS_ACCESS_KEY_ID="$(cat /tmp/irp-cred.json | jq -r ".Credentials.AccessKeyId")"
#echo AWS_SECRET_ACCESS_KEY="$(cat /tmp/irp-cred.json | jq -r ".Credentials.SecretAccessKey")"
#echo AWS_SESSION_TOKEN="$(cat /tmp/irp-cred.json | jq -r ".Credentials.SessionToken")"

In [87]:
# Use temporary credentials in python (rasterio)

# Parse the JSON response that the bash cell saved to /tmp/irp-cred.json.
with open('/tmp/irp-cred.json') as f:
    creds = json.load(f)

# Extract the three parts of the temporary credential set.
AccessKeyId, SecretAccessKey, SessionToken = (
    creds['Credentials'][field]
    for field in ('AccessKeyId', 'SecretAccessKey', 'SessionToken')
)

# Export them as environment variables for this kernel process.
os.environ.update({
    'AWS_ACCESS_KEY_ID': AccessKeyId,
    'AWS_SECRET_ACCESS_KEY': SecretAccessKey,
    'AWS_SESSION_TOKEN': SessionToken,
})

# GDAL env variables https://gdal.org/user/virtual_file_systems.html#vsicurl-http-https-ftp-files-random-access
# This rasterio environment is reused as a context manager when rasters are opened.
Env = rio.Env(GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR')

In [105]:
import subprocess

def _list_s3(url):
    """Return the lines printed by ``aws s3 ls <url>`` as a list of strings.

    The command is passed as an argument list (no shell), so nothing in
    ``url`` is ever interpreted by a shell, and ``subprocess.run`` waits for
    the process and closes its pipe. The exit status is deliberately not
    checked: a failed listing yields no lines, and the CLI's own error message
    still goes to stderr.

    Raises:
        FileNotFoundError: if the ``aws`` executable is not on the PATH.
    """
    completed = subprocess.run(['aws', 's3', 'ls', url], stdout=subprocess.PIPE)
    return completed.stdout.decode('utf-8').splitlines()


def _parse_image_dir(name):
    """Parse ``<Region>_<year>_<..>_<..>_<N>d`` into ``(region, year, days)``.

    The region is lower-cased. Returns ``None`` when ``name`` does not follow
    that pattern, so unrelated directories can simply be skipped.
    """
    parts = name.split('_')
    try:
        return parts[0].lower(), int(parts[1]), int(parts[4].replace('d', ''))
    except (IndexError, ValueError):
        return None


def _open_s3_raster(s3_url):
    """Open one raster from S3 and return it as a DataArray without its band dimension."""
    # Imported here, as the original loop did, so the function also works when
    # it is run without the notebook's import cell.
    import rioxarray
    # `Env` is the rasterio environment created in the credentials cell.
    with Env:
        da = rioxarray.open_rasterio(s3_url, masked=True).squeeze('band', drop=True)
    da.name = os.path.basename(s3_url)
    return da


def s3_img_access(regions = ['grmesa','peeler','rockmt','silver','uticam'],
                 years = [2015,2016,2017,2018,2019,2020,2021],
                 day_threshold = 364,
                 types = ['cor','unw'],
                 bands = ['HH','VV','HV','VH'],
                 bucket_url = 's3://snowex-data/uavsar-project/UAVSAR_images/'):
    """Open the UAVSAR rasters under ``bucket_url`` that match the given filters.

    Sub-directories of ``bucket_url`` are expected to be named
    ``<Region>_<year>_<..>_<..>_<N>d`` (for example
    ``GrMesa_2015_054_081_0006d``). Directories that do not follow this pattern,
    and plain objects stored directly under ``bucket_url``, are ignored.

    Args:
        regions: region names to keep; compared case-insensitively with the
            first field of the directory name.
        years: years to keep (second field of the directory name).
        day_threshold: keep directories whose ``<N>d`` value is strictly less
            than this number.
        types: substrings of which at least one must occur in a file name.
        bands: substrings of which at least one must occur in a file name.
        bucket_url: S3 prefix to search; a trailing ``/`` is added if missing.

    Returns:
        dict mapping the full S3 URL of each selected file to the DataArray
        opened from it (named after the file's base name).

    Raises:
        FileNotFoundError: if the ``aws`` executable is not on the PATH.
    """
    base_url = bucket_url if bucket_url.endswith('/') else bucket_url + '/'
    wanted_regions = {region.lower() for region in regions}

    img_dict = {}
    for line in _list_s3(base_url):
        entry = line.strip()
        # `aws s3 ls` prints sub-directories as "PRE <name>/"; every other
        # line is an object (or blank) and cannot be an image directory.
        if not entry.startswith('PRE '):
            continue
        direc = entry[len('PRE '):].strip().rstrip('/')

        parsed = _parse_image_dir(direc)
        if parsed is None:
            continue
        dir_region, dir_year, dir_days = parsed
        if (dir_region not in wanted_regions
                or dir_year not in years
                or dir_days >= day_threshold):
            continue

        for file_line in _list_s3('{}{}/'.format(base_url, direc)):
            # Object lines look like "<date> <time> <size> <name>".
            fields = file_line.split(None, 3)
            if len(fields) < 4:
                continue  # blank line or nested "PRE <name>/" entry
            file = fields[3]
            # One entry per file, even when several types/bands match.
            if any(t in file for t in types) and any(band in file for band in bands):
                s3_url = base_url + direc + '/' + file
                img_dict[s3_url] = _open_s3_raster(s3_url)
    return img_dict

In [107]:
# Open every raster that matches the default region/year/type/band filters;
# the result maps each file's S3 URL to its opened DataArray.
img_dict = s3_img_access()
# Number of rasters that were opened.
len(img_dict)

238

In [116]:
# Now you can read it directly and work with subsets, etc
# (rioxarray is also imported in the notebook's first cell)
import rioxarray
# Uncomment to list the available keys (the full S3 URL of every opened file):
#print(img_dict.keys())
# Select a single raster from the dictionary by its S3 URL.
da = img_dict['s3://snowex-data/uavsar-project/UAVSAR_images/GrMesa_2015_054_081_0006d/GrMesa_08112_15054-004_15055-006_0006d_s01_L090HH_01.cor.grd.tiff']
# Display the DataArray's summary as the cell output.
da

In [117]:
# Efficiently plot in browser
# (hvplot.xarray is also imported in the notebook's first cell; the `.hvplot`
# attribute used below is presumably provided by this import)
import hvplot.xarray
# Show the selected raster as a grayscale image.
da.hvplot.image(rasterize=True, cmap='gray')