In [38]:
%matplotlib inline
import matplotlib.pyplot as plt
from datetime import datetime
import time
import os
import pandas
import requests
import boto3
import numpy as np
import xarray as xr
import rasterio as rio
from rasterio.session import AWSSession
from rasterio.plot import show
import rioxarray

In [2]:
# get credentials
s3_cred_endpoint = 'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials'
def get_temp_creds():
    """Fetch temporary S3 credentials from ``s3_cred_endpoint``.

    Returns:
        The decoded JSON body of the response. The code that follows reads
        the ``accessKeyId``, ``secretAccessKey`` and ``sessionToken`` keys
        from it.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.Timeout: if no response arrives within the timeout.
    """
    # Bound the wait so a stalled connection cannot hang the notebook forever.
    response = requests.get(s3_cred_endpoint, timeout=60)
    # Fail here, with the HTTP status, instead of later with a confusing
    # JSON-decoding or missing-key error when the request was rejected.
    response.raise_for_status()
    return response.json()

# Request the temporary credentials once, then build the boto3 session
# (region us-west-2) that the rasterio environment is created from.
temp_creds_req = get_temp_creds()

session = boto3.Session(
    region_name='us-west-2',
    aws_access_key_id=temp_creds_req['accessKeyId'],
    aws_secret_access_key=temp_creds_req['secretAccessKey'],
    aws_session_token=temp_creds_req['sessionToken'],
)

In [3]:
# Create a rasterio environment that carries the AWS session built above plus
# three GDAL configuration options: directory listing on open is disabled and
# the HTTP cookie file / cookie jar both point at ~/cookies.txt.
# NOTE(review): the environment is entered by hand and never exited in the
# visible cells -- presumably so it stays active for the reads in later cells;
# confirm, and call rio_env.__exit__(None, None, None) when finished.
rio_env = rio.Env(AWSSession(session),
                  GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR',
                  GDAL_HTTP_COOKIEFILE=os.path.expanduser('~/cookies.txt'),
                  GDAL_HTTP_COOKIEJAR=os.path.expanduser('~/cookies.txt'))
rio_env.__enter__()

<rasterio.env.Env at 0x7ff6edb58cd0>

In [20]:
# Load the table of S3 links, keep only the rows that have a date, and
# renumber the rows (the previous row labels are kept as an 'index' column).
stack_df = (
    pandas.read_csv('/home/jovyan/nch21_hls_timeseries/HLS_data/T13TDE/stack.csv')
    .loc[lambda frame: frame['date'].notna(), :]
    .reset_index()
)
stack_df

Unnamed: 0,index,S3_links,local_links,sensor,tile,date,band
0,0,s3://lp-prod-protected/HLSS30.020/HLS.S30.T13T...,./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T17574...,S30,T13TDE,2021001T175741,B12
1,1,s3://lp-prod-protected/HLSS30.020/HLS.S30.T13T...,./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T17574...,S30,T13TDE,2021001T175741,B02
2,2,s3://lp-prod-protected/HLSS30.020/HLS.S30.T13T...,./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T17574...,S30,T13TDE,2021001T175741,B06
3,3,s3://lp-prod-protected/HLSS30.020/HLS.S30.T13T...,./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T17574...,S30,T13TDE,2021001T175741,B8A
4,4,s3://lp-prod-protected/HLSS30.020/HLS.S30.T13T...,./HLS_data/T13TDE/HLS.S30.T13TDE.2021001T17574...,S30,T13TDE,2021001T175741,B08
...,...,...,...,...,...,...,...
1001,1055,s3://lp-prod-protected/HLSS30.020/HLS.S30.T13T...,./HLS_data/T13TDE/HLS.S30.T13TDE.2021316T17555...,S30,T13TDE,2021316T175559,Fmask
1002,1056,s3://lp-prod-protected/HLSS30.020/HLS.S30.T13T...,./HLS_data/T13TDE/HLS.S30.T13TDE.2021316T17555...,S30,T13TDE,2021316T175559,VZA
1003,1057,s3://lp-prod-protected/HLSS30.020/HLS.S30.T13T...,./HLS_data/T13TDE/HLS.S30.T13TDE.2021316T17555...,S30,T13TDE,2021316T175559,SAA
1004,1058,s3://lp-prod-protected/HLSS30.020/HLS.S30.T13T...,./HLS_data/T13TDE/HLS.S30.T13TDE.2021316T17555...,S30,T13TDE,2021316T175559,B12


In [52]:
# Band lookup table: one row per spectral band with its label, the band name
# used by each sensor (None when the sensor has no such band) and a flag that
# says whether the band should be read.
header = ['label', 'L30_band', 'S30_band', 'read']
data = [
    [label, l30_band, s30_band, read]
    for label, l30_band, s30_band, read in (
        ('coastal_aerosol',    'B01',   'B01',   False),
        ('blue',               'B02',   'B02',   True),
        ('green',              'B03',   'B03',   True),
        ('red',                'B04',   'B04',   True),
        ('red-edge_1',         None,    'B05',   False),
        ('red-edge_2',         None,    'B06',   False),
        ('red-edge_3',         None,    'B07',   False),
        ('nir_broad',          None,    'B08',   False),
        ('nir',                'B05',   'B8A',   True),
        ('swir_1',             'B06',   'B11',   True),
        ('swir_2',             'B07',   'B12',   True),
        ('water_vapor',        None,    'B09',   False),
        ('cirrus',             'B09',   'B10',   False),
        ('thermal_infrared_1', 'B10',   None,    False),
        ('thermal_infrared_2', 'B11',   None,    False),
        ('fmask',              'Fmask', 'Fmask', True),
    )
]

band_df = pandas.DataFrame(data=data, columns=header)
band_df

Unnamed: 0,label,L30_band,S30_band,read
0,coastal_aerosol,B01,B01,False
1,blue,B02,B02,True
2,green,B03,B03,True
3,red,B04,B04,True
4,red-edge_1,,B05,False
5,red-edge_2,,B06,False
6,red-edge_3,,B07,False
7,nir_broad,,B08,False
8,nir,B05,B8A,True
9,swir_1,B06,B11,True


In [53]:
# Chunk sizes handed to rioxarray.open_rasterio when the rasters are opened.
chunks = {'band': 1, 'x': 256, 'y': 256}

In [54]:
%%time

# Build a Dataset holding one chunked (time, y, x) array per band that is
# flagged 'read' in band_df; each variable is named after the band's label.
hls_ds = None

for i in range(band_df.shape[0]):
    # skip the bands that are not flagged for reading
    if not band_df.loc[i, 'read']:
        continue

    # subset stack for links for each band (the band name differs by sensor)
    band_stack = stack_df.loc[
        ((stack_df['band'] == band_df.loc[i, 'L30_band']) & (stack_df['sensor'] == 'L30')) |
        ((stack_df['band'] == band_df.loc[i, 'S30_band']) & (stack_df['sensor'] == 'S30')), :]

    # create the time index: dates look like '2021001T175741'
    # (year, day of year, 'T', HHMMSS)
    band_time = [datetime.strptime(str(t), '%Y%jT%H%M%S') for t in band_stack['date']]
    # A named Variable makes xr.concat call the new dimension 'time'. A bare
    # list would produce a dimension named 'concat_dim', and DataArray.rename
    # returns a new object, so renaming afterwards without re-assigning the
    # result has no effect.
    time_coord = xr.Variable('time', band_time)

    s3_links = band_stack['S3_links']

    # get the band label
    band_label = band_df.loc[i, 'label']

    # open the links: each raster drops its length-1 'band' dimension and the
    # rasters are stacked along the time coordinate, in stack order
    band_arrays = [
        rioxarray.open_rasterio(f, chunks=chunks).squeeze('band', drop=True)
        for f in s3_links
    ]
    hls_ts_da = xr.concat(band_arrays, dim=time_coord)

    if hls_ds is None:
        hls_ds = xr.Dataset({band_label: hls_ts_da})
    else:
        hls_ds[band_label] = hls_ts_da

hls_ds

Time to read S3 HLS data: 26 (s)


Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,702.63 MiB,64.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,uint8,numpy.ndarray
"Array Chunk Bytes 702.63 MiB 64.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type uint8 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,702.63 MiB,64.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,uint8,numpy.ndarray


In [61]:
%%time

def SI(b1, b2):
    """Normalized-difference spectral index ``(b1 - b2) / (b1 + b2)``.

    Args:
        b1: first band (in this notebook an xarray DataArray taken from
            ``hls_ds``).
        b2: second band, compatible with ``b1``.

    Returns:
        The index as floating point values limited to the range [-1, 1].
        Positions where the ratio is not finite (for example a zero
        denominator) are NaN.
    """
    # Promote to float before any arithmetic: the bands loaded in this
    # notebook are int16, so b1 + b2 or b1 - b2 could wrap around if it were
    # evaluated in integer arithmetic.
    b1 = b1 * 1.0
    b2 = b2 * 1.0
    si = (b1 - b2) / (b1 + b2)
    # Blank out non-finite ratios *before* clamping; clamping first would
    # turn +/-inf from a zero denominator into a valid-looking +/-1.0.
    si = xr.where(np.isfinite(si), si, np.nan)
    # Clamp to the valid range; comparisons with NaN are False, so NaN stays.
    si = xr.where(si < -1.0, -1.0, si)
    si = xr.where(si > 1.0, 1.0, si)
    return si

# calculate NDVI: normalized difference of the 'nir' and 'red' bands,
# stored in the dataset as the new variable 'ndvi'
hls_ds['ndvi'] = SI(hls_ds['nir'], hls_ds['red'])

# calculate NBR: normalized difference of the 'nir' and 'swir_2' bands,
# stored in the dataset as the new variable 'nbr'
hls_ds['nbr'] = SI(hls_ds['nir'], hls_ds['swir_2'])
# display the dataset, which now includes the two index variables
hls_ds

CPU times: user 40.2 ms, sys: 2 µs, total: 40.2 ms
Wall time: 39.8 ms


Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 1.37 GiB 128.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type int16 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,1.37 GiB,128.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,702.63 MiB,64.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,uint8,numpy.ndarray
"Array Chunk Bytes 702.63 MiB 64.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 49555 Tasks 12375 Chunks Type uint8 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,702.63 MiB,64.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,49555 Tasks,12375 Chunks
Type,uint8,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.49 GiB,512.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,210485 Tasks,12375 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 5.49 GiB 512.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 210485 Tasks 12375 Chunks Type float64 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,5.49 GiB,512.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,210485 Tasks,12375 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.49 GiB,512.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,210485 Tasks,12375 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 5.49 GiB 512.00 kiB Shape (55, 3660, 3660) (1, 256, 256) Count 210485 Tasks 12375 Chunks Type float64 numpy.ndarray",3660  3660  55,

Unnamed: 0,Array,Chunk
Bytes,5.49 GiB,512.00 kiB
Shape,"(55, 3660, 3660)","(1, 256, 256)"
Count,210485 Tasks,12375 Chunks
Type,float64,numpy.ndarray


In [None]:
%%time

# convert FMask to integer values representing water, snow/ice, clouds, shadows 

# mask bands, ndvi, nbr to numpy.nan where there are water, snow/ice, clouds, shadows, etc