In [1]:
from CMRQuery import CMRQuery

### Initialize the query object, log in to Earthdata, and set the authentication token

In [2]:
# Create the query helper imported from the local CMRQuery module.
cmr_query = CMRQuery()

# Log in to Earthdata and set the authentication token on the helper
# (presumably required by the query calls further down; confirm in CMRQuery).
cmr_query.login_and_set_token()

### Get the shortnames for all L4 collections containing 'sst' or 'sea surface temperature' keywords

In [3]:
# Search the POCLOUD provider for processing-level-4 collections that match
# the SST keywords below.
sst_keywords = ['sst', 'sea surface temperature']
shortnames = cmr_query.query_collections_by_keyword(
    provider='POCLOUD',
    keywords=sst_keywords,
    processinglevel='4',
)

# List every collection short name that came back, one per line.
for s in shortnames:
    print(s)

CMC0.1deg-CMC-L4-GLOB-v3.0
MUR-JPL-L4-GLOB-v4.1
K10_SST-NAVO-L4-GLOB-v01
MUR25-JPL-L4-GLOB-v04.2
AVHRR_OI-NCEI-L4-GLOB-v2.0
AVHRR_OI-NCEI-L4-GLOB-v2.1
CMC0.2deg-CMC-L4-GLOB-v2.0
DMI_OI-DMI-L4-GLOB-v1.0
GAMSSA_28km-ABOM-L4-GLOB-v01
Geo_Polar_Blended-OSPO-L4-GLOB-v1.0
Geo_Polar_Blended_Night-OSPO-L4-GLOB-v1.0
MITgcm_LLC4320_Pre-SWOT_JPL_L4_BassStrait_v1.0
MW_IR_OI-REMSS-L4-GLOB-v5.0
MW_IR_OI-REMSS-L4-GLOB-v5.1
MW_OI-REMSS-L4-GLOB-v5.0
MW_OI-REMSS-L4-GLOB-v5.1
OISST_HR_NRT-GOS-L4-BLK-v2.0
OISST_HR_NRT-GOS-L4-MED-v2.0
OISST_UHR_NRT-GOS-L4-BLK-v2.0
OISST_UHR_NRT-GOS-L4-MED-v2.0
OSTIA-UKMO-L4-GLOB-v2.0
RAMSSA_09km-ABOM-L4-AUS-v01
REMO_OI_SST_5km-UFRJ-L4-SAMERICA-v1.0
REYNOLDS_NCDC_L4_MONTHLY_V5


### Get the granules belonging to each collection found above, within a time range

In [4]:
# Limit the granule search to a single day: 2022-07-18, 00:00:00Z to 23:59:59Z.
one_day_range = "2022-07-18T00:00:00Z,2022-07-18T23:59:59Z"
granule_URLs = cmr_query.query_granules_by_shortname(
    shortnames=shortnames,
    provider="POCLOUD",
    temporal_range=one_day_range,
)

# Print each granule URL that was found, one per line.
for g in granule_URLs:
    print(g)

s3://podaac-ops-cumulus-protected/CMC0.1deg-CMC-L4-GLOB-v3.0/20220717120000-CMC-L4_GHRSST-SSTfnd-CMC0.1deg-GLOB-v02.0-fv03.0.nc
s3://podaac-ops-cumulus-protected/CMC0.1deg-CMC-L4-GLOB-v3.0/20220718120000-CMC-L4_GHRSST-SSTfnd-CMC0.1deg-GLOB-v02.0-fv03.0.nc
s3://podaac-ops-cumulus-protected/MUR-JPL-L4-GLOB-v4.1/20220718090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1.nc
s3://podaac-ops-cumulus-protected/MUR-JPL-L4-GLOB-v4.1/20220719090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1.nc
s3://podaac-ops-cumulus-protected/K10_SST-NAVO-L4-GLOB-v01/20220718000000-NAVO-L4_GHRSST-SST1m-K10_SST-GLOB-v02.0-fv01.0.nc
s3://podaac-ops-cumulus-protected/MUR25-JPL-L4-GLOB-v04.2/20220718090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
s3://podaac-ops-cumulus-protected/MUR25-JPL-L4-GLOB-v04.2/20220719090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
s3://podaac-ops-cumulus-protected/AVHRR_OI-NCEI-L4-GLOB-v2.1/20220717120000-NCEI-L4_GHRSST-SSTblend-AVHRR_OI-GLOB-v02.0-fv02.1.nc
s3://podaac-ops-

In [None]:
# TODO: add a cell that copies the granules to the user's S3 bucket



# Below here is the lambda code

In [16]:
import requests
import s3fs
import xarray as xr
import numpy as np
import h5netcdf # don't actually need to import but must be installed

# Endpoints that hand out temporary S3 credentials, keyed by provider name.
s3_cred_endpoint = {
    'podaac':'https://archive.podaac.earthdata.nasa.gov/s3credentials'
}

def get_temp_creds(provider, timeout=30):
    """
    Request temporary S3 credentials from a provider's credentials endpoint.

    Parameters
    ==========
    provider: str
             key into s3_cred_endpoint (currently only 'podaac')
    timeout: scalar
             seconds to wait for the endpoint before giving up, default 30

    Return
    ======
    creds: the decoded JSON body of the response; the caller reads the
             'accessKeyId', 'secretAccessKey' and 'sessionToken' entries

    Raises
    ======
    KeyError
             if provider has no entry in s3_cred_endpoint
    requests.HTTPError
             if the endpoint answers with a 4xx/5xx status
    """
    # without a timeout, requests waits indefinitely on an unresponsive server
    response = requests.get(s3_cred_endpoint[provider], timeout=timeout)

    # fail here with the HTTP status rather than later with a confusing
    # JSON decode error or a missing-key error on the credentials
    response.raise_for_status()

    return response.json()

# Fetch temporary credentials from the 'podaac' endpoint.
temp_creds_req = get_temp_creds('podaac')

# Map the credential fields onto the keyword names S3FileSystem expects.
s3_auth = {
    'key': temp_creds_req['accessKeyId'],
    'secret': temp_creds_req['secretAccessKey'],
    'token': temp_creds_req['sessionToken'],
}

# Non-anonymous S3 filesystem handle, used later to open granules.
s3_client = s3fs.S3FileSystem(anon=False, **s3_auth)


In [17]:
def regrid(data_in, resolution=2):
    """
    Interpolate data_in onto a regular lat/lon grid.

    The data is interpolated along `lat` first and then along `lon`. The
    target coordinates start at -90 (lat) and -180 (lon) and advance in steps
    of `resolution`; the upper bounds 90 and 180 are not included.

    Parameters
    ==========
    data_in: xarray object exposing .interp() with `lat` and `lon` coordinates
             Sea surface temperature
    resolution: scalar
             spacing of the output grid in degrees, default 2

    Return
    ======
    data_out: xarray object
             the resampled SST at the specified resolution
    """
    target_lats = np.arange(-90, 90, resolution)
    target_lons = np.arange(-180, 180, resolution)

    # two successive 1-D interpolations: latitude first, then longitude
    on_target_lats = data_in.interp(lat=target_lats)
    return on_target_lats.interp(lon=target_lons)

In [19]:
#def lambda_handler(event, context):
    
    # get the bucket and key of the current granule
    # bucket = event['Records'][0]['s3']['bucket']['name']
    # key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'], encoding='utf-8')
    
    # s3_file_obj = s3_client.open('s3://' + bucket + key, mode='rb')
    
def lambda_handler(key):
    """
    Regrid one granule and write the result to a temporary NetCDF file.

    Parameters
    ==========
    key: str
             full S3 URL of the granule, e.g. 's3://bucket/collection/file.nc'

    Return
    ======
    tmp_file_path: str
             local path of the regridded NetCDF file written under /tmp
    """

    # create the temp path to write results to, named after the granule file
    # itself: drop the bucket/prefix and the extension. (key[-3] selected a
    # single character, so every '.nc' granule went to '/tmp/._regrid.nc'.)
    granule_name = key.rsplit('/', 1)[-1]
    granule_stem = granule_name.rsplit('.', 1)[0]
    tmp_file_path = '/tmp/' + granule_stem + '_regrid.nc'

    # open the granule as an s3 obj, then open it in xarray; both are closed
    # when the block exits, so everything that reads the data stays inside it
    with s3_client.open(key, mode='rb') as s3_file_obj, \
            xr.open_dataset(s3_file_obj, engine='h5netcdf') as ds:

        # process the function
        ds_results = regrid(ds)

        # write the results to a new netcdf file
        ds_results.to_netcdf(tmp_file_path, mode='w')

        print(ds_results)

    # TODO: send the result back to an s3 bucket once the bucket/key are taken
    # from the Lambda event again, e.g.
    # s3_client.put(tmp_file_path, <destination s3 path>)

    return tmp_file_path

In [20]:
# test the function: run the handler on a single granule from the
# REYNOLDS_NCDC_L4_MONTHLY_V5 collection

gran = "s3://podaac-ops-cumulus-protected/REYNOLDS_NCDC_L4_MONTHLY_V5/ersst.v5.202207.nc"

r = lambda_handler(gran)

NameError: name 'bucket' is not defined

### Clean up and delete the token

In [None]:
# Delete the token now that the queries are done (presumably the one set by
# login_and_set_token() at the top of the notebook; confirm in CMRQuery).
cmr_query.delete_token()