In [1]:
import requests
import s3fs
from pprint import pprint
import xarray as xr
import numpy
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy
from tqdm import tqdm

#to make big figures
plt.rcParams['figure.figsize'] = 12, 6

In [2]:
#select L2 data: 
dataset='podaac-ops-cumulus-protected/MODIS_T-JPL-L2P-v2019.0'
shortname='MODIS_T-JPL-L2P-v2019.0'

#timeframe of interest:
start_date = '2015-09-15T00:00:00Z'
end_date   = '2015-09-15T23:59:59Z'

#region of interest:
lonmin = -170.0
latmin = 65.0
lonmax = -120.0
latmax = 80.0

In [3]:
CMR_OPS = 'https://cmr.earthdata.nasa.gov/search'
url = f'{CMR_OPS}/{"collections"}'

In [4]:
provider = 'POCLOUD'
response = requests.get(url,
                        params={
                            'short_name': shortname,
                            'cloud_hosted': 'True',
                            'has_granules': 'True',
                            'provider': provider,
                            'page_size': 251,
                        },
                        headers={
                            'Accept': 'application/json'
                        }
                       )
collections = response.json()['feed']['entry']
for collection in collections:
    print(f'{collection["archive_center"]} {collection["dataset_id"]} {collection["id"]}')

NASA/JPL/PODAAC GHRSST Level 2P Global Sea Surface Skin Temperature from the Moderate Resolution Imaging Spectroradiometer (MODIS) on the NASA Terra satellite (GDS2) C1940475563-POCLOUD


In [5]:
concept_id='C1940475563-POCLOUD' #get the concept ID from earthdata website, clicking on the dataset, looking at the http link: https://search.earthdata.nasa.gov/search/granules/collection-details?p=C2075141524-POCLOUD&pg[0][v]=f&pg[0][gsk]=-start_date&q=L2&ff=Available%20from%20AWS%20Cloud&fi=ASCAT&fl=2%20-%20Geophys.%20Variables%2C%20Sensor%20Coordinates&tl=1637188235.263!3!!&m=8.456485070842092!-75.234375!1!1!0!0%2C2
url = f'{CMR_OPS}/{"granules"}'
spacebox=str(lonmin)+','+str(latmin)+','+str(lonmax)+','+str(latmax) #SW lon, SW lat, NE lon, NE lat
tempbox=f'{start_date},{end_date}'
response = requests.get(url, 
                        params={
                            'concept_id': concept_id,
                            'temporal': tempbox,
                            'bounding_box': spacebox,
                            'page_size': 228,
                            },
                        headers={
                            'Accept': 'application/json'
                            }
                       )
print(response.status_code)
print(response.headers['cmr-hits'])

200
41


In [6]:
granules = response.json()['feed']['entry']
urls = []
for granule in granules:
    # print(granule['boxes'], granule['links'][0]['href'])
    urls.append(granule['links'][0]['href'])
pprint(urls[0])
pprint(urls[1])

's3://podaac-ops-cumulus-protected/MODIS_T-JPL-L2P-v2019.0/20150915011500-JPL-L2P_GHRSST-SSTskin-MODIS_T-N-v02.0-fv01.0.nc'
's3://podaac-ops-cumulus-protected/MODIS_T-JPL-L2P-v2019.0/20150915011500-JPL-L2P_GHRSST-SSTskin-MODIS_T-D-v02.0-fv01.0.nc'


In [7]:
#s3fs
s3_cred_endpoint = {
    'podaac':'https://archive.podaac.earthdata.nasa.gov/s3credentials'}

def get_temp_creds():
    temp_creds_url = s3_cred_endpoint['podaac']
    return requests.get(temp_creds_url).json()

temp_creds_req = get_temp_creds()

s3_client = s3fs.S3FileSystem(anon=False,key=temp_creds_req['accessKeyId'], secret=temp_creds_req['secretAccessKey'], token=temp_creds_req['sessionToken'])

In [8]:
ds = xr.open_dataset(s3_client.open('podaac-ops-cumulus-protected/MODIS_T-JPL-L2P-v2019.0/20150915011500-JPL-L2P_GHRSST-SSTskin-MODIS_T-N-v02.0-fv01.0.nc'), decode_cf=False) #issue when trying to open because of some CF conventions (?) so we add decode_cf=False
ds

In [9]:
# for file in urls:
#     ds = xr.open_dataset(s3_client.open(file), decode_cf=False) #issue when trying to open because of some CF conventions (?) so we add decode_cf=False
#     pprint(ds.sea_surface_temperature.shape)

In [10]:
fileset = [s3_client.open(file) for file in urls[0:10]] 
ds_stacked = xr.open_mfdataset(fileset, combine='by_coords', 
                               decode_cf=False,)
ds_stacked

ValueError: arguments without labels along dimension 'nj' cannot be aligned because they have different dimension sizes: {2040, 2030}

In [25]:
def _open_mfdataset(urls: list): #from Jack, a simple concatenation with mfdataset was not working; have to put decode_cf=False because this dataset has a pb

    def _open_dataset(x):
        return xr.open_dataset(s3_client.open(x), 
                               decode_cf=False,
                               decode_coords=True,
                               mask_and_scale=True, )

    return xr.concat(list(map(lambda x: _open_dataset(x), tqdm(urls))), 
                     dim="time",
                     data_vars={"sea_surface_temperature"},#{"minimal"}, #['sea_surface_temperature'],
                     fill_value=numpy.nan, ).set_coords(['lat', 'lon'])
ds = _open_mfdataset(sorted(urls))

print(ds)

100%|██████████| 41/41 [00:07<00:00,  5.62it/s]


ValueError: arguments without labels along dimension 'nj' cannot be aligned because they have different dimension sizes: {2040, 2030}

In [None]:
fileset = [s3_client.open(file) for file in urls] 
ds_stacked = xr.open_mfdataset(fileset,data_vars=['analysed_sst'],engine="h5netcdf",
    combine="by_coords",
    decode_coords=True,
    mask_and_scale=True,
    decode_cf=False,)
ds_stacked
# ds_stacked = xr.open_mfdataset(fileset, combine='by_coords')
# ds_stacked = xr.open_mfdataset(fileset,data_vars=['analysed_sst'],engine="h5netcdf",
#     combine="nested",
#     concat_dim="time",
#     decode_coords=True,
#     mask_and_scale=True,
#     decode_cf=False,)