In [1]:
import s3fs
from os.path import dirname, join
import time
import xarray as xr
import numpy as np
# import matplotlib.pyplot as plt
# import cartopy.crs as ccrs
# import cartopy.feature as cfeature
# import requests
# import boto3
# from pprint import pprint
# from pyresample import kd_tree, geometry, utils
# from pyresample.geometry import GridDefinition
# from pathlib import Path
# import os

### Confirm Existence of .netrc file in your home directory

In [2]:
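# Create a .netrc file in your home directory containing the following line:
# machine urs.earthdata.nasa.gov login <your_earthdata_username> password <your_earthdata_password>
# Replace the placeholders with your own Earthdata Login username and password;
# never commit real credentials to a notebook.

# If the command in the next cell prints 1, the entry exists and you are good to go.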

In [3]:
# Count the lines of ~/.netrc that mention the Earthdata Login host (urs.earthdata.nasa.gov).
!cat ~/.netrc | grep 'urs.earthdata.nasa.gov' | wc -l

1


### Get credentials

In [4]:
%%capture
import requests

def store_aws_keys(endpoint: str="https://archive.podaac.earthdata.nasa.gov/s3credentials", timeout: float=60.0):
    """Fetch temporary AWS S3 credentials from an ``s3credentials`` endpoint.

    Parameters
    ----------
    endpoint : str
        URL that answers with the temporary credentials as a JSON object.
    timeout : float
        Seconds to wait for the server before the request is abandoned.

    Returns
    -------
    dict
        The credentials under the keys 'AccessKeyId', 'SecretAccessKey',
        'SessionToken' and 'expiration'.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with an HTTP error status.
    ValueError
        If the JSON payload cannot be mapped onto the four expected values.
    """
    # Plain GET without extra positional arguments: a second positional
    # argument is interpreted by requests as `params`, so the former "w"
    # ended up in the URL as a stray "?w" query string.
    with requests.get(endpoint, timeout=timeout) as r:
        # Fail early with a clear HTTP error instead of a confusing JSON or
        # unpacking error further down.
        r.raise_for_status()
        payload = r.json()

    # Prefer lookup by name so that a reordered or extended response does not
    # silently shuffle the secrets.
    # NOTE(review): key names are assumed from the endpoint's usual response
    # schema - confirm. If they are absent, fall back to positional order.
    wanted = ("accessKeyId", "secretAccessKey", "sessionToken", "expiration")
    if all(name in payload for name in wanted):
        values = [payload[name] for name in wanted]
    else:
        values = list(payload.values())
        if len(values) != len(wanted):
            # Report only the key names; the values may be secrets.
            raise ValueError(
                f"Expected {len(wanted)} credential fields, got keys: {sorted(payload)}"
            )

    accessKeyId, secretAccessKey, sessionToken, expiration = values

    return {
        'AccessKeyId': accessKeyId,
        'SecretAccessKey': secretAccessKey,
        'SessionToken': sessionToken,
        'expiration': expiration,
    }

# Fetch the temporary credentials once; later cells read them from `creds`.
# The dictionary holds a secret key and a session token, so it is deliberately
# not printed - secrets must never end up in saved notebook output.
creds = store_aws_keys()

In [5]:
# Report the 'expiration' value of the temporary credentials stored in `creds`.
print(f"\nThe current session token expires at {creds['expiration']}.\n")


The current session token expires at 2024-11-07 23:54:54+00:00.



## Make a "direct connection" to the S3 file system

In [6]:
# Collection short names; later cells use them in the CMR queries and as the
# folder names in the S3 glob patterns.
ShortName1 = "OSCAR_L4_OC_FINAL_V2.0"
ShortName2 = "OSCAR_L4_OC_INTERIM_V2.0"

In [7]:
# Query the CMR collection search for the first collection (ShortName1),
# restricted to the POCLOUD provider and to a single result.
# NOTE(review): `response` is not read by any cell visible in this notebook.
response = requests.get(
    url="https://cmr.earthdata.nasa.gov/search/collections.umm_json",
    params=dict(provider="POCLOUD", ShortName=ShortName1, page_size=1),
)

In [8]:
# S3 filesystem object built from the temporary credentials, with the client
# region set to us-west-2.
# NOTE(review): no other cell visible in this notebook references `s3`.
s3 = s3fs.S3FileSystem(
    key=creds["AccessKeyId"],
    secret=creds["SecretAccessKey"],
    token=creds["SessionToken"],
    client_kwargs=dict(region_name="us-west-2"),
)

In [9]:
# Authenticated (non-anonymous) S3 filesystem object used for the first
# collection (ShortName1).
fs1 = s3fs.S3FileSystem(
    anon=False,
    key=creds["AccessKeyId"],
    secret=creds["SecretAccessKey"],
    token=creds["SessionToken"],
)

In [10]:
# Query the CMR collection search for the second collection (ShortName2),
# restricted to the POCLOUD provider and to a single result. This rebinds
# `response`.
# NOTE(review): `response` is not read by any cell visible in this notebook.
response = requests.get(
    url="https://cmr.earthdata.nasa.gov/search/collections.umm_json",
    params=dict(provider="POCLOUD", ShortName=ShortName2, page_size=1),
)

In [11]:
# Rebuilds `s3` with the same arguments as the earlier cell that creates it:
# temporary credentials plus the us-west-2 client region.
# NOTE(review): no other cell visible in this notebook references `s3`.
s3 = s3fs.S3FileSystem(
    key=creds["AccessKeyId"],
    secret=creds["SecretAccessKey"],
    token=creds["SessionToken"],
    client_kwargs=dict(region_name="us-west-2"),
)

In [12]:
# Authenticated (non-anonymous) S3 filesystem object used for the second
# collection (ShortName2).
fs2 = s3fs.S3FileSystem(
    anon=False,
    key=creds["AccessKeyId"],
    secret=creds["SecretAccessKey"],
    token=creds["SessionToken"],
)

## Load all of the files in each collection from AWS S3 using a 'direct connection' and combine each collection into a single xarray Dataset object

Note: this takes a minute.

In [13]:
# from dask.distributed import Client

# client = Client("tcp://127.0.0.1:38643")
# client

In [16]:
# S3 object keys always use "/" as separator, so the glob pattern is built with
# an f-string instead of os.path.join (which emits "\" on Windows).
S3_BUCKET = "podaac-ops-cumulus-protected"


def open_collection_files(fs, short_name, bucket=S3_BUCKET):
    """Open every ``*.nc`` object of one collection folder as a file object.

    Parameters
    ----------
    fs : object
        Filesystem providing ``glob`` and ``open`` (here an s3fs.S3FileSystem).
    short_name : str
        Name of the collection folder below ``bucket``.
    bucket : str
        Bucket that holds the collection folders.

    Returns
    -------
    list
        Open file objects, ordered by object key.

    Raises
    ------
    FileNotFoundError
        If no object matches the pattern.
    """
    pattern = f"{bucket}/{short_name}/*.nc"
    # Sort explicitly: the next cell concatenates the files in list order.
    keys = sorted(fs.glob(pattern))
    if not keys:
        # Fail here with the offending pattern rather than later, when the
        # empty list is handed to xarray.
        raise FileNotFoundError(f"No files match s3://{pattern}")
    # The handles stay open on purpose; presumably the datasets built from
    # them read their data lazily.
    return [fs.open(key) for key in keys]


# Reference point for the elapsed-time report printed by the next cell.
start_time = time.time()
paths1 = open_collection_files(fs1, ShortName1)
paths2 = open_collection_files(fs2, ShortName2)

In [None]:
# Both collections are opened with the same settings: files are concatenated
# in list order along 'time', CF decoding is on, and each dask chunk holds one
# time step.
mfdataset_options = dict(
    combine='nested',
    concat_dim='time',
    decode_cf=True,
    coords='minimal',
    chunks={'time': 1},
)

oscar_final = xr.open_mfdataset(paths=paths1, **mfdataset_options)
oscar_interim = xr.open_mfdataset(paths=paths2, **mfdataset_options)

# Seconds elapsed since `start_time` was set in the previous cell.
elapsed_seconds = time.time() - start_time
print(elapsed_seconds)

In [None]:
# Display the combined dataset built from the ShortName1 files.
oscar_final

In [None]:
# Display the combined dataset built from the ShortName2 files.
oscar_interim