In [1]:
import s3fs
from os.path import dirname, join
import time
import xarray as xr
import requests
import datetime
import numpy as np

# Subroutines

In [2]:
def make_encodings(G, netcdf_fill_value=-999, extra_prints=True):
    # G is the xarray dataset
    # PROVIDE SPECIFIC ENCODING DIRECTIVES FOR EACH DATA VAR
    
    dv_encoding = {}
    for dv in G.data_vars:
        dv_encoding[dv] = {'zlib':True,
                           'complevel':5,
                           'shuffle':True,
                           'dtype':'float32',
                           '_FillValue':netcdf_fill_value}


        # overwrite default coordinates attribute (PODAAC REQUEST)
        #G[dv].encoding['coordinates'] = dv_coordinate_attrs[dv]


    # PROVIDE SPECIFIC ENCODING DIRECTIVES FOR EACH COORDINATE
    if extra_prints: print('\n... creating coordinate encodings')
    coord_encoding = {}


    for coord in G.coords:
        # default encoding: no fill value, float32
        coord_encoding[coord] = {'_FillValue':None, 'dtype':'float32'}


        if (G[coord].values.dtype == np.int32) or (G[coord].values.dtype == np.int64):
            coord_encoding[coord]['dtype'] ='int32'


        if coord == 'time' or coord == 'time_bnds':
            coord_encoding[coord]['dtype'] ='int32'


            if 'units' in G[coord].attrs:
                # apply units as encoding for time
                coord_encoding[coord]['units'] = G[coord].attrs['units']
                # delete from the attributes list
                del G[coord].attrs['units']


        elif coord == 'time_step':
            coord_encoding[coord]['dtype'] ='int32'


    # MERGE ENCODINGS for coordinates and variables
    encoding = {**dv_encoding, **coord_encoding}

    return encoding

# Dask

In [3]:
from dask.distributed import Client, LocalCluster
client = Client("tcp://100.104.70.36:8786")
client

0,1
Connection method: Direct,
Dashboard: http://100.104.70.36:8787/status,

0,1
Comm: tcp://100.104.70.36:8786,Workers: 16
Dashboard: http://100.104.70.36:8787/status,Total threads: 16
Started: 10 minutes ago,Total memory: 61.97 GiB

0,1
Comm: tcp://100.104.70.36:32929,Total threads: 1
Dashboard: http://100.104.70.36:45423/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:42163,
Local directory: /tmp/dask-scratch-space/worker-o2evywvq,Local directory: /tmp/dask-scratch-space/worker-o2evywvq
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 288.38 MiB,Spilled bytes: 0 B
Read bytes: 18.56 kiB,Write bytes: 18.62 kiB

0,1
Comm: tcp://100.104.70.36:34521,Total threads: 1
Dashboard: http://100.104.70.36:46677/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:44503,
Local directory: /tmp/dask-scratch-space/worker-btbrvbia,Local directory: /tmp/dask-scratch-space/worker-btbrvbia
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 290.43 MiB,Spilled bytes: 0 B
Read bytes: 15.28 kiB,Write bytes: 15.34 kiB

0,1
Comm: tcp://100.104.70.36:36493,Total threads: 1
Dashboard: http://100.104.70.36:44529/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:33029,
Local directory: /tmp/dask-scratch-space/worker-jxhavosi,Local directory: /tmp/dask-scratch-space/worker-jxhavosi
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 291.89 MiB,Spilled bytes: 0 B
Read bytes: 2.26 kiB,Write bytes: 2.31 kiB

0,1
Comm: tcp://100.104.70.36:36683,Total threads: 1
Dashboard: http://100.104.70.36:44471/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:37727,
Local directory: /tmp/dask-scratch-space/worker-j83p8oo2,Local directory: /tmp/dask-scratch-space/worker-j83p8oo2
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 284.44 MiB,Spilled bytes: 0 B
Read bytes: 21.82 kiB,Write bytes: 21.87 kiB

0,1
Comm: tcp://100.104.70.36:37691,Total threads: 1
Dashboard: http://100.104.70.36:37757/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:35463,
Local directory: /tmp/dask-scratch-space/worker-sgi1r2tm,Local directory: /tmp/dask-scratch-space/worker-sgi1r2tm
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 295.65 MiB,Spilled bytes: 0 B
Read bytes: 8.76 kiB,Write bytes: 8.82 kiB

0,1
Comm: tcp://100.104.70.36:37701,Total threads: 1
Dashboard: http://100.104.70.36:38363/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:33739,
Local directory: /tmp/dask-scratch-space/worker-4qgsyea9,Local directory: /tmp/dask-scratch-space/worker-4qgsyea9
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 281.71 MiB,Spilled bytes: 0 B
Read bytes: 3.88 kiB,Write bytes: 3.94 kiB

0,1
Comm: tcp://100.104.70.36:38241,Total threads: 1
Dashboard: http://100.104.70.36:39613/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:46045,
Local directory: /tmp/dask-scratch-space/worker-bn2r4cwx,Local directory: /tmp/dask-scratch-space/worker-bn2r4cwx
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 281.04 MiB,Spilled bytes: 0 B
Read bytes: 12.03 kiB,Write bytes: 12.08 kiB

0,1
Comm: tcp://100.104.70.36:38489,Total threads: 1
Dashboard: http://100.104.70.36:36867/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:41689,
Local directory: /tmp/dask-scratch-space/worker-x0kw0_h9,Local directory: /tmp/dask-scratch-space/worker-x0kw0_h9
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 285.72 MiB,Spilled bytes: 0 B
Read bytes: 25.05 kiB,Write bytes: 25.11 kiB

0,1
Comm: tcp://100.104.70.36:38491,Total threads: 1
Dashboard: http://100.104.70.36:35303/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:33075,
Local directory: /tmp/dask-scratch-space/worker-c9vfnmn5,Local directory: /tmp/dask-scratch-space/worker-c9vfnmn5
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 277.04 MiB,Spilled bytes: 0 B
Read bytes: 27.04 kiB,Write bytes: 27.10 kiB

0,1
Comm: tcp://100.104.70.36:38919,Total threads: 1
Dashboard: http://100.104.70.36:44781/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:36833,
Local directory: /tmp/dask-scratch-space/worker-u39tcjtj,Local directory: /tmp/dask-scratch-space/worker-u39tcjtj
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 283.30 MiB,Spilled bytes: 0 B
Read bytes: 7.15 kiB,Write bytes: 7.21 kiB

0,1
Comm: tcp://100.104.70.36:39635,Total threads: 1
Dashboard: http://100.104.70.36:46097/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:32983,
Local directory: /tmp/dask-scratch-space/worker-vupnp5zu,Local directory: /tmp/dask-scratch-space/worker-vupnp5zu
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 287.95 MiB,Spilled bytes: 0 B
Read bytes: 5.52 kiB,Write bytes: 5.57 kiB

0,1
Comm: tcp://100.104.70.36:39761,Total threads: 1
Dashboard: http://100.104.70.36:44351/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:40685,
Local directory: /tmp/dask-scratch-space/worker-jvan31l0,Local directory: /tmp/dask-scratch-space/worker-jvan31l0
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 284.18 MiB,Spilled bytes: 0 B
Read bytes: 10.42 kiB,Write bytes: 10.47 kiB

0,1
Comm: tcp://100.104.70.36:41521,Total threads: 1
Dashboard: http://100.104.70.36:44289/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:44199,
Local directory: /tmp/dask-scratch-space/worker-jbjsj_rd,Local directory: /tmp/dask-scratch-space/worker-jbjsj_rd
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 286.48 MiB,Spilled bytes: 0 B
Read bytes: 15.62 kiB,Write bytes: 15.67 kiB

0,1
Comm: tcp://100.104.70.36:42435,Total threads: 1
Dashboard: http://100.104.70.36:42959/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:42539,
Local directory: /tmp/dask-scratch-space/worker-24g2fslw,Local directory: /tmp/dask-scratch-space/worker-24g2fslw
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 283.44 MiB,Spilled bytes: 0 B
Read bytes: 20.18 kiB,Write bytes: 20.23 kiB

0,1
Comm: tcp://100.104.70.36:46001,Total threads: 1
Dashboard: http://100.104.70.36:38341/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:43089,
Local directory: /tmp/dask-scratch-space/worker-pdbmh_lm,Local directory: /tmp/dask-scratch-space/worker-pdbmh_lm
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 294.27 MiB,Spilled bytes: 0 B
Read bytes: 23.44 kiB,Write bytes: 23.50 kiB

0,1
Comm: tcp://100.104.70.36:46695,Total threads: 1
Dashboard: http://100.104.70.36:46697/status,Memory: 3.87 GiB
Nanny: tcp://100.104.70.36:36549,
Local directory: /tmp/dask-scratch-space/worker-7ff91_yj,Local directory: /tmp/dask-scratch-space/worker-7ff91_yj
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 285.52 MiB,Spilled bytes: 0 B
Read bytes: 13.67 kiB,Write bytes: 13.72 kiB


In [3]:
!cat ~/.netrc | grep 'urs.earthdata.nasa.gov' | wc -l

1


In [4]:
def store_aws_keys(endpoint: str="https://archive.podaac.earthdata.nasa.gov/s3credentials"):    
    with requests.get(endpoint, "w") as r:
        accessKeyId, secretAccessKey, sessionToken, expiration = list(r.json().values())

    creds ={}
    creds['AccessKeyId'] = accessKeyId
    creds['SecretAccessKey'] = secretAccessKey
    creds['SessionToken'] = sessionToken
    creds['expiration'] = expiration
    
    return creds

creds = store_aws_keys()

ValueError: not enough values to unpack (expected 4, got 1)

In [None]:
print(f"\nThe current session token expires at {creds['expiration']}.\n")
print(datetime.datetime.now())

In [5]:
ShortName = "OSCAR_L4_OC_FINAL_V2.0"

response = requests.get(
    url='https://cmr.earthdata.nasa.gov/search/collections.umm_json', 
    params={'provider': "POCLOUD",
            'ShortName': ShortName,
            'page_size': 1}
)
s3 = s3fs.S3FileSystem(
    key=creds['AccessKeyId'],
    secret=creds['SecretAccessKey'],
    token=creds['SessionToken'],
    client_kwargs={'region_name':'us-west-2'},
)
fs = s3fs.S3FileSystem(anon=False,
                      key=creds['AccessKeyId'],
                      secret=creds['SecretAccessKey'],
                      token=creds['SessionToken'])

NameError: name 'creds' is not defined

In [8]:
year=1993
month=1
day=1
oscar_files = fs.glob(join("podaac-ops-cumulus-protected/", ShortName, '*'+str(year)+str(month).zfill(2)+str(day).zfill(2)+'*.nc'))
paths=[fs.open(f) for f in oscar_files]
paths

[<File-like object S3FileSystem, podaac-ops-cumulus-protected/OSCAR_L4_OC_FINAL_V2.0/oscar_currents_final_19930101.nc>]

In [9]:
start_time = time.time()
oscar_final = xr.open_mfdataset(paths,
                                parallel=True, 
                                data_vars='minimal', 
                                coords='minimal',
                                compat='override')

print(time.time() - start_time)
print((time.time() - start_time)/len(paths))

2.1245386600494385
2.1246860027313232


In [10]:
encodings = make_encodings(oscar_final)


... creating coordinate encodings


In [11]:
encodings

{'u': {'zlib': True,
  'complevel': 5,
  'shuffle': True,
  'dtype': 'float32',
  '_FillValue': -999},
 'v': {'zlib': True,
  'complevel': 5,
  'shuffle': True,
  'dtype': 'float32',
  '_FillValue': -999},
 'ug': {'zlib': True,
  'complevel': 5,
  'shuffle': True,
  'dtype': 'float32',
  '_FillValue': -999},
 'vg': {'zlib': True,
  'complevel': 5,
  'shuffle': True,
  'dtype': 'float32',
  '_FillValue': -999},
 'lat': {'_FillValue': None, 'dtype': 'float32'},
 'lon': {'_FillValue': None, 'dtype': 'float32'},
 'time': {'_FillValue': None, 'dtype': 'int32'}}

In [12]:
oscar_final = oscar_final.load()

In [13]:
oscar_final

In [14]:
oscar_final.to_netcdf('/home/jpluser/efs-mount-point/sevfour/oscar/encodedfiles/test.nc', 
                      encoding=encodings)