# Using Dask to process MEaSUREs Gridded Sea Surface Height Anomalies Version 1812


In [None]:
import xarray as xr
import pprint
import h5py

# The short name identifies one specific dataset in NASA Earthdata.
short_name = "SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL1812"

In [None]:
def init_S3FileSystem():
    """Return an ``s3fs.S3FileSystem`` built from temporary PO.DAAC credentials.

    Requests a JSON credentials document from the PO.DAAC ``s3credentials``
    endpoint and hands its ``accessKeyId``, ``secretAccessKey`` and
    ``sessionToken`` fields to ``s3fs.S3FileSystem``.

    Returns:
        s3fs.S3FileSystem: File system created with ``anon=False``.

    Raises:
        requests.HTTPError: If the endpoint answers with an HTTP error status.
        requests.Timeout: If the endpoint does not answer within the timeout.
    """
    import requests
    import s3fs

    # Known credential endpoints; only the PO.DAAC one is used here.
    s3_cred_endpoint = {
        'podaac': 'https://archive.podaac.earthdata.nasa.gov/s3credentials',
        'lpdaac': 'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials',
    }

    temp_creds_url = s3_cred_endpoint['podaac']
    # A timeout keeps the notebook from hanging forever on a stalled request.
    response = requests.get(temp_creds_url, timeout=60)
    # Fail here with a clear HTTP error instead of a confusing JSON/KeyError
    # further down when the request was rejected.
    response.raise_for_status()
    creds = response.json()

    return s3fs.S3FileSystem(
        anon=False,
        key=creds['accessKeyId'],
        secret=creds['secretAccessKey'],
        token=creds['sessionToken'],
    )

def return_token():
    """Fetch temporary S3 credentials from the PO.DAAC ``s3credentials`` endpoint.

    Returns:
        The parsed JSON body of the response. ``init_S3FileSystem`` reads the
        keys ``accessKeyId``, ``secretAccessKey`` and ``sessionToken`` from
        the same endpoint.

    Raises:
        requests.HTTPError: If the endpoint answers with an HTTP error status.
        requests.Timeout: If the endpoint does not answer within the timeout.
    """
    import requests

    # Known credential endpoints; only the PO.DAAC one is used here.
    s3_cred_endpoint = {
        'podaac': 'https://archive.podaac.earthdata.nasa.gov/s3credentials',
        'lpdaac': 'https://data.lpdaac.earthdatacloud.nasa.gov/s3credentials',
    }

    temp_creds_url = s3_cred_endpoint['podaac']
    # A timeout keeps the notebook from hanging forever on a stalled request.
    response = requests.get(temp_creds_url, timeout=60)
    # Surface a rejected request as an HTTP error rather than a JSON failure.
    response.raise_for_status()
    return response.json()

In [None]:
# Build the S3 file system, then list every .nc file stored under the
# dataset's short name in the protected PO.DAAC bucket.
s3sys = init_S3FileSystem()
fns = s3sys.glob(
    "s3://podaac-ops-cumulus-protected/%s/*.nc" % short_name
)


In [None]:
# Open the first listed file through the S3 file system and show the mean of
# its SLA variable as the cell output.
first_file = s3sys.open(fns[0])
d = xr.open_dataset(first_file)
d["SLA"].mean()

In [None]:
def calc_mean(fn_s3,s3sys):
    """Return the mean of the ``SLA`` variable stored in one file.

    Args:
        fn_s3: Path of the file; passed unchanged to ``s3sys.open``.
        s3sys: File-system object whose ``open`` method returns a file-like
            object (the notebook passes the result of ``init_S3FileSystem``).

    Returns:
        The ``.data`` of ``SLA.mean()`` for that file.
    """
    # Context managers close both the remote file handle and the dataset once
    # the mean has been computed, so handles do not pile up when this runs
    # over many files.
    with s3sys.open(fn_s3) as remote_file:
        with xr.open_dataset(remote_file) as d:
            return d['SLA'].mean().data

In [None]:
from dask import compute, delayed
from dask.distributed import Client

# Create a Dask client backed by two workers.
client = Client(n_workers=2)

In [None]:
# Bare expression on the cell's last line so the notebook displays the client.
client

In [None]:
# Report how many files the listing returned.
print('There are %i files.' % (len(fns),))

In [None]:
%%time

# Build one lazy calc_mean task per file (first 100 files only); nothing is
# read from S3 until compute() is called.
result = [delayed(calc_mean)(fn, s3sys) for fn in fns[:100]]

# compute() returns a tuple with one entry per argument, so `output` is a
# 1-tuple wrapping the list of per-file means.
output = compute(result)
print(output)


In [None]:
%%time

# Serial baseline: call calc_mean directly on the same first 100 files, one
# after another, for comparison with the delayed version.
result = [calc_mean(fn, s3sys) for fn in fns[:100]]
print(result)