## Accessing data with an HTTP or OPeNDAP URL is very easy

In [2]:
import xarray as xr

In [3]:
# Open a netCDF file on kage directly from its HTTP URL.
# Tip: point a browser at http://kage.ldeo.columbia.edu/data/pdsi-spei/ to see what is available.

# the '#mode=bytes' suffix requests HTTP byte-range access to the remote file
url = 'http://kage.ldeo.columbia.edu/data/pdsi-spei/pdsi/GFDL_ESM2G/pdsi_all.nc#mode=bytes'

# decode_times=False: the T axis is left as plain numbers rather than converted to dates
ds = xr.open_dataset(url, decode_times=False)
print(ds)
# optional quick look at the mean over T:
#ds.pdsi_all.mean('T').plot()

<xarray.Dataset>
Dimensions:   (T: 2400, ensemble: 1, lat: 75, lon: 144)
Coordinates:
  * lon       (lon) float32 1.25 3.75 6.25 8.75 ... 351.2 353.8 356.2 358.8
  * lat       (lat) float32 -59.66 -57.64 -55.62 -53.6 ... 85.96 87.98 89.49
  * ensemble  (ensemble) float32 1.0
  * T         (T) float32 0.5 1.5 2.5 3.5 ... 2.398e+03 2.398e+03 2.4e+03
Data variables:
    pdsi_all  (T, ensemble, lat, lon) float64 ...


In [3]:
# Open a CMIP6 netCDF file served over HTTP by mary.

# adjacent string literals are joined into one URL
url = (
    'http://mary.ldeo.columbia.edu/CMIP6/CMIP/BCC/BCC-ESM1/historical/r1i1p1f1/Amon/clt/gn/v20181214/'
    'clt_Amon_BCC-ESM1_historical_r1i1p1f1_gn_185001-201412.nc#mode=bytes'
)

xr.open_dataset(url)

In [4]:
# xr.open_dataset also accepts an OPeNDAP URL:

url = (
    'https://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/ScenarioMIP/AS-RCEC/TaiESM1/ssp370/'
    'r1i1p1f1/day/tasmax/gn/v20210323/tasmax_day_TaiESM1_ssp370_r1i1p1f1_gn_20150101-20241231.nc'
)
xr.open_dataset(url)

In [None]:
# Globus - does NOT work like the URLs above: endpoints etc. must be set up first.

url = (
    'globus:415a6320-e49c-11e5-9798-22000b9da45e/css03_data/CMIP6/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp370/r6i1p1f1/day'
    '/tasmax/gr/v20190614/tasmax_day_IPSL-CM6A-LR_ssp370_r6i1p1f1_gr_20150101-21001231.nc'
)
# the open call is intentionally left disabled:
# xr.open_dataset(url)

In [23]:
# Pangeo CMIP6 zarr collection on Google Cloud Storage, addressed through its HTTPS endpoint

# location of one zarr store inside the bucket
path = 'CMIP6/ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r2i1p1f1/day/va/gn/v20190710'

url = 'https://cmip6.storage.googleapis.com/' + path
ds = xr.open_zarr(url, consolidated=True)

# total size of the dataset, reported in units of 1e9 bytes
print('size of dataset:',ds.nbytes/1e9,'G')
ds

size of dataset: 18.527722504 G


Unnamed: 0,Array,Chunk
Bytes,1.54 kB,1.54 kB
Shape,"(96, 2)","(96, 2)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.54 kB 1.54 kB Shape (96, 2) (96, 2) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",2  96,

Unnamed: 0,Array,Chunk
Bytes,1.54 kB,1.54 kB
Shape,"(96, 2)","(96, 2)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 kB,3.07 kB
Shape,"(192, 2)","(192, 2)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.07 kB 3.07 kB Shape (192, 2) (192, 2) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",2  192,

Unnamed: 0,Array,Chunk
Bytes,3.07 kB,3.07 kB
Shape,"(192, 2)","(192, 2)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,502.58 kB,251.30 kB
Shape,"(31411, 2)","(15706, 2)"
Count,3 Tasks,2 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 502.58 kB 251.30 kB Shape (31411, 2) (15706, 2) Count 3 Tasks 2 Chunks Type datetime64[ns] numpy.ndarray",2  31411,

Unnamed: 0,Array,Chunk
Bytes,502.58 kB,251.30 kB
Shape,"(31411, 2)","(15706, 2)"
Count,3 Tasks,2 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.53 GB,83.76 MB
Shape,"(31411, 8, 96, 192)","(142, 8, 96, 192)"
Count,223 Tasks,222 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 18.53 GB 83.76 MB Shape (31411, 8, 96, 192) (142, 8, 96, 192) Count 223 Tasks 222 Chunks Type float32 numpy.ndarray",31411  1  192  96  8,

Unnamed: 0,Array,Chunk
Bytes,18.53 GB,83.76 MB
Shape,"(31411, 8, 96, 192)","(142, 8, 96, 192)"
Count,223 Tasks,222 Chunks
Type,float32,numpy.ndarray


In [24]:
# Pangeo CMIP6 zarr collection on Amazon S3 (a mirror of the GCS copy), via its HTTPS endpoint

# location of one zarr store inside the bucket
path = 'CMIP6/ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r2i1p1f1/day/va/gn/v20190710'

url = 'https://cmip6-pds.s3.amazonaws.com/' + path
ds = xr.open_zarr(url, consolidated=True)

# total size of the dataset, reported in units of 1e9 bytes
print('size of dataset:',ds.nbytes/1e9,'G')
ds

size of dataset: 18.527722504 G


Unnamed: 0,Array,Chunk
Bytes,1.54 kB,1.54 kB
Shape,"(96, 2)","(96, 2)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.54 kB 1.54 kB Shape (96, 2) (96, 2) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",2  96,

Unnamed: 0,Array,Chunk
Bytes,1.54 kB,1.54 kB
Shape,"(96, 2)","(96, 2)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 kB,3.07 kB
Shape,"(192, 2)","(192, 2)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.07 kB 3.07 kB Shape (192, 2) (192, 2) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",2  192,

Unnamed: 0,Array,Chunk
Bytes,3.07 kB,3.07 kB
Shape,"(192, 2)","(192, 2)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,502.58 kB,251.30 kB
Shape,"(31411, 2)","(15706, 2)"
Count,3 Tasks,2 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 502.58 kB 251.30 kB Shape (31411, 2) (15706, 2) Count 3 Tasks 2 Chunks Type datetime64[ns] numpy.ndarray",2  31411,

Unnamed: 0,Array,Chunk
Bytes,502.58 kB,251.30 kB
Shape,"(31411, 2)","(15706, 2)"
Count,3 Tasks,2 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.53 GB,83.76 MB
Shape,"(31411, 8, 96, 192)","(142, 8, 96, 192)"
Count,223 Tasks,222 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 18.53 GB 83.76 MB Shape (31411, 8, 96, 192) (142, 8, 96, 192) Count 223 Tasks 222 Chunks Type float32 numpy.ndarray",31411  1  192  96  8,

Unnamed: 0,Array,Chunk
Bytes,18.53 GB,83.76 MB
Shape,"(31411, 8, 96, 192)","(142, 8, 96, 192)"
Count,223 Tasks,222 Chunks
Type,float32,numpy.ndarray


In [38]:
# Amazon S3, GFDL netcdf collection
#
# A single file can be opened through its HTTPS URL as shown here; reading from the
# S3 file system directly is the better method and is shown further below.

url = (
    'https://esgf-world.s3.amazonaws.com/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/abrupt-4xCO2/r1i1p1f1/Lmon/cLeaf/gr/v20190118/'
    'cLeaf_Lmon_IPSL-CM6A-LR_abrupt-4xCO2_r1i1p1f1_gr_185001-214912.nc#mode=bytes'
)
ds = xr.open_dataset(url, decode_coords=False)
ds

<xarray.Dataset>
Dimensions:      (axis_nbounds: 2, lat: 143, lon: 144, time: 3600)
Coordinates:
  * lat          (lat) float32 -90.0 -88.73 -87.46 -86.2 ... 87.46 88.73 90.0
  * lon          (lon) float32 0.0 2.5 5.0 7.5 10.0 ... 350.0 352.5 355.0 357.5
  * time         (time) datetime64[ns] 1850-01-16T12:00:00 ... 2149-12-16T12:...
Dimensions without coordinates: axis_nbounds
Data variables:
    time_bounds  (time, axis_nbounds) datetime64[ns] ...
    cLeaf        (time, lat, lon) float32 ...
Attributes:
    name:                   /ccc/work/cont003/gencmip6/p86maf/IGCM_OUT/IPSLCM...
    Conventions:            CF-1.7 CMIP-6.2
    creation_date:          2018-05-03T13:17:14Z
    description:            DECK: abrupt-4xCO2
    title:                  IPSL-CM6A-LR model output prepared for CMIP6 / CM...
    activity_id:            CMIP
    contact:                ipsl-cmip6@listes.ipsl.fr
    data_specs_version:     01.00.21
    dr2xml_version:         1.3
    experiment_id:          ab

## We can also instantiate a file system

In [4]:
import xarray as xr
import fsspec

# Create an fsspec HTTP file system and glob the remote directory listing with wildcards
fs_http = fsspec.filesystem('http')
files = fs_http.glob(
    'http://kage.ldeo.columbia.edu/data/ERA5/monthly/single_level/*/2m_*.nc'
)
files

['http://kage.ldeo.columbia.edu/data/ERA5/monthly/single_level/y1950-1978/2m_dewpoint_temperature.nc',
 'http://kage.ldeo.columbia.edu/data/ERA5/monthly/single_level/y1950-1978/2m_temperature.nc',
 'http://kage.ldeo.columbia.edu/data/ERA5/monthly/single_level/y1979-2020/2m_dewpoint_temperature.nc',
 'http://kage.ldeo.columbia.edu/data/ERA5/monthly/single_level/y1979-2020/2m_temperature.nc']

In [5]:
# append '#mode=bytes' to every globbed URL, then open all files as one dataset
urls = [remote + '#mode=bytes' for remote in files]
ds = xr.open_mfdataset(urls)
ds

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray


In [9]:
# Find every ERA5 monthly "vertical_integral*" file on kage and open them as one dataset.
urls = fs_http.glob('http://kage.ldeo.columbia.edu/datasets/ERA5-monthly/single_level/*/vertical_integral*.nc')

# combine='by_coords' orders and joins the files using their coordinate values, so no
# concat_dim is passed: current xarray raises ValueError when concat_dim is given together
# with combine='by_coords' (the argument has no effect in that mode).
ds = xr.open_mfdataset([url + '#mode=bytes' for url in urls], combine='by_coords')
ds

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.49 GB 2.04 GB Shape (840, 721, 1440) (492, 721, 1440) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",1440  721  840,

Unnamed: 0,Array,Chunk
Bytes,3.49 GB,2.04 GB
Shape,"(840, 721, 1440)","(492, 721, 1440)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray


In [7]:
# List each data variable of the dataset next to its 'long_name' attribute.
# attrs.get() is used so that a variable without a long_name prints a placeholder
# instead of aborting the loop with KeyError.
for var in ds.data_vars:
    print('variable =',var,':  long_name =',ds[var].attrs.get('long_name', '(no long_name)'))

variable = p53.162 :  long_name = Vertical integral of mass of atmosphere
variable = p54.162 :  long_name = Vertical integral of temperature
variable = p59.162 :  long_name = Vertical integral of kinetic energy
variable = p60.162 :  long_name = Vertical integral of thermal energy
variable = p61.162 :  long_name = Vertical integral of potential+internal energy
variable = p62.162 :  long_name = Vertical integral of potential+internal+latent energy
variable = p63.162 :  long_name = Vertical integral of total energy
variable = p64.162 :  long_name = Vertical integral of energy conversion
variable = p65.162 :  long_name = Vertical integral of eastward mass flux
variable = p66.162 :  long_name = Vertical integral of northward mass flux
variable = p67.162 :  long_name = Vertical integral of eastward kinetic energy flux
variable = p68.162 :  long_name = Vertical integral of northward kinetic energy flux
variable = p69.162 :  long_name = Vertical integral of eastward heat flux
variable = p70.16

In [11]:
# FTP sites can be browsed the same way.
# No credentials are passed here; port=..., username=... and password=... can be added if needed.
fs_ftp = fsspec.filesystem('ftp', host='ftp.cdc.noaa.gov')
fs_ftp.glob('/Projects/Datasets/*')

['/Projects/Datasets/COBE',
 '/Projects/Datasets/COBE2',
 '/Projects/Datasets/CarbonTracker',
 '/Projects/Datasets/README',
 '/Projects/Datasets/Timeseries',
 '/Projects/Datasets/cmap',
 '/Projects/Datasets/cpc_global_precip',
 '/Projects/Datasets/cpc_global_temp',
 '/Projects/Datasets/cpc_us_hour_precip',
 '/Projects/Datasets/cpc_us_precip',
 '/Projects/Datasets/cpcsoil',
 '/Projects/Datasets/cru',
 '/Projects/Datasets/dai_pdsi',
 '/Projects/Datasets/ghcncams',
 '/Projects/Datasets/ghcngridded',
 '/Projects/Datasets/gistemp',
 '/Projects/Datasets/godas',
 '/Projects/Datasets/gpcc',
 '/Projects/Datasets/gpcp',
 '/Projects/Datasets/icoads',
 '/Projects/Datasets/icoads2.5',
 '/Projects/Datasets/interp_OLR',
 '/Projects/Datasets/jmatemp',
 '/Projects/Datasets/kaplan_sst',
 '/Projects/Datasets/mlost',
 '/Projects/Datasets/mlostv3b',
 '/Projects/Datasets/mount_check_file',
 '/Projects/Datasets/msu',
 '/Projects/Datasets/ncep',
 '/Projects/Datasets/ncep.marine',
 '/Projects/Datasets/ncep.pac

In [12]:
# Read a single file from an FTP server without saving it to disk first.
# Note: resp.read() still transfers the complete file; the bytes are kept in memory
# (io.BytesIO) and handed to xarray as a file-like object.
import io
import urllib.request  # `import urllib` alone does not load the `request` submodule

url = 'ftp://ftp.cdc.noaa.gov/Projects/Datasets/ncep.reanalysis.derived/surface/air.sig995.mon.mean.nc'
req = urllib.request.Request(url)

# the context manager closes the FTP response once its bytes have been read
with urllib.request.urlopen(req) as resp:
    ds = xr.open_dataset(io.BytesIO(resp.read()))

ds

In [13]:
# Google Cloud Storage: anonymous, read-only file system via gcsfs
import gcsfs

fs_GCS = gcsfs.GCSFileSystem(access='read_only', token='anon')

# list the top level of the 'cmip6' bucket
fs_GCS.ls('cmip6')

['cmip6/CMIP3',
 'cmip6/CMIP5',
 'cmip6/CMIP6',
 'cmip6/DCPP',
 'cmip6/GFDL_CM2_6',
 'cmip6/cmip6-zarr-consolidated-stores-noQC.csv',
 'cmip6/cmip6-zarr-consolidated-stores-noQC.jsonl',
 'cmip6/cmip6-zarr-consolidated-stores.csv',
 'cmip6/compare',
 'cmip6/gfdl_cm2_6.json',
 'cmip6/glade-cmip6.csv.gz',
 'cmip6/gsutil_test_filelz288lpb.bin',
 'cmip6/pangeo-cmip3.csv',
 'cmip6/pangeo-cmip3.json',
 'cmip6/pangeo-cmip5.csv',
 'cmip6/pangeo-cmip5.json',
 'cmip6/pangeo-cmip6-noQC.csv',
 'cmip6/pangeo-cmip6-noQC.json',
 'cmip6/pangeo-cmip6.csv',
 'cmip6/pangeo-cmip6.json',
 'cmip6/tracmip',
 'cmip6/tracmip.csv',
 'cmip6/tracmip.json']

In [27]:
# Amazon S3: anonymous file system via s3fs
import s3fs

fs_S3 = s3fs.S3FileSystem(anon=True)

# alternative listing, left disabled:
#fs_S3.ls('cmip6-pds/')

# list one dataset version directory inside the 'esgf-world' bucket
fs_S3.ls(
    'esgf-world/CMIP6/AerChemMIP/AS-RCEC/TaiESM1/histSST/r1i1p1f1/AERmon/ps/gn/v20200310'
)

['esgf-world/CMIP6/AerChemMIP/AS-RCEC/TaiESM1/histSST/r1i1p1f1/AERmon/ps/gn/v20200310/ps_AERmon_TaiESM1_histSST_r1i1p1f1_gn_185001-201412.nc']

In [None]:
# Amazon S3, GFDL netcdf collection: open one netCDF file through the S3 file system


fs_s3 = s3fs.S3FileSystem(anon=True)
s3_url = (
    's3://esgf-world/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/abrupt-4xCO2/r1i1p1f1/Lmon/cLeaf/gr/v20190118/'
    'cLeaf_Lmon_IPSL-CM6A-LR_abrupt-4xCO2_r1i1p1f1_gr_185001-214912.nc'
)
# binary file-like handle onto the S3 object; xarray reads from it directly
s3_file = fs_s3.open(s3_url, mode='rb')

ds2 = xr.open_dataset(s3_file, decode_coords=False)
print(ds2)

In [40]:
# Amazon S3, GFDL netcdf collection: a dataset may be split over several netCDF files,
# so glob for all of them

fs_GFDL = s3fs.S3FileSystem(anon=True)

path = 'CMIP6/ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r2i1p1f1/day/va/gn/v20190710'

# bucket prefix + dataset path + wildcard for every .nc file in that directory
s3path = ''.join(['s3://esgf-world/', path, '/*.nc'])
fs_GFDL.glob(s3path)

['esgf-world/CMIP6/ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r2i1p1f1/day/va/gn/v20190710/va_day_MPI-ESM1-2-LR_ssp585_r2i1p1f1_gn_20150101-20341231.nc',
 'esgf-world/CMIP6/ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r2i1p1f1/day/va/gn/v20190710/va_day_MPI-ESM1-2-LR_ssp585_r2i1p1f1_gn_20350101-20541231.nc',
 'esgf-world/CMIP6/ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r2i1p1f1/day/va/gn/v20190710/va_day_MPI-ESM1-2-LR_ssp585_r2i1p1f1_gn_20550101-20741231.nc',
 'esgf-world/CMIP6/ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r2i1p1f1/day/va/gn/v20190710/va_day_MPI-ESM1-2-LR_ssp585_r2i1p1f1_gn_20750101-20941231.nc',
 'esgf-world/CMIP6/ScenarioMIP/MPI-M/MPI-ESM1-2-LR/ssp585/r2i1p1f1/day/va/gn/v20190710/va_day_MPI-ESM1-2-LR_ssp585_r2i1p1f1_gn_20950101-21001231.nc']

In [41]:
# open a file-like handle for every object matched by the glob pattern
fobj = list(map(fs_GFDL.open, fs_GFDL.glob(s3path)))

# combine the files in the listed order along the 'time' dimension
ds = xr.open_mfdataset(
    fobj,
    combine='nested',
    concat_dim='time',
    data_vars='minimal',
    join='exact',
    use_cftime=True,
)
ds

Unnamed: 0,Array,Chunk
Bytes,502.58 kB,116.88 kB
Shape,"(31411, 2)","(7305, 2)"
Count,15 Tasks,5 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 502.58 kB 116.88 kB Shape (31411, 2) (7305, 2) Count 15 Tasks 5 Chunks Type object numpy.ndarray",2  31411,

Unnamed: 0,Array,Chunk
Bytes,502.58 kB,116.88 kB
Shape,"(31411, 2)","(7305, 2)"
Count,15 Tasks,5 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.54 kB,1.54 kB
Shape,"(96, 2)","(96, 2)"
Count,20 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 1.54 kB 1.54 kB Shape (96, 2) (96, 2) Count 20 Tasks 1 Chunks Type float64 numpy.ndarray",2  96,

Unnamed: 0,Array,Chunk
Bytes,1.54 kB,1.54 kB
Shape,"(96, 2)","(96, 2)"
Count,20 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.07 kB,3.07 kB
Shape,"(192, 2)","(192, 2)"
Count,20 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.07 kB 3.07 kB Shape (192, 2) (192, 2) Count 20 Tasks 1 Chunks Type float64 numpy.ndarray",2  192,

Unnamed: 0,Array,Chunk
Bytes,3.07 kB,3.07 kB
Shape,"(192, 2)","(192, 2)"
Count,20 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,18.53 GB,4.31 GB
Shape,"(31411, 8, 96, 192)","(7305, 8, 96, 192)"
Count,15 Tasks,5 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 18.53 GB 4.31 GB Shape (31411, 8, 96, 192) (7305, 8, 96, 192) Count 15 Tasks 5 Chunks Type float32 numpy.ndarray",31411  1  192  96  8,

Unnamed: 0,Array,Chunk
Bytes,18.53 GB,4.31 GB
Shape,"(31411, 8, 96, 192)","(7305, 8, 96, 192)"
Count,15 Tasks,5 Chunks
Type,float32,numpy.ndarray
