In [1]:
from matplotlib import pyplot as plt
import intake
import numpy as np
import xarray as xr
import pandas as pd
import cftime
import os

### Read the catalog metadata into an intake-esm datastore (backed by a pandas.DataFrame)

In [2]:
# Location of the Pangeo CMIP6 catalog description (JSON) on GitHub
url = "https://raw.githubusercontent.com/NCAR/intake-esm-datastore/master/catalogs/pangeo-cmip6.json"

# Open the catalog; the resulting object is what later cells search
dataframe = intake.open_esm_datastore(url)

# Show the catalog summary (unique counts per metadata column)
dataframe

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,unique
activity_id,15
institution_id,34
source_id,79
experiment_id,106
member_id,213
table_id,30
variable_id,391
grid_label,10
zstore,290963
dcpp_init_year,60


### Set the search parameters for the data we want

This is an example for a single model and single variable.  We need this data for the following variables:

* ua
* va
* pr

and models:

* institution: NCAR  model: CESM2
* institution: NOAA  model: GFDL-CM4
* institution: CCCMa  model: CanESM5
* institution: CAMS  model: CAMS-CSM1-0

The example puts the data in `base_path=/scratch/kpegion/cmip6/data/`.
For the real runs, the data need to go in `base_path=/shared/cmip6/data/`.

There are other files in `/shared/cmip6/data/` that should not be disturbed

In [3]:
# Search parameters for one experiment / table / variable / model / member.
# Change these values (and re-run the cells below) for each variable and
# model listed above.
exp = 'historical'
table = 'day'
# Name must be `variable` (singular): the search cell and the export loop
# both read `variable`, so defining only `variables` fails on a fresh kernel.
variable = 'ua'
# Alias kept so anything still referring to the old name keeps working.
variables = variable
model = 'CESM2'
institution = 'NCAR'
member = 'r1i1p1f1'

### Search for the data and get the available datasets into a datasets dictionary

In [4]:
# Collect the catalog filters in one place, then narrow the catalog to the
# entries matching the experiment / table / variable / model / member /
# institution chosen in the parameter cell.
search_criteria = dict(
    experiment_id=exp,
    table_id=table,
    variable_id=variable,
    source_id=model,
    member_id=member,
    institution_id=institution,
)
models = dataframe.search(**search_criteria)

# Open each matching entry as an xarray.Dataset, returned as a dictionary
datasets = models.to_dataset_dict()

# Display the dictionary of datasets
datasets


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'


{'CMIP.NCAR.CESM2.historical.day.gn': <xarray.Dataset>
 Dimensions:    (lat: 192, lon: 288, member_id: 1, nbnd: 2, plev: 8, time: 60226)
 Coordinates:
     lon_bnds   (lon, nbnd) float32 dask.array<chunksize=(288, 2), meta=np.ndarray>
     time_bnds  (time, nbnd) object dask.array<chunksize=(30113, 1), meta=np.ndarray>
     lat_bnds   (lat, nbnd) float32 dask.array<chunksize=(192, 2), meta=np.ndarray>
   * lon        (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8
   * lat        (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0
   * time       (time) object 1850-01-01 00:00:00 ... 2015-01-01 00:00:00
   * plev       (plev) float64 1e+05 8.5e+04 7e+04 5e+04 ... 1e+04 5e+03 1e+03
   * member_id  (member_id) <U8 'r1i1p1f1'
 Dimensions without coordinates: nbnd
 Data variables:
     ua         (member_id, time, plev, lat, lon) float32 dask.array<chunksize=(1, 82, 8, 192, 288), meta=np.ndarray>
 Attributes:
     Conventions:             CF-1.7 CMIP-6.2
     acti

### Loop over the data identified in search and store as `xarray.Dataset`.  
Once we have it as an `xarray.Dataset`, we can extract and write data as usual.

In [15]:
# Set output path information.
# NOTE: this is the example (scratch) location; the notes earlier in this
# notebook say the real output belongs under /shared/cmip6/data/ -- change
# base_path here when doing the production run.
base_path = '/scratch/kpegion/cmip6/data/'
out_path = base_path + exp + '/atmos/day/' + variable + '/' + institution + '.' + model + '/'

# Create the output directory (including missing parents) if it is not there
if not os.path.exists(out_path):
    print("Making Directory: ", out_path)
    os.makedirs(out_path)

# Write one netCDF file per dataset returned by the catalog search.
for key, ds in datasets.items():

    # Only the first and last time stamps are needed for the file name, so
    # format just those two instead of the whole time axis.
    time_str = ds['time'].isel(time=[0, -1]).dt.strftime("%Y%m%d").values
    out_file = out_path + key + '.' + time_str[0] + '-' + time_str[-1] + '.nc'

    print(variable)

    # Winds (extract levels)
    if variable in ('ua', 'va'):
        # Show the available pressure levels. This is done only for the winds
        # because a dataset without a 'plev' coordinate would raise KeyError.
        print(ds['plev'])

        # Extract the levels (plev values are in Pa).
        # NOTE: 25000 Pa is the 250 hPa level; the name ds_200 is historical.
        ds_200 = ds.sel(plev=25000.)
        ds_850 = ds.sel(plev=85000.)

        # Combine data for the two levels together along a new 'plev' dimension
        ds_lev = xr.combine_nested([ds_200, ds_850], concat_dim='plev')

        # Write out the level subset (not the full multi-level dataset)
        ds_lev.to_netcdf(out_file)

    # Precip
    else:
        # Just write out the data
        ds.to_netcdf(out_file)

<xarray.DataArray 'plev' (plev: 8)>
array([100000.,  85000.,  70000.,  50000.,  25000.,  10000.,   5000.,   1000.])
Coordinates:
  * plev     (plev) float64 1e+05 8.5e+04 7e+04 5e+04 2.5e+04 1e+04 5e+03 1e+03
Attributes:
    axis:           Z
    positive:       down
    requested:      100000. 85000. 70000. 50000. 25000. 10000. 5000. 1000.
    standard_name:  air_pressure
    title:          pressure
    type:           double
    units:          Pa
ua
<xarray.Dataset>
Dimensions:    (lat: 192, lon: 288, member_id: 1, nbnd: 2, time: 60226)
Coordinates:
    lon_bnds   (lon, nbnd) float32 dask.array<chunksize=(288, 2), meta=np.ndarray>
    time_bnds  (time, nbnd) object dask.array<chunksize=(30113, 1), meta=np.ndarray>
    lat_bnds   (lat, nbnd) float32 dask.array<chunksize=(192, 2), meta=np.ndarray>
  * lon        (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8
  * lat        (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0
  * time       (time) object 1850

In [16]:
# Inspect the combined two-level dataset built in the export loop above
ds_lev

Unnamed: 0,Array,Chunk
Bytes,2.30 kB,2.30 kB
Shape,"(288, 2)","(288, 2)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.30 kB 2.30 kB Shape (288, 2) (288, 2) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",2  288,

Unnamed: 0,Array,Chunk
Bytes,2.30 kB,2.30 kB
Shape,"(288, 2)","(288, 2)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,963.62 kB,240.90 kB
Shape,"(60226, 2)","(30113, 1)"
Count,17 Tasks,4 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 963.62 kB 240.90 kB Shape (60226, 2) (30113, 1) Count 17 Tasks 4 Chunks Type object numpy.ndarray",2  60226,

Unnamed: 0,Array,Chunk
Bytes,963.62 kB,240.90 kB
Shape,"(60226, 2)","(30113, 1)"
Count,17 Tasks,4 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.54 kB,1.54 kB
Shape,"(192, 2)","(192, 2)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.54 kB 1.54 kB Shape (192, 2) (192, 2) Count 5 Tasks 1 Chunks Type float32 numpy.ndarray",2  192,

Unnamed: 0,Array,Chunk
Bytes,1.54 kB,1.54 kB
Shape,"(192, 2)","(192, 2)"
Count,5 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,26.64 GB,18.14 MB
Shape,"(2, 1, 60226, 192, 288)","(1, 1, 82, 192, 288)"
Count,5881 Tasks,1470 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 26.64 GB 18.14 MB Shape (2, 1, 60226, 192, 288) (1, 1, 82, 192, 288) Count 5881 Tasks 1470 Chunks Type float32 numpy.ndarray",1  2  288  192  60226,

Unnamed: 0,Array,Chunk
Bytes,26.64 GB,18.14 MB
Shape,"(2, 1, 60226, 192, 288)","(1, 1, 82, 192, 288)"
Count,5881 Tasks,1470 Chunks
Type,float32,numpy.ndarray
