In [1]:
import intake

In [2]:
def _horizontal_source_name(ds, long_name, short_name, nav_name):
    """Return the name in ``ds`` that should be renamed to ``long_name``.

    Lookup order:

    1. ``long_name`` as a coordinate (already has the target name).
    2. ``short_name`` as a coordinate, unless a data variable called
       ``long_name`` already exists; in that case nothing is selected
       (presumably to avoid a name clash on rename).
    3. Otherwise the first of ``long_name``, ``short_name``, ``nav_name``
       found among the data variables.

    Bounds variables (e.g. ``lat_bnds`` / ``lon_bnds``) are not candidates.

    Returns ``None`` when no candidate is found.
    """
    if long_name in ds.coords:
        return long_name
    if short_name in ds.coords:
        return None if long_name in ds.data_vars else short_name
    for candidate in (long_name, short_name, nav_name):
        if candidate in ds.data_vars:
            return candidate
    return None


def make_rename_map(ds, model, coord_rename_map=None):
    """Build the ``{old_name: new_name}`` rename mapping for one model.

    Parameters
    ----------
    ds : object exposing ``coords`` and ``data_vars`` containers
        Only membership tests (``name in ds.coords`` / ``name in
        ds.data_vars``) are performed on it.
    model : hashable
        Key under which the mapping for ``ds`` is stored.
    coord_rename_map : dict, optional
        Accumulator to add the entry to. It is mutated in place and returned.
        When omitted, a fresh dict is created on every call (the previous
        ``{}`` default was shared between calls and silently accumulated
        entries from earlier calls).

    Returns
    -------
    dict
        ``coord_rename_map`` with ``coord_rename_map[model]`` set to a dict
        mapping the names found in ``ds`` to ``'latitude'``, ``'longitude'``
        and ``'lev'``. Targets for which no source name was found are simply
        absent. Any existing entry for ``model`` is overwritten.
    """
    if coord_rename_map is None:
        coord_rename_map = {}

    # Register the (initially empty) entry first, as the original did.
    renames = coord_rename_map[model] = {}

    horizontal = (
        ('latitude', 'lat', 'nav_lat'),
        ('longitude', 'lon', 'nav_lon'),
    )
    for target, short_name, nav_name in horizontal:
        source = _horizontal_source_name(ds, target, short_name, nav_name)
        if source is not None:
            renames[source] = target

    # Vertical axis: only coordinates are considered, 'lev' taking precedence.
    for candidate in ('lev', 'olevel', 'depth', 'rho'):
        if candidate in ds.coords:
            renames[candidate] = 'lev'
            break

    return coord_rename_map

# Load the CMIP6 catalog

In [3]:
# Open the catalog and keep only the historical, monthly-ocean (Omon)
# 'thetao' entries with the 'gn' grid label.
col = intake.open_esm_datastore('../catalogs/pangeo-cmip6.json')
cat = col.search(
    **{
        'experiment_id': 'historical',
        'table_id': 'Omon',
        'variable_id': 'thetao',
        'grid_label': 'gn',
    }
)

# Load every matching entry into a dict of datasets keyed by catalog key.
dset_dict = cat.to_dataset_dict(
    zarr_kwargs=dict(consolidated=True),
    cdf_kwargs=dict(chunks=dict()),
)

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 22 group(s)


In [4]:
# Example: look up one model's dataset by its catalog key
# (plain string literal: there is nothing to interpolate, so no f-prefix).
ds = dset_dict['CMIP.NCAR.CESM2.historical.Omon.gn']
ds

<xarray.Dataset>
Dimensions:    (d2: 2, lev: 60, member_id: 11, nlat: 384, nlon: 320, time: 1980, vertices: 4)
Coordinates:
  * time       (time) object 1850-01-15 13:00:00 ... 2014-12-15 12:00:00
  * lev        (lev) float64 500.0 1.5e+03 2.5e+03 ... 5.125e+05 5.375e+05
  * nlon       (nlon) int32 1 2 3 4 5 6 7 8 ... 313 314 315 316 317 318 319 320
  * nlat       (nlat) int32 1 2 3 4 5 6 7 8 ... 377 378 379 380 381 382 383 384
  * member_id  (member_id) <U9 'r10i1p1f1' 'r11i1p1f1' ... 'r8i1p1f1' 'r9i1p1f1'
Dimensions without coordinates: d2, vertices
Data variables:
    lon_bnds   (nlat, nlon, vertices) float32 dask.array<chunksize=(384, 320, 4), meta=np.ndarray>
    lon        (nlat, nlon) float64 dask.array<chunksize=(384, 320), meta=np.ndarray>
    lat_bnds   (nlat, nlon, vertices) float32 dask.array<chunksize=(384, 320, 4), meta=np.ndarray>
    lat        (nlat, nlon) float64 dask.array<chunksize=(384, 320), meta=np.ndarray>
    time_bnds  (time, d2) object dask.array<chunksize=(1

# Create the rename map

In [5]:
# One row per model (source_id) together with the institution that ran it.
model_institude_df = cat.df.drop_duplicates(subset='source_id')[['source_id','institution_id']]

# Start from an empty accumulator so re-running this cell gives the same result.
coord_rename_map = {}

for index, row in model_institude_df.iterrows():

    ds = dset_dict[f'CMIP.{row.institution_id}.{row.source_id}.historical.Omon.gn']

    # Pass the accumulator explicitly instead of relying on the function's
    # default argument; the function adds the entry for this model in place.
    make_rename_map(ds, model=row.source_id, coord_rename_map=coord_rename_map)

coord_rename_map

{'AWI-CM-1-1-MR': {'lat': 'latitude', 'lon': 'longitude', 'depth': 'lev'},
 'BCC-CSM2-MR': {'lev': 'lev'},
 'BCC-ESM1': {'lev': 'lev'},
 'CAMS-CSM1-0': {'latitude': 'latitude',
  'longitude': 'longitude',
  'lev': 'lev'},
 'FGOALS-f3-L': {'latitude': 'latitude',
  'longitude': 'longitude',
  'lev': 'lev'},
 'CanESM5': {'latitude': 'latitude', 'longitude': 'longitude', 'lev': 'lev'},
 'CNRM-CM6-1': {'lat': 'latitude', 'lon': 'longitude', 'lev': 'lev'},
 'CNRM-ESM2-1': {'lat': 'latitude', 'lon': 'longitude', 'lev': 'lev'},
 'EC-Earth3-Veg': {'latitude': 'latitude',
  'longitude': 'longitude',
  'lev': 'lev'},
 'IPSL-CM6A-LR': {'nav_lat': 'latitude',
  'nav_lon': 'longitude',
  'olevel': 'lev'},
 'MIROC-ES2L': {'latitude': 'latitude',
  'longitude': 'longitude',
  'lev': 'lev'},
 'MIROC6': {'latitude': 'latitude', 'longitude': 'longitude', 'lev': 'lev'},
 'HadGEM3-GC31-LL': {'latitude': 'latitude',
  'longitude': 'longitude',
  'lev': 'lev'},
 'UKESM1-0-LL': {'latitude': 'latitude',
  'lo

# Test to see if it works

In [7]:
# Apply each model's rename mapping and print the renamed latitude /
# longitude / lev variables as a visual check.
for index, row in model_institude_df.iterrows():

    ds = dset_dict[f'CMIP.{row.institution_id}.{row.source_id}.historical.Omon.gn']

    model = row.source_id

    print('______________________________________________')
    print(f'{model}\n')
    # rename() returns a new dataset; the entry in dset_dict is not modified.
    ds = ds.rename(coord_rename_map[model])
    # Drop the global attributes (presumably to keep the printout compact).
    # attrs is a mapping, so reset it with an empty dict rather than a string.
    ds.attrs = {}
    print(f'{ds[["latitude","longitude","lev"]]}\n')

______________________________________________
AWI-CM-1-1-MR

<xarray.Dataset>
Dimensions:    (lev: 46, ncells: 830305)
Coordinates:
  * lev        (lev) float64 -0.0 10.0 20.0 30.0 ... 5.4e+03 5.65e+03 5.9e+03
Dimensions without coordinates: ncells
Data variables:
    latitude   (ncells) float64 dask.array<chunksize=(830305,), meta=np.ndarray>
    longitude  (ncells) float64 dask.array<chunksize=(830305,), meta=np.ndarray>

______________________________________________
BCC-CSM2-MR

<xarray.Dataset>
Dimensions:    (lat: 232, lev: 40, lon: 360)
Coordinates:
  * lev        (lev) float64 5.0 15.0 25.0 ... 4.289e+03 4.807e+03 5.334e+03
  * lon        (lon) float64 0.5 1.5 2.5 3.5 4.5 ... 356.5 357.5 358.5 359.5
  * lat        (lat) float64 -81.5 -80.5 -79.5 -78.5 ... 86.5 87.5 88.5 89.5
Data variables:
    latitude   (lat, lon) float32 dask.array<chunksize=(232, 360), meta=np.ndarray>
    longitude  (lat, lon) float32 dask.array<chunksize=(232, 360), meta=np.ndarray>

____________________