In [1]:
from matplotlib import pyplot as plt
import xarray as xr
import numpy as np
import dask
import intake
import fsspec

%matplotlib inline

In [2]:
def drop_all_bounds(ds):
    """Return ``ds`` without its cell-boundary coordinates.

    Every coordinate whose name contains ``'_bounds'`` or ``'_bnds'`` is
    removed. Only ``ds.coords`` is inspected, so a matching data variable
    is left in place.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset to strip.

    Returns
    -------
    xarray.Dataset
        The result of ``ds.drop_vars`` for the matched coordinate names.
    """
    bound_names = [name for name in ds.coords
                   if '_bounds' in name or '_bnds' in name]
    # ``Dataset.drop`` is deprecated; ``drop_vars`` is the supported way to
    # remove variables by name.
    return ds.drop_vars(bound_names)

In [3]:
# Open the Pangeo CMIP6 catalog: an intake-esm datastore described by a JSON file on Google Cloud Storage.
col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")

In [4]:
col

Unnamed: 0,unique
activity_id,18
institution_id,36
source_id,88
experiment_id,170
member_id,657
table_id,37
variable_id,700
grid_label,10
zstore,514961
dcpp_init_year,60


In [5]:
type(col)

intake_esm.core.esm_datastore

In [6]:
col.df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,ps,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
1,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rsds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
2,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlus,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
3,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
4,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706


In [7]:
col.df.columns

Index(['activity_id', 'institution_id', 'source_id', 'experiment_id',
       'member_id', 'table_id', 'variable_id', 'grid_label', 'zstore',
       'dcpp_init_year', 'version'],
      dtype='object')

In [8]:
# Catalog experiments whose id contains 'ssp'.
[
    experiment
    for experiment in col.df['experiment_id'].unique()
    if 'ssp' in experiment
]

['ssp585',
 'ssp245',
 'ssp370SST-lowCH4',
 'ssp370-lowNTCF',
 'ssp370SST-lowNTCF',
 'ssp370SST-ssp126Lu',
 'ssp370SST',
 'ssp370pdSST',
 'ssp119',
 'ssp370',
 'esm-ssp585-ssp126Lu',
 'ssp126-ssp370Lu',
 'ssp370-ssp126Lu',
 'ssp126',
 'esm-ssp585',
 'ssp245-GHG',
 'ssp245-nat',
 'ssp460',
 'ssp434',
 'ssp534-over',
 'ssp245-stratO3',
 'ssp245-aer',
 'ssp245-cov-modgreen',
 'ssp245-cov-fossil',
 'ssp245-cov-strgreen',
 'ssp245-covid',
 'ssp585-bgc']

In [9]:
# Experiments to load: the historical run plus the ssp245 and ssp585 scenarios.
expts = ['historical', 'ssp245', 'ssp585']

In [10]:
# Search facets for the catalog lookup: the chosen experiments, the Amon
# table, near-surface air temperature (tas) and a single ensemble member.
query = {
    'experiment_id': expts,
    'table_id': 'Amon',
    'variable_id': ['tas'],
    'member_id': 'r1i1p1f1',
}

In [11]:
query

{'experiment_id': ['historical', 'ssp245', 'ssp585'],
 'table_id': 'Amon',
 'variable_id': ['tas'],
 'member_id': 'r1i1p1f1'}

In [12]:
# Search the catalog with the facets in `query`; require_all_on=["source_id"]
# keeps only the models (source_id) that provide every requested value,
# i.e. all of the requested experiments.
col_subset = col.search(require_all_on=["source_id"], **query)

In [13]:
# Sanity check: count the distinct experiments, variables and tables found for each model.
col_subset.df.groupby("source_id")[["experiment_id", "variable_id", "table_id"]].nunique()

Unnamed: 0_level_0,experiment_id,variable_id,table_id
source_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ACCESS-CM2,3,1,1
AWI-CM-1-1-MR,3,1,1
BCC-CSM2-MR,3,1,1
CAMS-CSM1-0,3,1,1
CAS-ESM2-0,3,1,1
CESM2-WACCM,3,1,1
CIESM,3,1,1
CMCC-CM2-SR5,3,1,1
CMCC-ESM2,3,1,1
CanESM5,3,1,1


In [14]:
# Narrow the search to a single model.
query['source_id'] = 'ACCESS-CM2'

In [15]:
query

{'experiment_id': ['historical', 'ssp245', 'ssp585'],
 'table_id': 'Amon',
 'variable_id': ['tas'],
 'member_id': 'r1i1p1f1',
 'source_id': 'ACCESS-CM2'}

In [16]:
# Repeat the search now that `query` also pins source_id; this rebinds
# `col_subset` to the entries of that one model.
col_subset = col.search(require_all_on=["source_id"], **query)

In [17]:
col_subset.df

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,ScenarioMIP,CSIRO-ARCCSS,ACCESS-CM2,ssp245,r1i1p1f1,Amon,tas,gn,gs://cmip6/CMIP6/ScenarioMIP/CSIRO-ARCCSS/ACCE...,,20191108
1,CMIP,CSIRO-ARCCSS,ACCESS-CM2,historical,r1i1p1f1,Amon,tas,gn,gs://cmip6/CMIP6/CMIP/CSIRO-ARCCSS/ACCESS-CM2/...,,20191108
2,ScenarioMIP,CSIRO-ARCCSS,ACCESS-CM2,ssp585,r1i1p1f1,Amon,tas,gn,gs://cmip6/CMIP6/ScenarioMIP/CSIRO-ARCCSS/ACCE...,,20210317


In [18]:
# Zarr store location of the historical run (first matching catalog row).
loc_historical = (
    col_subset.df
    .loc[lambda df: df['experiment_id'] == 'historical', 'zstore']
    .values[0]
)

In [19]:
loc_historical

'gs://cmip6/CMIP6/CMIP/CSIRO-ARCCSS/ACCESS-CM2/historical/r1i1p1f1/Amon/tas/gn/v20191108/'

In [20]:
# Zarr store location of the ssp245 run (first matching catalog row).
loc_ssp245 = (
    col_subset.df
    .loc[lambda df: df['experiment_id'] == 'ssp245', 'zstore']
    .values[0]
)

In [21]:
loc_ssp245

'gs://cmip6/CMIP6/ScenarioMIP/CSIRO-ARCCSS/ACCESS-CM2/ssp245/r1i1p1f1/Amon/tas/gn/v20191108/'

In [22]:
# Zarr store location of the ssp585 run (first matching catalog row).
loc_ssp585 = (
    col_subset.df
    .loc[lambda df: df['experiment_id'] == 'ssp585', 'zstore']
    .values[0]
)

In [23]:
loc_ssp585

'gs://cmip6/CMIP6/ScenarioMIP/CSIRO-ARCCSS/ACCESS-CM2/ssp585/r1i1p1f1/Amon/tas/gn/v20210317/'

### Open the datasets

In [24]:
# Open the historical zarr store through an fsspec mapper; consolidated=True
# tells xarray to read the store's consolidated metadata.
dset_historical = xr.open_zarr(fsspec.get_mapper(loc_historical), consolidated=True)

In [25]:
dset_historical.info()

xarray.Dataset {
dimensions:
	lat = 144 ;
	bnds = 2 ;
	lon = 192 ;
	time = 1980 ;

variables:
	float64 height() ;
		height:axis = Z ;
		height:long_name = height ;
		height:positive = up ;
		height:standard_name = height ;
		height:units = m ;
	float64 lat(lat) ;
		lat:axis = Y ;
		lat:bounds = lat_bnds ;
		lat:long_name = Latitude ;
		lat:standard_name = latitude ;
		lat:units = degrees_north ;
	float64 lat_bnds(lat, bnds) ;
	float64 lon(lon) ;
		lon:axis = X ;
		lon:bounds = lon_bnds ;
		lon:long_name = Longitude ;
		lon:standard_name = longitude ;
		lon:units = degrees_east ;
	float64 lon_bnds(lon, bnds) ;
	float32 tas(time, lat, lon) ;
		tas:cell_measures = area: areacella ;
		tas:cell_methods = area: time: mean ;
		tas:comment = near-surface (usually, 2 meter) air temperature ;
		tas:history = 2019-11-08T06:41:45Z altered by CMOR: Treated scalar dimension: 'height'. 2019-11-08T06:41:45Z altered by CMOR: replaced missing value flag (-1.07374e+09) with standard missing value (1e+20)

In [26]:
# Remove the cell-boundary coordinates (names containing '_bnds' or '_bounds');
# this rebinds `dset_historical` to the stripped dataset.
dset_historical = drop_all_bounds(dset_historical)

In [27]:
dset_historical

Unnamed: 0,Array,Chunk
Bytes,208.83 MiB,70.45 MiB
Shape,"(1980, 144, 192)","(668, 144, 192)"
Count,4 Tasks,3 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 208.83 MiB 70.45 MiB Shape (1980, 144, 192) (668, 144, 192) Count 4 Tasks 3 Chunks Type float32 numpy.ndarray",192  144  1980,

Unnamed: 0,Array,Chunk
Bytes,208.83 MiB,70.45 MiB
Shape,"(1980, 144, 192)","(668, 144, 192)"
Count,4 Tasks,3 Chunks
Type,float32,numpy.ndarray


In [28]:
# Open the ssp245 store and strip its bounds coordinates in one step.
dset_ssp245 = drop_all_bounds(
    xr.open_zarr(fsspec.get_mapper(loc_ssp245), consolidated=True)
)

In [29]:
# Open the ssp585 store and strip its bounds coordinates in one step.
dset_ssp585 = drop_all_bounds(
    xr.open_zarr(fsspec.get_mapper(loc_ssp585), consolidated=True)
)

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return np.asarray(array[self.key], dtype=None)


In [30]:
dset_ssp585

Unnamed: 0,Array,Chunk
Bytes,361.97 MiB,70.45 MiB
Shape,"(3432, 144, 192)","(668, 144, 192)"
Count,7 Tasks,6 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 361.97 MiB 70.45 MiB Shape (3432, 144, 192) (668, 144, 192) Count 7 Tasks 6 Chunks Type float32 numpy.ndarray",192  144  3432,

Unnamed: 0,Array,Chunk
Bytes,361.97 MiB,70.45 MiB
Shape,"(3432, 144, 192)","(668, 144, 192)"
Count,7 Tasks,6 Chunks
Type,float32,numpy.ndarray
