## Test CMIP6 data


In [71]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import xesmf as xe
import cartopy
from tqdm.autonotebook import tqdm  # Fancy progress bars for our loops!
import intake
# util.py is in the local directory
# it contains code that is common across project notebooks
# or routines that are too extensive and might otherwise clutter
# the notebook design
import util

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size as (width, height); matplotlib interprets these in inches.
plt.rcParams['figure.figsize'] = 12, 6
# Ask the inline backend for high-resolution ("retina") figure output.
%config InlineBackend.figure_format = 'retina' 

In [72]:
# Read the CSV published at this URL into a DataFrame.
# NOTE(review): judging by the file name this is the index of consolidated
# CMIP6 zarr stores; `df` is not referenced by the cells visible here --
# confirm it is still needed.
df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')

In [73]:
# Open the ESM datastore described by the local JSON catalogue file.
# The path is relative, so it is resolved against the notebook's working directory.
col = intake.open_esm_datastore("../catalogs/pangeo-cmip6.json")

In [74]:
# Filters shared by all three per-variable searches below:
#   - the `historical` forcing experiment
#   - a single realization per model (r1i1p1f1), picked arbitrarily so that
#     each model contributes just one set of initial conditions
historical_r1 = dict(experiment_id='historical', member_id='r1i1p1f1')

# tas: near-surface air temperature, from the monthly atmosphere table (Amon)
cat_tas = col.search(table_id='Amon', variable_id='tas', **historical_r1)

# nbp: net biome production (land carbon flux), from the monthly land table (Lmon)
cat_nbp = col.search(table_id='Lmon', variable_id='nbp', **historical_r1)

# fgco2: air-sea CO2 flux, from the monthly ocean table (Omon)
cat_fgco2 = col.search(table_id='Omon', variable_id='fgco2', **historical_r1)


In [75]:
# Gather everything needed for a single model (CanESM5) in one search.
# Each list-valued filter accepts any of the listed values; the matching rows
# shown by `cat.df` are the (table, variable) pairs that exist in the catalogue.
cat = col.search(
    # the `historical` forcing experiment
    experiment_id='historical',
    # restrict to one model
    source_id='CanESM5',
    # monthly atmosphere / land / ocean tables plus the fixed-field tables
    table_id=['Amon', 'Lmon', 'Omon', 'fx', 'Ofx'],
    # air temperature, land and ocean carbon fluxes, and the grid-cell areas
    variable_id=['tas', 'nbp', 'fgco2', 'areacella', 'areacello'],
    # arbitrarily pick one realization (i.e. one set of initial conditions)
    member_id='r1i1p1f1',
)


In [76]:
# Display the catalogue rows matched by the search above (one row per zarr store).
cat.df

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year
9451,CMIP,CCCma,CanESM5,historical,r1i1p1f1,Amon,tas,gn,gs://cmip6/CMIP/CCCma/CanESM5/historical/r1i1p...,
9473,CMIP,CCCma,CanESM5,historical,r1i1p1f1,Lmon,nbp,gn,gs://cmip6/CMIP/CCCma/CanESM5/historical/r1i1p...,
9478,CMIP,CCCma,CanESM5,historical,r1i1p1f1,Ofx,areacello,gn,gs://cmip6/CMIP/CCCma/CanESM5/historical/r1i1p...,
9484,CMIP,CCCma,CanESM5,historical,r1i1p1f1,Omon,fgco2,gn,gs://cmip6/CMIP/CCCma/CanESM5/historical/r1i1p...,
9538,CMIP,CCCma,CanESM5,historical,r1i1p1f1,fx,areacella,gn,gs://cmip6/CMIP/CCCma/CanESM5/historical/r1i1p...,


In [77]:
# Load every matched catalogue entry into a dictionary of xarray Datasets.
#   consolidated=True  -> open the zarr stores via their consolidated metadata
#   decode_times=False -> leave the time coordinate as raw numbers instead of
#                         decoding it to datetimes
dset_dict = cat.to_dataset_dict(
    zarr_kwargs=dict(consolidated=True, decode_times=False),
)

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'

--> There will be 5 group(s)


In [78]:
# Empty accumulators; nothing in this cell fills them yet.
# NOTE(review): presumably populated per model by later cells -- confirm.
ds_dict, gmst_dict, imbalance_dict = {}, {}, {}

# Walk over every loaded dataset and print its summary for a quick sanity check.
for name, ds in tqdm(dset_dict.items()):
    # Keys look like 'activity_id.institution_id.source_id.experiment_id...';
    # the third dot-separated field is therefore the model (source_id).
    model_name = name.split(".")[2]
    print(ds)

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

<xarray.Dataset>
Dimensions:    (bnds: 2, lat: 64, lon: 128, member_id: 1, time: 1980)
Coordinates:
  * lon        (lon) float64 0.0 2.812 5.625 8.438 ... 348.8 351.6 354.4 357.2
  * lat        (lat) float64 -87.86 -85.1 -82.31 -79.53 ... 82.31 85.1 87.86
  * time       (time) float64 15.5 45.0 74.5 ... 6.015e+04 6.018e+04 6.021e+04
  * member_id  (member_id) <U8 'r1i1p1f1'
Dimensions without coordinates: bnds
Data variables:
    height     float64 ...
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(128, 2), meta=np.ndarray>
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(64, 2), meta=np.ndarray>
    time_bnds  (time, bnds) float64 dask.array<chunksize=(1980, 2), meta=np.ndarray>
    tas        (member_id, time, lat, lon) float32 dask.array<chunksize=(1, 600, 64, 128), meta=np.ndarray>
Attributes:
    CCCma_model_hash:            3dedf95315d603326fde4f5340dc0519d80d10c0
    CCCma_parent_runid:          rc3-pictrl
    CCCma_pycmor_hash:           33c30511acc319a9824063