# Water masses of the future

In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import gcsfs

import warnings
warnings.simplefilter("ignore")

In [2]:
# Anonymous, read-only Google Cloud Storage handle; no credentials are needed
# because the token is 'anon'.
fs = gcsfs.GCSFileSystem(
    access='read_only',
    token='anon',
)

In [12]:
# Catalog of CMIP6 zarr stores: one row per store, read straight from the
# CSV published alongside the data.
cat = pd.read_csv(
    'https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv'
)
# Alternative catalog source, kept for reference:
#cat = pd.read_csv('ftp://ftp.cgd.ucar.edu/archive/aletheia-data/intake-esm-datastore/catalogs/glade-cmip6.csv')
#cat.info()

In [5]:
# define a simple search on keywords
def search_df(df, verbose=False, **search):
    """Filter a catalog DataFrame by ``column=value`` keywords.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to search; every keyword in ``search`` must name one of its
        columns.
    verbose : bool, default False
        If True, print the unique values that remain in each facet column
        that was *not* constrained by an exact (list) match.
    **search
        ``column=value`` pairs, applied in the order given. A ``str`` value
        keeps rows whose column contains it (it is handed to
        ``Series.str.contains``, which by default treats it as a regular
        expression); rows with a missing value in that column never match.
        Any other iterable keeps rows whose column equals one of its items
        exactly; an empty iterable matches nothing.

    Returns
    -------
    pandas.DataFrame
        The matching rows. After a list match the rows are grouped in the
        order the values were listed, not in their original order.
    """
    keys = ['activity_id','institution_id','source_id','experiment_id','member_id', 'table_id', 'variable_id', 'grid_label']
    d = df
    for skey, wanted in search.items():

        if isinstance(wanted, str):  # match a string as a substring
            # na=False: a NaN in the column would otherwise put NA into the
            # boolean mask and make the indexing below raise.
            d = d[d[skey].str.contains(wanted, na=False)]
        else:                        # match a list of strings exactly
            # One frame per requested value, concatenated in request order so
            # that callers keep the ordering they asked for.
            matches = [d[d[skey] == value] for value in wanted]
            # pd.concat([]) raises; an empty request simply matches no rows.
            d = pd.concat(matches) if matches else d.iloc[0:0]
            # Only facet columns are reported in verbose mode; searching on
            # any other column (e.g. 'zstore') must not fail here.
            if skey in keys:
                keys.remove(skey)
    if verbose:
        for key in keys:
            print(key,' = ',list(d[key].unique()))
    return d

## Load CMIP6 datasets

In [13]:
# NOTE: despite the variable name, this selects experiment_id 'piControl'.
# Keyword order is kept because search_df applies the filters in that order.
historical = search_df(
    cat,
    verbose=True,
    experiment_id=['piControl'],
    table_id='Omon',
    variable_id=['thetao', 'so'],
    grid_label=['gr'],
)

activity_id  =  ['CMIP']
institution_id  =  ['NASA-GISS', 'NCAR', 'NOAA-GFDL']
source_id  =  ['GISS-E2-1-H', 'CESM2-WACCM', 'CESM2', 'GFDL-CM4', 'GFDL-ESM4']
member_id  =  ['r1i1p1f1']
table_id  =  ['Omon']


In [14]:
# Same facets as the selection above, but for experiment_id 'ssp585'.
# Keyword order is kept because search_df applies the filters in that order.
ssp585 = search_df(
    cat,
    verbose=True,
    experiment_id=['ssp585'],
    table_id='Omon',
    variable_id=['thetao', 'so'],
    grid_label=['gr'],
)

activity_id  =  ['ScenarioMIP']
institution_id  =  ['NCAR', 'NOAA-GFDL']
source_id  =  ['CESM2-WACCM', 'CESM2', 'GFDL-CM4']
member_id  =  ['r1i1p1f1', 'r2i1p1f1']
table_id  =  ['Omon']


In [17]:
# Open every selected zarr store and index the resulting datasets by a dotted
# name derived from the store path.
zstores_hist = historical.zstore.unique()

dset_dict_hist = {}
for zstore in zstores_hist:
    # 'gs://cmip6/A/B/.../' -> 'A.B....': strip the trailing slash explicitly
    # instead of dropping the last character, which would truncate the name
    # of any store URL that does not end in '/'.
    name = zstore.split('gs://cmip6/')[1].rstrip('/').replace('/','.')
    print(name)

    ds = xr.open_zarr(fs.get_mapper(zstore))       # DO NOT USE "decode_times = False"
    # Dataset.sizes is the dimension-name -> length mapping; using
    # Dataset.dims as a mapping is deprecated in xarray.
    print(dict(ds.sizes),'\n')

    dset_dict_hist[name] = ds

CMIP.NASA-GISS.GISS-E2-1-H.piControl.r1i1p1f1.Omon.thetao.gr
{'bnds': 2, 'lat': 180, 'lev': 33, 'lon': 360, 'time': 4812} 

CMIP.NCAR.CESM2-WACCM.piControl.r1i1p1f1.Omon.thetao.gr
{'d2': 2, 'lat': 180, 'lev': 33, 'lon': 360, 'time': 5988} 

CMIP.NCAR.CESM2.piControl.r1i1p1f1.Omon.thetao.gr
{'d2': 2, 'lat': 180, 'lev': 33, 'lon': 360, 'time': 14400} 

CMIP.NOAA-GFDL.GFDL-CM4.piControl.r1i1p1f1.Omon.thetao.gr
{'bnds': 2, 'lat': 180, 'lev': 35, 'lon': 360, 'time': 6000} 

CMIP.NOAA-GFDL.GFDL-ESM4.piControl.r1i1p1f1.Omon.thetao.gr
{'bnds': 2, 'lat': 180, 'lev': 35, 'lon': 360, 'time': 6000} 

CMIP.NASA-GISS.GISS-E2-1-H.piControl.r1i1p1f1.Omon.so.gr
{'bnds': 2, 'lat': 180, 'lev': 33, 'lon': 360, 'time': 4812} 

CMIP.NCAR.CESM2-WACCM.piControl.r1i1p1f1.Omon.so.gr
{'d2': 2, 'lat': 180, 'lev': 33, 'lon': 360, 'time': 5988} 

CMIP.NCAR.CESM2.piControl.r1i1p1f1.Omon.so.gr
{'d2': 2, 'lat': 180, 'lev': 33, 'lon': 360, 'time': 14400} 

CMIP.NOAA-GFDL.GFDL-CM4.piControl.r1i1p1f1.Omon.so.gr
{'bnds': 