In [1]:
import numpy as np
import xarray as xr
import intake
import gcsfs
import matplotlib.pyplot as plt
import cartopy.crs as ccrs



In [2]:
url = "https://raw.githubusercontent.com/andrewpauling/cmip6hack-so-project/master/catalogs/pangeo-cmip6.json"
col = intake.open_esm_datastore(url)
col.df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year
0,AerChemMIP,BCC,BCC-ESM1,ssp370,r1i1p1f1,Amon,pr,gn,gs://cmip6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1...,
1,AerChemMIP,BCC,BCC-ESM1,ssp370,r1i1p1f1,Amon,prsn,gn,gs://cmip6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1...,
2,AerChemMIP,BCC,BCC-ESM1,ssp370,r1i1p1f1,Amon,tas,gn,gs://cmip6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1...,
3,AerChemMIP,BCC,BCC-ESM1,ssp370,r1i1p1f1,Amon,tasmax,gn,gs://cmip6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1...,
4,AerChemMIP,BCC,BCC-ESM1,ssp370,r1i1p1f1,Amon,tasmin,gn,gs://cmip6/AerChemMIP/BCC/BCC-ESM1/ssp370/r1i1...,


In [5]:
cat = col.search(experiment_id=['historical'], table_id=['SImon', 'Omon'],
                 grid_label='gn')
cat.df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year
163,CMIP,AWI,AWI-CM-1-1-MR,historical,r1i1p1f1,Omon,hfds,gn,gs://cmip6/CMIP/AWI/AWI-CM-1-1-MR/historical/r...,
164,CMIP,AWI,AWI-CM-1-1-MR,historical,r1i1p1f1,Omon,mlotst,gn,gs://cmip6/CMIP/AWI/AWI-CM-1-1-MR/historical/r...,
165,CMIP,AWI,AWI-CM-1-1-MR,historical,r1i1p1f1,Omon,so,gn,gs://cmip6/CMIP/AWI/AWI-CM-1-1-MR/historical/r...,
166,CMIP,AWI,AWI-CM-1-1-MR,historical,r1i1p1f1,Omon,sos,gn,gs://cmip6/CMIP/AWI/AWI-CM-1-1-MR/historical/r...,
167,CMIP,AWI,AWI-CM-1-1-MR,historical,r1i1p1f1,Omon,tauuo,gn,gs://cmip6/CMIP/AWI/AWI-CM-1-1-MR/historical/r...,


In [6]:
import pprint 
uni_dict = col.unique(['source_id', 'experiment_id', 'table_id'])
pprint.pprint(uni_dict, compact=True)

{'experiment_id': {'count': 29,
                   'values': ['ssp370', 'esm-ssp585', '1pctCO2-bgc', 'hist-bgc',
                              '1pctCO2', 'abrupt-4xCO2', 'historical',
                              'piControl', 'amip', 'esm-hist', 'esm-piControl',
                              'hist-GHG', 'hist-aer', 'hist-nat', 'dcppA-assim',
                              'dcppA-hindcast', 'dcppC-hindcast-noAgung',
                              'dcppC-hindcast-noElChichon',
                              'dcppC-hindcast-noPinatubo', 'highresSST-present',
                              'control-1950', 'hist-1950', 'deforest-globe',
                              'esm-ssp585-ssp126Lu', 'omip1', 'lgm', 'ssp126',
                              'ssp245', 'ssp585']},
 'source_id': {'count': 48,
               'values': ['BCC-ESM1', 'BCC-CSM2-MR', 'CanESM5', 'CNRM-ESM2-1',
                          'UKESM1-0-LL', 'GISS-E2-1-G', 'CESM2', 'GFDL-ESM4',
                          'AWI-CM-1-1-MR', 'CAM

Find all the models that have monthly siconc and monthly thetao and so

In [7]:
cat = col.search(experiment_id=['historical'], table_id=['SImon', 'Omon'],
                 grid_label='gn', variable_id=['siconc', 'thetao', 'so'])
cat.df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year
165,CMIP,AWI,AWI-CM-1-1-MR,historical,r1i1p1f1,Omon,so,gn,gs://cmip6/CMIP/AWI/AWI-CM-1-1-MR/historical/r...,
169,CMIP,AWI,AWI-CM-1-1-MR,historical,r1i1p1f1,Omon,thetao,gn,gs://cmip6/CMIP/AWI/AWI-CM-1-1-MR/historical/r...,
323,CMIP,BCC,BCC-CSM2-MR,historical,r1i1p1f1,Omon,so,gn,gs://cmip6/CMIP/BCC/BCC-CSM2-MR/historical/r1i...,
325,CMIP,BCC,BCC-CSM2-MR,historical,r1i1p1f1,Omon,thetao,gn,gs://cmip6/CMIP/BCC/BCC-CSM2-MR/historical/r1i...,
586,CMIP,BCC,BCC-ESM1,historical,r1i1p1f1,Omon,so,gn,gs://cmip6/CMIP/BCC/BCC-ESM1/historical/r1i1p1...,


# Get models that have monthly sea ice concentration, thetao and so

In [9]:
models = set(uni_dict['source_id']['values']) # all the models

for table_id in ['SImon', 'Omon']:
    if table_id == 'SImon':
        query = dict(experiment_id='historical', table_id=table_id, 
                     variable_id='siconc', grid_label='gn')  
        cat = col.search(**query)
        models = models.intersection({model for model in cat.df.source_id.unique().tolist()})
    else:
        for variable_id in ['thetao', 'so']:
            query = dict(experiment_id='historical', table_id=table_id, 
                         variable_id=variable_id, grid_label='gn')  
            cat = col.search(**query)
            models = models.intersection({model for model in cat.df.source_id.unique().tolist()})
        

models = list(models)
models

['GFDL-CM4',
 'SAM0-UNICON',
 'CESM2',
 'MIROC6',
 'CanESM5',
 'MIROC-ES2L',
 'CNRM-CM6-1',
 'CAMS-CSM1-0',
 'IPSL-CM6A-LR',
 'EC-Earth3-Veg',
 'CNRM-ESM2-1',
 'NESM3',
 'UKESM1-0-LL',
 'HadGEM3-GC31-LL']

In [16]:
cat = col.search(experiment_id='historical', table_id=['Omon', 'SImon'], 
                 variable_id=['siconc', 'thetao', 'so'], grid_label='gn', source_id=models)
cat.df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year
838,CMIP,CAMS,CAMS-CSM1-0,historical,r1i1p1f1,Omon,so,gn,gs://cmip6/CMIP/CAMS/CAMS-CSM1-0/historical/r1...,
841,CMIP,CAMS,CAMS-CSM1-0,historical,r1i1p1f1,Omon,thetao,gn,gs://cmip6/CMIP/CAMS/CAMS-CSM1-0/historical/r1...,
847,CMIP,CAMS,CAMS-CSM1-0,historical,r1i1p1f1,SImon,siconc,gn,gs://cmip6/CMIP/CAMS/CAMS-CSM1-0/historical/r1...,
863,CMIP,CAMS,CAMS-CSM1-0,historical,r2i1p1f1,SImon,siconc,gn,gs://cmip6/CMIP/CAMS/CAMS-CSM1-0/historical/r2...,
1455,CMIP,CCCma,CanESM5,historical,r10i1p1f1,Omon,so,gn,gs://cmip6/CMIP/CCCma/CanESM5/historical/r10i1...,


# Make sure each entry has the same ensemble member

In [31]:
members = set(uni_dict['member_id']['values']) # all the ensemble members

for model in models:
    tmp = cat.search(source_id=model)
    for member in tmp.df['member_id']:
        query = dict(experiment_id='historical', table_id=table_id, 
                     variable_id='siconc', grid_label='gn')  
        cat = col.search(**query)
        members = members.intersection({m for model in cat.df.source_id.unique().tolist()})
        
    
tmp.df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year
10136,CMIP,MOHC,HadGEM3-GC31-LL,historical,r1i1p1f3,SImon,siconc,gn,gs://cmip6/CMIP/MOHC/HadGEM3-GC31-LL/historica...,
10200,CMIP,MOHC,HadGEM3-GC31-LL,historical,r2i1p1f3,Omon,so,gn,gs://cmip6/CMIP/MOHC/HadGEM3-GC31-LL/historica...,
10204,CMIP,MOHC,HadGEM3-GC31-LL,historical,r2i1p1f3,Omon,thetao,gn,gs://cmip6/CMIP/MOHC/HadGEM3-GC31-LL/historica...,
10217,CMIP,MOHC,HadGEM3-GC31-LL,historical,r2i1p1f3,SImon,siconc,gn,gs://cmip6/CMIP/MOHC/HadGEM3-GC31-LL/historica...,
10260,CMIP,MOHC,HadGEM3-GC31-LL,historical,r3i1p1f3,SImon,siconc,gn,gs://cmip6/CMIP/MOHC/HadGEM3-GC31-LL/historica...,


In [127]:
filt_dict = dict()

for model in models:
    tmp2 = cat.search(source_id=model)
    tmp2.df.head()
    members = tmp2.df['member_id']
    memlist = list()
    for member in list(members):
        a = tmp2.search(member_id=member, variable_id='siconc').df['activity_id'].empty
        b = tmp2.search(member_id=member, variable_id='thetao').df['activity_id'].empty
        c = tmp2.search(member_id=member, variable_id='so').df['activity_id'].empty
        if not a and not b and not c and member not in memlist:
            memlist.append(member)
    filt_dict[model] =  memlist
            


In [128]:
filt_dict

{'GFDL-CM4': ['r1i1p1f1'],
 'SAM0-UNICON': ['r1i1p1f1'],
 'CESM2': ['r10i1p1f1',
  'r11i1p1f1',
  'r1i1p1f1',
  'r2i1p1f1',
  'r3i1p1f1',
  'r4i1p1f1',
  'r5i1p1f1',
  'r6i1p1f1',
  'r7i1p1f1',
  'r8i1p1f1',
  'r9i1p1f1'],
 'MIROC6': ['r10i1p1f1'],
 'CanESM5': ['r10i1p1f1'],
 'MIROC-ES2L': ['r1i1p1f2'],
 'CNRM-CM6-1': ['r10i1p1f2'],
 'CAMS-CSM1-0': ['r1i1p1f1'],
 'IPSL-CM6A-LR': ['r10i1p1f1'],
 'EC-Earth3-Veg': ['r1i1p1f1'],
 'CNRM-ESM2-1': ['r2i1p1f2'],
 'NESM3': ['r1i1p1f1'],
 'UKESM1-0-LL': ['r1i1p1f2'],
 'HadGEM3-GC31-LL': ['r2i1p1f3']}