In [1]:
%matplotlib inline

import intake
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

from explore_utils import get_cmip6_catalogue
from extract_utils import find_overlap_models, rename_dimensions



In [3]:
# Describe every dataset to fetch. Each request contributes one entry, at the
# same position, to each of the five parallel lists built below.
dset_name, exp, var, table_id, grid_label = [], [], [], [], []

# Name for this collection of inputs; it is also the experiment requested
# for every dataset in the collection.
collection_name = 'historical'
# collection_name = 'piControl'

# One row per request: (dataset name, variable, table, grid label)
_requests = (
    ('siconc', 'siconc', 'SImon', 'gn'),       # 1. siconc
    ('areacello', 'areacello', 'Ofx', 'gn'),   # 2. areacello
    ('tas', 'tas', 'Amon', 'gn'),              # 3. tas
)

for _name, _variable, _table, _grid in _requests:
    dset_name.append(_name)
    exp.append(collection_name)
    var.append(_variable)
    table_id.append(_table)
    grid_label.append(_grid)

In [4]:
# Get full catalogue of CMIP6 data on glade or cloud.
# The catalogue object is what the model-overlap check and the per-dataset
# searches further down are run against.
cmip6_collection = get_cmip6_catalogue()

In [5]:
# Find the models that contain all of the requested variables.
# The five request lists are passed in parallel together with the catalogue;
# the result is printed for inspection and is used further down to filter
# datasets by their `source_id` attribute and to loop over models.
models_intersect = find_overlap_models(dset_name, exp, var, table_id, grid_label, cmip6_collection)
print(models_intersect)

['MPI-ESM1-2-LR', 'MIROC6', 'CESM2-FV2', 'NorESM2-LM', 'MPI-ESM-1-2-HAM', 'SAM0-UNICON', 'ACCESS-CM2', 'MPI-ESM1-2-HR', 'MIROC-ES2L', 'CESM2-WACCM-FV2', 'ACCESS-ESM1-5', 'CESM2', 'NorCPM1', 'CanESM5', 'CanESM5-CanOE', 'MRI-ESM2-0']


In [6]:
# Run one catalogue search per requested dataset and store the result under
# the dataset name. Position `idx` ties the dataset name to its experiment,
# table, variable and grid label in the parallel request lists.
dset_dict = {}
for idx, name in enumerate(dset_name):
    dset_dict[name] = cmip6_collection.search(
        experiment_id=exp[idx],
        table_id=table_id[idx],
        variable_id=var[idx],
        grid_label=grid_label[idx],
    )

In [7]:
# Load the data: each search result stored in dset_dict is replaced by the
# dictionary of datasets it resolves to. Because the entries are overwritten,
# the search cell has to be re-run before this cell can be run again.
for name, search_result in dset_dict.items():
    print(name)
    dset_dict[name] = search_result.to_dataset_dict(
        zarr_kwargs={'consolidated': True, 'decode_times': True},
        cdf_kwargs={'chunks': {}, 'decode_times': True},
    )

# Known issue: times are not being decoded for piControl. This may be
# harmless, but it should be investigated.


siconc
Progress: |███████████████████████████████████████████████████████████████████████████████| 100.0% 

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'
             
--> There are 31 group(s)
areacello
Progress: |███████████████████████████████████████████████████████████████████████████████| 100.0% 

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'
             
--> There are 24 group(s)
tas
Progress: |███████████████████████████████████████████████████████████████████████████████| 100.0% 

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.table_id.grid_label'
             
--> There are 32 group(s)


In [8]:
# Rename dimensions to i, j so they are consistent across variables.
# The result is built in a fresh nested dict (dataset name -> group key ->
# dataset) and then swapped in for dset_dict.
dset_dict_temp = {}
for name, groups in dset_dict.items():
    dset_dict_temp[name] = {}
    for group_key, dataset in groups.items():
        # NOTE(review): the partially filled output dict is what gets passed
        # as the second argument -- confirm that rename_dimensions expects it.
        dset_dict_temp[name][group_key] = rename_dimensions(dataset, dset_dict_temp)

dset_dict = dset_dict_temp

In [9]:
# Re-key every dataset by its model name (the `source_id` attribute) and drop
# models that are not in models_intersect. If several groups share a model
# name, the last one encountered is the one that is kept.
dset_dict_temp = {}
for name, groups in dset_dict.items():
    dset_dict_temp[name] = {
        dataset.attrs['source_id']: dataset
        for dataset in groups.values()
        if dataset.attrs['source_id'] in models_intersect
    }

dset_dict = dset_dict_temp

In [10]:
# Add areacello and/or areacella as a variable in the other datasets.
# The first letter of a dataset's table_id decides which area it receives:
#   'S' / 'O' (sea ice or ocean tables) -> areacello
#   'A'       (atmosphere tables)       -> areacella
# An area is only attached when it was itself requested and loaded.
for area_name, table_initials in (('areacello', ['S', 'O']), ('areacella', ['A'])):
    if area_name not in dset_dict:
        continue
    for per_model in dset_dict.values():        # for each variable
        for model, dataset in per_model.items():  # for each model
            if dataset.attrs['table_id'][0] in table_initials:
                dataset[area_name] = dset_dict[area_name][model][area_name]

In [11]:
# Ensure only ensemble members that overlap all data sets are included.
#
# For every model in models_intersect, the member_id values shared by all
# datasets other than 'areacello' are collected, and each dataset of that
# model (areacello included) is then reduced to those members.
# NOTE(review): areacello is not part of the intersection but is still
# subset with `.sel`, so it is presumably expected to carry every selected
# member -- confirm against the loaded data.
dset_dict_temp = {d: {} for d in dset_dict}

for m in models_intersect:
    # None means "no dataset inspected yet". An explicit sentinel keeps an
    # empty intersection (a valid result) from being mistaken for the start
    # state or from raising on `ems[0]`.
    ems = None
    for d in dset_dict:
        # Compare by value: identity (`is not`) on a string literal depends
        # on interning and triggers a SyntaxWarning on Python 3.8+.
        if d != 'areacello':
            members = np.atleast_1d(dset_dict[d][m]['member_id'].values).tolist()
            if ems is None:
                ems = members
            else:
                # Filter instead of using a set intersection so the member
                # order is that of the first dataset and is reproducible.
                available = set(members)
                ems = [member for member in ems if member in available]

    for d in dset_dict:
        if ems is None:
            # Nothing constrained the members (only areacello was requested):
            # keep the dataset unchanged.
            dset_dict_temp[d][m] = dset_dict[d][m]
        else:
            dset_dict_temp[d][m] = dset_dict[d][m].sel(member_id=ems)

    print(m, ems)
    print()

dset_dict = dset_dict_temp

MPI-ESM1-2-LR ['r8i1p1f1', 'r1i1p1f1', 'r6i1p1f1', 'r9i1p1f1', 'r5i1p1f1', 'r4i1p1f1', 'r10i1p1f1', 'r3i1p1f1', 'r2i1p1f1', 'r7i1p1f1']

MIROC6 ['r8i1p1f1', 'r1i1p1f1', 'r6i1p1f1', 'r9i1p1f1', 'r5i1p1f1', 'r4i1p1f1', 'r10i1p1f1', 'r3i1p1f1', 'r2i1p1f1', 'r7i1p1f1']

CESM2-FV2 ['r1i1p1f1']

NorESM2-LM ['r2i1p1f1']

MPI-ESM-1-2-HAM ['r1i1p1f1', 'r2i1p1f1']

SAM0-UNICON ['r1i1p1f1']

ACCESS-CM2 ['r1i1p1f1', 'r2i1p1f1']

MPI-ESM1-2-HR ['r8i1p1f1', 'r1i1p1f1', 'r6i1p1f1', 'r9i1p1f1', 'r5i1p1f1', 'r4i1p1f1', 'r10i1p1f1', 'r3i1p1f1', 'r2i1p1f1', 'r7i1p1f1']

MIROC-ES2L ['r3i1p1f2', 'r1i1p1f2', 'r2i1p1f2']

CESM2-WACCM-FV2 ['r1i1p1f1']

ACCESS-ESM1-5 ['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1']

CESM2 ['r8i1p1f1', 'r6i1p1f1', 'r11i1p1f1', 'r1i1p1f1', 'r9i1p1f1', 'r5i1p1f1', 'r10i1p1f1', 'r3i1p1f1', 'r4i1p1f1', 'r2i1p1f1', 'r7i1p1f1']

NorCPM1 ['r26i1p1f1', 'r23i1p1f1', 'r17i1p1f1', 'r24i1p1f1']

CanESM5 ['r8i1p1f1', 'r13i1p1f1', 'r13i1p2f1', 'r11i1p1f1', 'r7i1p2f1', 'r21i1p2f1', 'r5i1p1f1', 'r20i1p1f1

In [12]:
# Save the assembled dictionary for future use, one file per collection.
# Nothing is written when saving is switched off or the dictionary is empty.
# NOTE: dset_dict is a plain dict, so np.save stores it as a pickled object;
# reading it back needs np.load(..., allow_pickle=True).item().
save_flag = True
if save_flag and dset_dict:
    out_path = 'dset_dict_' + collection_name + '.npy'
    np.save(out_path, dset_dict)

In [12]:
# To-do
# 1. Add an option for selecting a specific time interval.