In [1]:
## Get dependencies ##

import string
import math
import sys
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sn
sys.path.append('..')
from GIR import *
import scipy as sp
import pickle
import time
import scipy as sp
from scipy import ndimage
from scipy import signal
import os
import statsmodels.api as sm
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import glob
import requests
import ftplib
import PyPDF2
import io
import cmocean
import multiprocessing
import xarray as xr
import numpy as np
import pandas as pd
import requests
import xml.etree.ElementTree as ET
import zarr
import gcsfs
import multiprocessing

In [2]:
def esgf_search(server="https://esgf-node.llnl.gov/esg-search/search",
                files_type="OPENDAP", local_node=True, project="CMIP6",
                verbose=False,url_verbose=False, format="application%2Fsolr%2Bjson",
                use_csrf=False, **search):
    client = requests.session()
    payload = search
    payload["project"] = project
    payload["type"]= "File"
    if local_node:
        payload["distrib"] = "false"
    if use_csrf:
        client.get(server)
        if 'csrftoken' in client.cookies:
            # Django 1.6 and up
            csrftoken = client.cookies['csrftoken']
        else:
            # older versions
            csrftoken = client.cookies['csrf']
        payload["csrfmiddlewaretoken"] = csrftoken

    payload["format"] = format

    offset = 0
    numFound = 10000
    all_files = []
    files_type = files_type.upper()
    while offset < numFound:
        payload["offset"] = offset
        url_keys = [] 
        for k in payload:
            url_keys += ["{}={}".format(k, payload[k])]

        url = "{}/?{}".format(server, "&".join(url_keys))
        if url_verbose:
            print(url)
        r = client.get(url)
        r.raise_for_status()
        resp = r.json()["response"]
        numFound = int(resp["numFound"])
        resp = resp["docs"]
        offset += len(resp)
        for d in resp:
            if verbose:
                for k in d:
                    print("{}: {}".format(k,d[k]))
            url = d["url"]
            for f in d["url"]:
                sp = f.split("|")
                if sp[-1] == files_type:
                    all_files.append(sp[0].split(".html")[0])
    return sorted(all_files)

In [29]:
gs_stores = pd.read_csv('gs://cmip6/cmip6-zarr-consolidated-stores.csv')
gcs = gcsfs.GCSFileSystem(token='anon')

def get_annual_CMIP6_data_gstore(activity, table, variable, experiment, institution, source, member):
    # eg activity='CMIP', table='Amon', variable='tas', experiment='historical', institution="NCAR", source="CESM2", member="r10i1p1f1"
    
    query = gs_stores.query("activity_id==\'"+activity+"\' & table_id==\'"+table+"\' & variable_id==\'"+variable+"\' & experiment_id==\'"+experiment+"\' & institution_id==\'"+institution+"\' & source_id==\'"+source+"\' & member_id==\'"+member+"\'")
    
    if query.empty:
        print('No results for this request')
        return None

    # create a mutable-mapping-style interface to the store
    mapper = gcs.get_mapper(query.zstore.values[0])

    # open it using xarray and zarr
    ds = xr.open_zarr(mapper, consolidated=True)
    
    area_query = gs_stores.query("variable_id=='areacella' & source_id==\'"+source+"\'")
    
    if area_query.empty:
        files_area = esgf_search(variable_id='areacella', activity_id=activity, institution_id=institution, source_id=source)
        if not files_area:
            print('No areacella for this request')
            return None
        ds_area = xr.open_dataset(files_area[0])
    else:
        ds_area = xr.open_zarr(gcs.get_mapper(area_query.zstore.values[0]), consolidated=True)
    
    coords = list(ds[variable].coords.keys())
    if 'lat' in coords:
        dims = ['lat','lon']
    else:
        dims = ['latitude','longitude']
    
    total_area = ds_area.areacella.sum(dim=dims)
    ta_timeseries = (ds[variable] * ds_area.areacella).sum(dim=dims) / total_area
    
    return ta_timeseries.groupby('time.year').mean('time').to_pandas().rename(institution+'_'+source+'_'+member)

In [30]:
gs_stores.loc[:,'ism'] = gs_stores.loc[:,'institution_id'] + '_' + gs_stores.loc[:,'source_id'] + '_' + gs_stores.loc[:,'member_id']

In [41]:
abrupt_4x_ism = gs_stores.loc[(gs_stores.experiment_id=='abrupt-4xCO2')&(gs_stores.variable_id.isin(['tas','rlut','rsdt','rsut']))]
abrupt_4x_ism = list(set([x for x in abrupt_4x_ism.ism if abrupt_4x_ism.loc[abrupt_4x_ism.ism==x].shape[0]>=4]))

In [45]:
piControl_ism = gs_stores.loc[(gs_stores.experiment_id=='piControl')&(gs_stores.variable_id=='tas')]
piControl_ism = list(set(piControl_ism.ism))

In [51]:
areacella_s_gs = [x.split('_')[1] for x in list(set(gs_stores.loc[(gs_stores.variable_id=='areacella')].ism))]

In [52]:
areacella_list = esgf_search(activity_id='CMIP', variable_id='areacella')
areacella_list_nodupl = []

for item in areacella_list:
    if item.split('/')[-1] in [x.split('/')[-1] for x in areacella_list_nodupl]:
        continue
    else:
        areacella_list_nodupl += [item]
    
areacella_ism_list = list(set([x.split('/')[8]+'_'+x.split('/')[9]+'_'+x.split('/')[11] for x in areacella_list_nodupl]))
areacella_s_esgf = list(set([x.split('_')[1] for x in areacella_ism_list]))

In [60]:
areacella_s_all = list(set(areacella_s_gs).union(areacella_s_esgf))

In [65]:
abrupt_4x_ism_areacella_exist = [x for x in abrupt_4x_ism if x.split('_')[1] in areacella_s_all]
piControl_ism_areacella_exist = [x for x in piControl_ism if x.split('_')[1] in areacella_s_all]

In [66]:
def get_cmip6_data_gs(ism,var,exp):
    print('getting '+ism)
    ism_split = ism.split('_')
    _out = get_annual_CMIP6_data_gstore('CMIP', 'Amon', var, exp, ism_split[0], ism_split[1], ism_split[2])
    print('got '+ism)
    return _out

In [67]:
# P1=multiprocessing.Pool(processes=8)
# piControl_df = P1.starmap(get_cmip6_data_gs,[(x,'tas','piControl') for x in list(set(areacella_ism).intersection(set(piControl_ism)))])
# P1.close

In [72]:
# piControl_df_list = []
for ism in piControl_ism_areacella_exist:
    piControl_df_list += [get_cmip6_data_gs(ism,'tas','piControl')]

getting E3SM-Project_E3SM-1-1-ECA_r1i1p1f1
got E3SM-Project_E3SM-1-1-ECA_r1i1p1f1
getting NASA-GISS_GISS-E2-1-G_r1i1p1f3
got NASA-GISS_GISS-E2-1-G_r1i1p1f3
getting MOHC_HadGEM3-GC31-MM_r1i1p1f1
got MOHC_HadGEM3-GC31-MM_r1i1p1f1
getting NOAA-GFDL_GFDL-ESM4_r1i1p1f1
got NOAA-GFDL_GFDL-ESM4_r1i1p1f1
getting NASA-GISS_GISS-E2-1-G_r101i1p1f1
got NASA-GISS_GISS-E2-1-G_r101i1p1f1
getting IPSL_IPSL-CM6A-LR_r1i2p1f1
got IPSL_IPSL-CM6A-LR_r1i2p1f1
getting NCAR_CESM2-FV2_r1i1p1f1
got NCAR_CESM2-FV2_r1i1p1f1
getting E3SM-Project_E3SM-1-1_r1i1p1f1
got E3SM-Project_E3SM-1-1_r1i1p1f1
getting MPI-M_MPI-ESM1-2-HR_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got MPI-M_MPI-ESM1-2-HR_r1i1p1f1
getting NCC_NorCPM1_r3i1p1f1
got NCC_NorCPM1_r3i1p1f1
getting E3SM-Project_E3SM-1-0_r1i1p1f1
got E3SM-Project_E3SM-1-0_r1i1p1f1
getting MRI_MRI-ESM2-0_r1i2p1f1
got MRI_MRI-ESM2-0_r1i2p1f1
getting CNRM-CERFACS_CNRM-ESM2-1_r1i1p1f2
got CNRM-CERFACS_CNRM-ESM2-1_r1i1p1f2
getting CCCma_CanESM5_r1i1p2f1
got CCCma_CanESM5_r1i1p2f1
getting IPSL_IPSL-CM6A-LR_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got IPSL_IPSL-CM6A-LR_r1i1p1f1
getting MIROC_MIROC-ES2L_r1i1p1f2


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got MIROC_MIROC-ES2L_r1i1p1f2
getting NOAA-GFDL_GFDL-CM4_r1i1p1f1
got NOAA-GFDL_GFDL-CM4_r1i1p1f1
getting AWI_AWI-ESM-1-1-LR_r1i1p1f1
got AWI_AWI-ESM-1-1-LR_r1i1p1f1
getting INM_INM-CM5-0_r1i1p1f1
got INM_INM-CM5-0_r1i1p1f1
getting CSIRO_ACCESS-ESM1-5_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO_ACCESS-ESM1-5_r1i1p1f1
getting MIROC_MIROC6_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got MIROC_MIROC6_r1i1p1f1
getting NCC_NorESM1-F_r1i1p1f1
got NCC_NorESM1-F_r1i1p1f1
getting NCAR_CESM2_r1i1p1f1
got NCAR_CESM2_r1i1p1f1
getting MRI_MRI-ESM2-0_r1i1p1f1
got MRI_MRI-ESM2-0_r1i1p1f1
getting MPI-M_MPI-ESM1-2-LR_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got MPI-M_MPI-ESM1-2-LR_r1i1p1f1
getting NASA-GISS_GISS-E2-1-H_r1i1p1f1
got NASA-GISS_GISS-E2-1-H_r1i1p1f1
getting AS-RCEC_TaiESM1_r1i1p1f1
got AS-RCEC_TaiESM1_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1-HR_r1i1p1f2
got CNRM-CERFACS_CNRM-CM6-1-HR_r1i1p1f2
getting NASA-GISS_GISS-E2-1-H_r1i1p3f1
got NASA-GISS_GISS-E2-1-H_r1i1p3f1
getting MPI-M_MPI-ESM1-2-LR_r2i1p1f1
got MPI-M_MPI-ESM1-2-LR_r2i1p1f1
getting NCC_NorESM2-LM_r1i1p1f1
got NCC_NorESM2-LM_r1i1p1f1
getting CMCC_CMCC-CM2-SR5_r1i1p1f1
got CMCC_CMCC-CM2-SR5_r1i1p1f1
getting NASA-GISS_GISS-E2-1-G-CC_r1i1p1f1
got NASA-GISS_GISS-E2-1-G-CC_r1i1p1f1
getting BCC_BCC-ESM1_r1i1p1f1
got BCC_BCC-ESM1_r1i1p1f1
getting MOHC_HadGEM3-GC31-LL_r1i1p1f1
got MOHC_HadGEM3-GC31-LL_r1i1p1f1
getting HAMMOZ-Consortium_MPI-ESM-1-2-HAM_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got HAMMOZ-Consortium_MPI-ESM-1-2-HAM_r1i1p1f1
getting NCC_NorCPM1_r1i1p1f1
got NCC_NorCPM1_r1i1p1f1
getting NCC_NorESM2-MM_r1i1p1f1
got NCC_NorESM2-MM_r1i1p1f1
getting NCAR_CESM2-WACCM_r1i1p1f1
got NCAR_CESM2-WACCM_r1i1p1f1
getting EC-Earth-Consortium_EC-Earth3-Veg-LR_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got EC-Earth-Consortium_EC-Earth3-Veg-LR_r1i1p1f1
getting SNU_SAM0-UNICON_r1i1p1f1
got SNU_SAM0-UNICON_r1i1p1f1
getting NASA-GISS_GISS-E2-1-G_r1i1p5f1
got NASA-GISS_GISS-E2-1-G_r1i1p5f1
getting MOHC_UKESM1-0-LL_r1i1p1f2
got MOHC_UKESM1-0-LL_r1i1p1f2
getting NASA-GISS_GISS-E2-1-G_r102i1p1f1
got NASA-GISS_GISS-E2-1-G_r102i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r1i1p1f2


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CNRM-CERFACS_CNRM-CM6-1_r1i1p1f2
getting NASA-GISS_GISS-E2-1-G_r1i1p3f1
got NASA-GISS_GISS-E2-1-G_r1i1p3f1
getting NCAR_CESM2-WACCM-FV2_r1i1p1f1
got NCAR_CESM2-WACCM-FV2_r1i1p1f1
getting NASA-GISS_GISS-E2-1-G_r1i1p1f2
got NASA-GISS_GISS-E2-1-G_r1i1p1f2
getting UA_MCM-UA-1-0_r1i1p1f1
got UA_MCM-UA-1-0_r1i1p1f1
getting NASA-GISS_GISS-E2-1-G_r1i1p1f1
got NASA-GISS_GISS-E2-1-G_r1i1p1f1
getting INM_INM-CM4-8_r1i1p1f1
got INM_INM-CM4-8_r1i1p1f1


In [93]:
piControl_rlut_df_list = []
for ism in piControl_ism_areacella_exist:
    piControl_rlut_df_list += [get_cmip6_data_gs(ism,'rlut','piControl')]

getting NCC_NorCPM1_r2i1p1f1
got NCC_NorCPM1_r2i1p1f1
getting CCCma_CanESM5_r1i1p1f1
got CCCma_CanESM5_r1i1p1f1
getting NASA-GISS_GISS-E2-1-G_r2i1p1f1
got NASA-GISS_GISS-E2-1-G_r2i1p1f1
getting CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1
getting CAMS_CAMS-CSM1-0_r1i1p1f1
got CAMS_CAMS-CSM1-0_r1i1p1f1
getting NASA-GISS_GISS-E2-1-H_r1i1p3f1
No results for this request
got NASA-GISS_GISS-E2-1-H_r1i1p3f1
getting CMCC_CMCC-CM2-SR5_r1i1p1f1
No results for this request
got CMCC_CMCC-CM2-SR5_r1i1p1f1
getting EC-Earth-Consortium_EC-Earth3-Veg-LR_r1i1p1f1
No results for this request
got EC-Earth-Consortium_EC-Earth3-Veg-LR_r1i1p1f1


In [94]:
piControl_rsut_df_list = []
for ism in piControl_ism_areacella_exist:
    piControl_rsut_df_list += [get_cmip6_data_gs(ism,'rsut','piControl')]

getting NCC_NorCPM1_r2i1p1f1
got NCC_NorCPM1_r2i1p1f1
getting CCCma_CanESM5_r1i1p1f1
got CCCma_CanESM5_r1i1p1f1
getting NASA-GISS_GISS-E2-1-G_r2i1p1f1
got NASA-GISS_GISS-E2-1-G_r2i1p1f1
getting CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1
getting CAMS_CAMS-CSM1-0_r1i1p1f1
got CAMS_CAMS-CSM1-0_r1i1p1f1
getting NASA-GISS_GISS-E2-1-H_r1i1p3f1
No results for this request
got NASA-GISS_GISS-E2-1-H_r1i1p3f1
getting CMCC_CMCC-CM2-SR5_r1i1p1f1
No results for this request
got CMCC_CMCC-CM2-SR5_r1i1p1f1
getting UA_MCM-UA-1-0_r1i1p1f1
No results for this request
got UA_MCM-UA-1-0_r1i1p1f1


In [95]:
piControl_rsdt_df_list = []
for ism in piControl_ism_areacella_exist:
    piControl_rsdt_df_list += [get_cmip6_data_gs(ism,'rsdt','piControl')]

getting NCC_NorCPM1_r2i1p1f1
got NCC_NorCPM1_r2i1p1f1
getting CCCma_CanESM5_r1i1p1f1
got CCCma_CanESM5_r1i1p1f1
getting NASA-GISS_GISS-E2-1-G_r2i1p1f1
got NASA-GISS_GISS-E2-1-G_r2i1p1f1
getting CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1
getting CAMS_CAMS-CSM1-0_r1i1p1f1
got CAMS_CAMS-CSM1-0_r1i1p1f1
getting NASA-GISS_GISS-E2-1-H_r1i1p3f1
No results for this request
got NASA-GISS_GISS-E2-1-H_r1i1p3f1
getting CMCC_CMCC-CM2-SR5_r1i1p1f1
No results for this request
got CMCC_CMCC-CM2-SR5_r1i1p1f1
getting EC-Earth-Consortium_EC-Earth3-Veg-LR_r1i1p1f1
No results for this request
got EC-Earth-Consortium_EC-Earth3-Veg-LR_r1i1p1f1
getting UA_MCM-UA-1-0_r1i1p1f1
No results for this request
got UA_MCM-UA-1-0_r1i1p1f1


In [73]:
abrutp4x_tas_df_list = []
for ism in abrupt_4x_ism_areacella_exist:
    abrutp4x_tas_df_list += [get_cmip6_data_gs(ism,'tas','abrupt-4xCO2')]

getting IPSL_IPSL-CM6A-LR_r3i1p1f1
got IPSL_IPSL-CM6A-LR_r3i1p1f1
getting CCCma_CanESM5_r1i1p1f1
got CCCma_CanESM5_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r6i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r6i1p1f2
getting CNRM-CERFACS_CNRM-ESM2-1_r3i1p1f2
got CNRM-CERFACS_CNRM-ESM2-1_r3i1p1f2
getting CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1
getting CAMS_CAMS-CSM1-0_r1i1p1f1
got CAMS_CAMS-CSM1-0_r1i1p1f1
getting IPSL_IPSL-CM6A-LR_r2i1p1f1
got IPSL_IPSL-CM6A-LR_r2i1p1f1
getting IPSL_IPSL-CM6A-LR_r6i1p1f1
got IPSL_IPSL-CM6A-LR_r6i1p1f1
getting IPSL_IPSL-CM6A-LR_r9i1p1f1
got IPSL_IPSL-CM6A-LR_r9i1p1f1
getting NASA-GISS_GISS-E2-1-G_r1i1p1f3
got NASA-GISS_GISS-E2-1-G_r1i1p1f3
getting NOAA-GFDL_GFDL-ESM4_r1i1p1f1
got NOAA-GFDL_GFDL-ESM4_r1i1p1f1
getting MRI_MRI-ESM2-0_r11i1p1f1
got MRI_MRI-ESM2-0_r11i1p1f1
getting MRI_MRI-ESM2-0_r3i1p1f1
got MRI_MRI-ESM2-0_r3i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r3i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r3i1p1f2
getting MPI-M_MPI-ESM1-2-HR_r1i1p1f1
got MPI-M_MPI-ESM1-2-HR_r1i1p1f1
getting MRI_MRI-ESM2-0_r6i1p1f1
got MRI_MRI-ESM2-0_r6i1p1f1
getting E3SM-Project_E3SM-1-0_r1i1p1f1
got E3SM-Project_E3SM-1-0_r1i1p1f1
getting IPSL_IPSL-CM6A-LR_r10i1p1f1
got IPSL_IPSL-CM6A-LR_r10i1p1f1
getting MRI_MRI-ESM2-0_r1i2p1f1
got MRI_MRI-ESM2-0_r1i2p1f1
getting MRI_MRI-ESM2-0_r1

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO_ACCESS-ESM1-5_r1i1p1f1
getting MIROC_MIROC6_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got MIROC_MIROC6_r1i1p1f1
getting MRI_MRI-ESM2-0_r13i1p1f1
got MRI_MRI-ESM2-0_r13i1p1f1
getting MRI_MRI-ESM2-0_r10i1p1f1
got MRI_MRI-ESM2-0_r10i1p1f1
getting NCAR_CESM2_r1i1p1f1
got NCAR_CESM2_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r2i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r2i1p1f2
getting MRI_MRI-ESM2-0_r1i1p1f1
got MRI_MRI-ESM2-0_r1i1p1f1
getting MPI-M_MPI-ESM1-2-LR_r1i1p1f1
got MPI-M_MPI-ESM1-2-LR_r1i1p1f1
getting NASA-GISS_GISS-E2-1-H_r1i1p1f1
got NASA-GISS_GISS-E2-1-H_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1-HR_r1i1p1f2
got CNRM-CERFACS_CNRM-CM6-1-HR_r1i1p1f2
getting MOHC_HadGEM3-GC31-LL_r1i1p1f3
got MOHC_HadGEM3-GC31-LL_r1i1p1f3
getting NCC_NorESM2-LM_r1i1p1f1
got NCC_NorESM2-LM_r1i1p1f1
getting MRI_MRI-ESM2-0_r4i1p1f1
got MRI_MRI-ESM2-0_r4i1p1f1
getting IPSL_IPSL-CM6A-LR_r12i1p1f1
got IPSL_IPSL-CM6A-LR_r12i1p1f1
getting MOHC_HadGEM3-GC31-MM_r1i1p1f3
got MOHC_HadGEM3-GC31-MM_r1i1p1f3
getting BCC_BCC-ESM1_r1i1p1f1
got BCC_BCC-ESM1_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r5i1p1f2
got

In [77]:
abrutp4x_rlut_df_list = []
for ism in abrupt_4x_ism_areacella_exist:
    abrutp4x_rlut_df_list += [get_cmip6_data_gs(ism,'rlut','abrupt-4xCO2')]

getting IPSL_IPSL-CM6A-LR_r3i1p1f1
got IPSL_IPSL-CM6A-LR_r3i1p1f1
getting CCCma_CanESM5_r1i1p1f1
got CCCma_CanESM5_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r6i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r6i1p1f2
getting CNRM-CERFACS_CNRM-ESM2-1_r3i1p1f2
got CNRM-CERFACS_CNRM-ESM2-1_r3i1p1f2
getting CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1
getting CAMS_CAMS-CSM1-0_r1i1p1f1
got CAMS_CAMS-CSM1-0_r1i1p1f1
getting IPSL_IPSL-CM6A-LR_r2i1p1f1
got IPSL_IPSL-CM6A-LR_r2i1p1f1
getting IPSL_IPSL-CM6A-LR_r6i1p1f1
got IPSL_IPSL-CM6A-LR_r6i1p1f1
getting IPSL_IPSL-CM6A-LR_r9i1p1f1
got IPSL_IPSL-CM6A-LR_r9i1p1f1
getting NASA-GISS_GISS-E2-1-G_r1i1p1f3
got NASA-GISS_GISS-E2-1-G_r1i1p1f3
getting NOAA-GFDL_GFDL-ESM4_r1i1p1f1
got NOAA-GFDL_GFDL-ESM4_r1i1p1f1
getting MRI_MRI-ESM2-0_r11i1p1f1
got MRI_MRI-ESM2-0_r11i1p1f1
getting MRI_MRI-ESM2-0_r3i1p1f1
got MRI_MRI-ESM2-0_r3i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r3i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r3i1p1f2
getting MPI-M_MPI-ESM1-2-HR_r1i1p1f1
got MPI-M_MPI-ESM1-2-HR_r1i1p1f1
getting MRI_MRI-ESM2-0_r6i1p1f1
got MRI_MRI-ESM2-0_r6i1p1f1
getting E3SM-Project_E3SM-1-0_r1i1p1f1
got E3SM-Project_E3SM-1-0_r1i1p1f1
getting IPSL_IPSL-CM6A-LR_r10i1p1f1
got IPSL_IPSL-CM6A-LR_r10i1p1f1
getting MRI_MRI-ESM2-0_r1i2p1f1
got MRI_MRI-ESM2-0_r1i2p1f1
getting MRI_MRI-ESM2-0_r1

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got IPSL_IPSL-CM6A-LR_r1i1p1f1
getting MIROC_MIROC-ES2L_r1i1p1f2
got MIROC_MIROC-ES2L_r1i1p1f2
getting NOAA-GFDL_GFDL-CM4_r1i1p1f1
got NOAA-GFDL_GFDL-CM4_r1i1p1f1
getting INM_INM-CM5-0_r1i1p1f1
got INM_INM-CM5-0_r1i1p1f1
getting IPSL_IPSL-CM6A-LR_r5i1p1f1
got IPSL_IPSL-CM6A-LR_r5i1p1f1
getting CSIRO_ACCESS-ESM1-5_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO_ACCESS-ESM1-5_r1i1p1f1
getting MIROC_MIROC6_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got MIROC_MIROC6_r1i1p1f1
getting MRI_MRI-ESM2-0_r13i1p1f1
got MRI_MRI-ESM2-0_r13i1p1f1
getting MRI_MRI-ESM2-0_r10i1p1f1
got MRI_MRI-ESM2-0_r10i1p1f1
getting NCAR_CESM2_r1i1p1f1
got NCAR_CESM2_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r2i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r2i1p1f2
getting MRI_MRI-ESM2-0_r1i1p1f1
got MRI_MRI-ESM2-0_r1i1p1f1
getting MPI-M_MPI-ESM1-2-LR_r1i1p1f1
got MPI-M_MPI-ESM1-2-LR_r1i1p1f1
getting NASA-GISS_GISS-E2-1-H_r1i1p1f1
got NASA-GISS_GISS-E2-1-H_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1-HR_r1i1p1f2
got CNRM-CERFACS_CNRM-CM6-1-HR_r1i1p1f2
getting MOHC_HadGEM3-GC31-LL_r1i1p1f3
got MOHC_HadGEM3-GC31-LL_r1i1p1f3
getting NCC_NorESM2-LM_r1i1p1f1
got NCC_NorESM2-LM_r1i1p1f1
getting MRI_MRI-ESM2-0_r4i1p1f1
got MRI_MRI-ESM2-0_r4i1p1f1
getting IPSL_IPSL-CM6A-LR_r12i1p1f1
got IPSL_IPSL-CM6A-LR_r12i1p1f1
getting MOHC_HadGEM3-GC31-MM_r1i1p1f3
got MOHC_HadGEM3-GC31-MM_r1i1p1f3
getting BCC_BCC-ESM1_r1i1p1f1
got BCC_BCC-ESM1_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r5i1p1f2
got

In [75]:
abrutp4x_rsut_df_list = []
for ism in abrupt_4x_ism_areacella_exist:
    abrutp4x_rsut_df_list += [get_cmip6_data_gs(ism,'rsut','abrupt-4xCO2')]

getting IPSL_IPSL-CM6A-LR_r3i1p1f1
got IPSL_IPSL-CM6A-LR_r3i1p1f1
getting CCCma_CanESM5_r1i1p1f1
got CCCma_CanESM5_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r6i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r6i1p1f2
getting CNRM-CERFACS_CNRM-ESM2-1_r3i1p1f2
got CNRM-CERFACS_CNRM-ESM2-1_r3i1p1f2
getting CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1
getting CAMS_CAMS-CSM1-0_r1i1p1f1
got CAMS_CAMS-CSM1-0_r1i1p1f1
getting IPSL_IPSL-CM6A-LR_r2i1p1f1
got IPSL_IPSL-CM6A-LR_r2i1p1f1
getting IPSL_IPSL-CM6A-LR_r6i1p1f1
got IPSL_IPSL-CM6A-LR_r6i1p1f1
getting IPSL_IPSL-CM6A-LR_r9i1p1f1
got IPSL_IPSL-CM6A-LR_r9i1p1f1
getting NASA-GISS_GISS-E2-1-G_r1i1p1f3
got NASA-GISS_GISS-E2-1-G_r1i1p1f3
getting NOAA-GFDL_GFDL-ESM4_r1i1p1f1
got NOAA-GFDL_GFDL-ESM4_r1i1p1f1
getting MRI_MRI-ESM2-0_r11i1p1f1
got MRI_MRI-ESM2-0_r11i1p1f1
getting MRI_MRI-ESM2-0_r3i1p1f1
got MRI_MRI-ESM2-0_r3i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r3i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r3i1p1f2
getting MPI-M_MPI-ESM1-2-HR_r1i1p1f1
got MPI-M_MPI-ESM1-2-HR_r1i1p1f1
getting MRI_MRI-ESM2-0_r6i1p1f1
got MRI_MRI-ESM2-0_r6i1p1f1
getting E3SM-Project_E3SM-1-0_r1i1p1f1
got E3SM-Project_E3SM-1-0_r1i1p1f1
getting IPSL_IPSL-CM6A-LR_r10i1p1f1
got IPSL_IPSL-CM6A-LR_r10i1p1f1
getting MRI_MRI-ESM2-0_r1i2p1f1
got MRI_MRI-ESM2-0_r1i2p1f1
getting MRI_MRI-ESM2-0_r1

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO_ACCESS-ESM1-5_r1i1p1f1
getting MIROC_MIROC6_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got MIROC_MIROC6_r1i1p1f1
getting MRI_MRI-ESM2-0_r13i1p1f1
got MRI_MRI-ESM2-0_r13i1p1f1
getting MRI_MRI-ESM2-0_r10i1p1f1
got MRI_MRI-ESM2-0_r10i1p1f1
getting NCAR_CESM2_r1i1p1f1
got NCAR_CESM2_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r2i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r2i1p1f2
getting MRI_MRI-ESM2-0_r1i1p1f1
got MRI_MRI-ESM2-0_r1i1p1f1
getting MPI-M_MPI-ESM1-2-LR_r1i1p1f1
got MPI-M_MPI-ESM1-2-LR_r1i1p1f1
getting NASA-GISS_GISS-E2-1-H_r1i1p1f1
got NASA-GISS_GISS-E2-1-H_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1-HR_r1i1p1f2
got CNRM-CERFACS_CNRM-CM6-1-HR_r1i1p1f2
getting MOHC_HadGEM3-GC31-LL_r1i1p1f3
got MOHC_HadGEM3-GC31-LL_r1i1p1f3
getting NCC_NorESM2-LM_r1i1p1f1
got NCC_NorESM2-LM_r1i1p1f1
getting MRI_MRI-ESM2-0_r4i1p1f1
got MRI_MRI-ESM2-0_r4i1p1f1
getting IPSL_IPSL-CM6A-LR_r12i1p1f1
got IPSL_IPSL-CM6A-LR_r12i1p1f1
getting MOHC_HadGEM3-GC31-MM_r1i1p1f3
got MOHC_HadGEM3-GC31-MM_r1i1p1f3
getting BCC_BCC-ESM1_r1i1p1f1
got BCC_BCC-ESM1_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r5i1p1f2
got

In [76]:
abrutp4x_rsdt_df_list = []
for ism in abrupt_4x_ism_areacella_exist:
    abrutp4x_rsdt_df_list += [get_cmip6_data_gs(ism,'rsdt','abrupt-4xCO2')]

getting IPSL_IPSL-CM6A-LR_r3i1p1f1
got IPSL_IPSL-CM6A-LR_r3i1p1f1
getting CCCma_CanESM5_r1i1p1f1
got CCCma_CanESM5_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r6i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r6i1p1f2
getting CNRM-CERFACS_CNRM-ESM2-1_r3i1p1f2
got CNRM-CERFACS_CNRM-ESM2-1_r3i1p1f2
getting CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO-ARCCSS_ACCESS-CM2_r1i1p1f1
getting CAMS_CAMS-CSM1-0_r1i1p1f1
got CAMS_CAMS-CSM1-0_r1i1p1f1
getting IPSL_IPSL-CM6A-LR_r2i1p1f1
got IPSL_IPSL-CM6A-LR_r2i1p1f1
getting IPSL_IPSL-CM6A-LR_r6i1p1f1
got IPSL_IPSL-CM6A-LR_r6i1p1f1
getting IPSL_IPSL-CM6A-LR_r9i1p1f1
got IPSL_IPSL-CM6A-LR_r9i1p1f1
getting NASA-GISS_GISS-E2-1-G_r1i1p1f3
got NASA-GISS_GISS-E2-1-G_r1i1p1f3
getting NOAA-GFDL_GFDL-ESM4_r1i1p1f1
got NOAA-GFDL_GFDL-ESM4_r1i1p1f1
getting MRI_MRI-ESM2-0_r11i1p1f1
got MRI_MRI-ESM2-0_r11i1p1f1
getting MRI_MRI-ESM2-0_r3i1p1f1
got MRI_MRI-ESM2-0_r3i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r3i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r3i1p1f2
getting MPI-M_MPI-ESM1-2-HR_r1i1p1f1
got MPI-M_MPI-ESM1-2-HR_r1i1p1f1
getting MRI_MRI-ESM2-0_r6i1p1f1
got MRI_MRI-ESM2-0_r6i1p1f1
getting E3SM-Project_E3SM-1-0_r1i1p1f1
got E3SM-Project_E3SM-1-0_r1i1p1f1
getting IPSL_IPSL-CM6A-LR_r10i1p1f1
got IPSL_IPSL-CM6A-LR_r10i1p1f1
getting MRI_MRI-ESM2-0_r1i2p1f1
got MRI_MRI-ESM2-0_r1i2p1f1
getting MRI_MRI-ESM2-0_r1

  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got CSIRO_ACCESS-ESM1-5_r1i1p1f1
getting MIROC_MIROC6_r1i1p1f1


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


got MIROC_MIROC6_r1i1p1f1
getting MRI_MRI-ESM2-0_r13i1p1f1
got MRI_MRI-ESM2-0_r13i1p1f1
getting MRI_MRI-ESM2-0_r10i1p1f1
got MRI_MRI-ESM2-0_r10i1p1f1
getting NCAR_CESM2_r1i1p1f1
got NCAR_CESM2_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r2i1p1f2
got CNRM-CERFACS_CNRM-CM6-1_r2i1p1f2
getting MRI_MRI-ESM2-0_r1i1p1f1
got MRI_MRI-ESM2-0_r1i1p1f1
getting MPI-M_MPI-ESM1-2-LR_r1i1p1f1
got MPI-M_MPI-ESM1-2-LR_r1i1p1f1
getting NASA-GISS_GISS-E2-1-H_r1i1p1f1
got NASA-GISS_GISS-E2-1-H_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1-HR_r1i1p1f2
got CNRM-CERFACS_CNRM-CM6-1-HR_r1i1p1f2
getting MOHC_HadGEM3-GC31-LL_r1i1p1f3
got MOHC_HadGEM3-GC31-LL_r1i1p1f3
getting NCC_NorESM2-LM_r1i1p1f1
got NCC_NorESM2-LM_r1i1p1f1
getting MRI_MRI-ESM2-0_r4i1p1f1
got MRI_MRI-ESM2-0_r4i1p1f1
getting IPSL_IPSL-CM6A-LR_r12i1p1f1
got IPSL_IPSL-CM6A-LR_r12i1p1f1
getting MOHC_HadGEM3-GC31-MM_r1i1p1f3
got MOHC_HadGEM3-GC31-MM_r1i1p1f3
getting BCC_BCC-ESM1_r1i1p1f1
got BCC_BCC-ESM1_r1i1p1f1
getting CNRM-CERFACS_CNRM-CM6-1_r5i1p1f2
got

In [80]:
abrutp4x_rsdt_df = pd.concat(abrutp4x_rsdt_df_list,axis=1)
abrutp4x_rsut_df = pd.concat(abrutp4x_rsut_df_list,axis=1)
abrutp4x_rlut_df = pd.concat(abrutp4x_rlut_df_list,axis=1)
abrutp4x_tas_df = pd.concat(abrutp4x_tas_df_list,axis=1)
piControl_tas_df = pd.concat(piControl_df_list,axis=1)
piControl_rsdt_df = pd.concat(piControl_rsdt_df_list,axis=1)
piControl_rsut_df = pd.concat(piControl_rsut_df_list,axis=1)
piControl_rlut_df = pd.concat(piControl_rlut_df_list,axis=1)

# piControl_rlut_df.to_csv('./cmip6_data/piControl_rlut.csv')
# piControl_rsut_df.to_csv('./cmip6_data/piControl_rsut.csv')
# piControl_rsdt_df.to_csv('./cmip6_data/piControl_rsdt.csv')
# abrutp4x_rsdt_df.to_csv('./cmip6_data/abrupt-4xCO2_rsdt.csv')
# abrutp4x_rsut_df.to_csv('./cmip6_data/abrupt-4xCO2_rsut.csv')
# abrutp4x_rlut_df.to_csv('./cmip6_data/abrupt-4xCO2_rlut.csv')
# abrutp4x_tas_df.to_csv('./cmip6_data/abrupt-4xCO2_tas.csv')
# piControl_tas_df.to_csv('./cmip6_data/piControl_tas.csv')

In [3]:
def get_annual_CMIP6_data_esgf(activity, table, variable, experiment, institution, source, member):
    # eg activity='CMIP', table='Amon', variable='tas', experiment='historical', institution="NCAR", source="CESM2", member="r10i1p1f1"
    
    result = esgf_search(activity_id=activity, table_id=table, variable_id=variable, experiment_id=experiment,institution_id=institution, source_id=source, member_id=member)
    
    if not result:
        print('No results for this request')
        return None
    
    # select results with only the latest datestamp:
    latest = sorted([x.split('/')[15] for x in result])[-1]
    result = [x for x in result if x.split('/')[15]==latest]
    
    # remove duplicate results
    result_1 = []

    for item in result:
        if item.split('/')[-1] in [x.split('/')[-1] for x in result_1]:
            continue
        else:
            result_1 += [item]

    ds = xr.open_mfdataset(result_1, combine='by_coords')
    
    files_area = esgf_search(variable_id='areacella', activity_id=activity, institution_id=institution, source_id=source)
    
    if not files_area:
        print('No areacella for this request')
        return None
    
    ds_area = xr.open_dataset(files_area[0])
    
    coords = list(ds[variable].coords.keys())
    if 'lat' in coords:
        dims = ['lat','lon']
    else:
        dims = ['latitude','longitude']
    
    total_area = ds_area.areacella.sum(dim=dims)
    ta_timeseries = (ds[variable] * ds_area.areacella).sum(dim=dims) / total_area
    
    return ta_timeseries.groupby('time.year').mean('time').to_pandas().rename(institution+'_'+source+'_'+member)

In [24]:
def get_annual_CMIP6_data_esgf_multivar(activity, table, variables, experiment, institution, source, member):
    # eg activity='CMIP', table='Amon', variable='tas', experiment='historical', institution="NCAR", source="CESM2", member="r10i1p1f1"
    
    result = esgf_search(activity_id=activity, table_id=table, experiment_id=experiment,institution_id=institution, source_id=source, member_id=member)
    
    result = [x for x in result if x.split('/')[13] in variables]
    
    if not result:
        print('No results for this request')
        return None
    
    # select results with only the latest datestamp:
#     latest = sorted([x.split('/')[15] for x in result])[-1]
#     result = [x for x in result if x.split('/')[15]==latest]
    
    # remove duplicate results
    result_1 = []

    for item in result:
        if item.split('/')[-1] in [x.split('/')[-1] for x in result_1]:
            continue
        else:
            result_1 += [item]

    ds = xr.open_mfdataset(result_1, combine='by_coords')
    
    files_area = esgf_search(variable_id='areacella', activity_id=activity, institution_id=institution, source_id=source)
    
    if not files_area:
        print('No areacella for this request')
        return None
    
    ds_area = xr.open_dataset(files_area[0])
    
    coords = list(ds[variables].coords.keys())
    if 'lat' in coords:
        dims = ['lat','lon']
    else:
        dims = ['latitude','longitude']
    
    total_area = ds_area.areacella.sum(dim=dims)
    ta_timeseries = (ds[variables] * ds_area.areacella).sum(dim=dims) / total_area
    
    _out = ta_timeseries.groupby('time.year').mean('time').to_dataframe()[variables]
    
    return pd.concat([_out],axis=1,keys=[institution+'_'+source+'_'+member])

In [5]:
piControl_list = esgf_search(activity_id='CMIP', table_id='Amon', variable_id='tas', experiment_id='piControl')
piControl_list_nodupl = []

for item in piControl_list:
    if item.split('/')[-1] in [x.split('/')[-1] for x in piControl_list_nodupl]:
        continue
    else:
        piControl_list_nodupl += [item]
        
abrupt4x_list = esgf_search(activity_id='CMIP', table_id='Amon', variable_id='tas', experiment_id='abrupt-4xCO2')
abrupt4x_list_nodupl = []

for item in abrupt4x_list:
    if item.split('/')[-1] in [x.split('/')[-1] for x in abrupt4x_list_nodupl]:
        continue
    else:
        abrupt4x_list_nodupl += [item]
        
areacella_list = esgf_search(activity_id='CMIP', variable_id='areacella')
areacella_list_nodupl = []

for item in areacella_list:
    if item.split('/')[-1] in [x.split('/')[-1] for x in areacella_list_nodupl]:
        continue
    else:
        areacella_list_nodupl += [item]
        
abrupt4x_ism_list = list(set([x.split('/')[8]+'_'+x.split('/')[9]+'_'+x.split('/')[11] for x in abrupt4x_list_nodupl]))
piControl_ism_list = list(set([x.split('/')[8]+'_'+x.split('/')[9]+'_'+x.split('/')[11] for x in piControl_list_nodupl]))
areacella_ism_list = list(set([x.split('/')[8]+'_'+x.split('/')[9]+'_'+x.split('/')[11] for x in areacella_list_nodupl]))
areacella_s_list = list(set([x.split('_')[1] for x in areacella_ism_list]))

piControl_ism_areacella_exist = [x for x in piControl_ism_list if x.split('_')[1] in areacella_s_list]
abrupt4x_ism_areacella_exist = [x for x in abrupt4x_ism_list if x.split('_')[1] in areacella_s_list]

In [14]:
abrupt4x_tas_df = pd.read_csv('./cmip6_data/abrupt-4xCO2_tas.csv',index_col=0)
esgf_abrupt4x_list = [x for x in abrupt4x_ism_areacella_exist if not x in abrupt4x_tas_df.columns]

piControl_tas_df = pd.read_csv('./cmip6_data/piControl_tas.csv',index_col=0)
esgf_piControl_list = [x for x in piControl_ism_areacella_exist if not x in piControl_tas_df.columns]

In [20]:
def get_CMIP6_data(ism,exp='abrupt-4xCO2',var='tas',multivar=False):
    ism_split = ism.split('_')
    if multivar:
        _out = get_annual_CMIP6_data_esgf_multivar('CMIP', 'Amon', var, exp, ism_split[0], ism_split[1], ism_split[2])
    else:
        _out = get_annual_CMIP6_data_esgf('CMIP', 'Amon', var, exp, ism_split[0], ism_split[1], ism_split[2])
    print(ism+' complete')
    return _out

In [25]:
# abrupt4x_df_list_esgf = []
for x in esgf_abrupt4x_list:
    abrupt4x_df_list_esgf += [get_CMIP6_data(x,'abrupt-4xCO2',['tas','rlut','rsut','rsdt'],True)]
# P1=multiprocessing.Pool(processes=8)
# abrupt4xCO2_df = P1.starmap(get_CMIP6_data,[(x,'abrupt-4xCO2',['tas','rlut','rsut','rsdt'],True) for x in abrupt4x_ism_areacella_exist])
# P1.close

CAMS_CAMS-CSM1-0_r1i1p1f1 complete
IPSL_IPSL-CM6A-LR_r10i1p1f1 complete


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array

OverflowError: Python int too large to convert to C long

Exception ignored in: 'pandas._libs.algos.are_diff'
Traceback (most recent call last):
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/xarray/coding/cftimeindex.py", line 433, in __sub__
    return pd.TimedeltaIndex(np.array(self) - np.array(other))
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/timedeltas.py", line 258, in __new__
    data, freq=freq, unit=unit, dtype=dtype, copy=copy
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 272, in _from_sequence
    data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 973, in sequence_to_td64ns
    data = objects_to_td64ns(data, unit=unit, errors=errors)
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 1098, in objects_to_td64ns
    result = array_to_timedelta64(values, unit=unit, er

OverflowError: Python int too large to convert to C long

Exception ignored in: 'pandas._libs.algos.are_diff'
Traceback (most recent call last):
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/xarray/coding/cftimeindex.py", line 433, in __sub__
    return pd.TimedeltaIndex(np.array(self) - np.array(other))
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/timedeltas.py", line 258, in __new__
    data, freq=freq, unit=unit, dtype=dtype, copy=copy
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 272, in _from_sequence
    data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 973, in sequence_to_td64ns
    data = objects_to_td64ns(data, unit=unit, errors=errors)
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 1098, in objects_to_td64ns
    result = array_to_timedelta64(values, unit=unit, er

OverflowError: Python int too large to convert to C long

Exception ignored in: 'pandas._libs.algos.are_diff'
Traceback (most recent call last):
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/xarray/coding/cftimeindex.py", line 433, in __sub__
    return pd.TimedeltaIndex(np.array(self) - np.array(other))
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/timedeltas.py", line 258, in __new__
    data, freq=freq, unit=unit, dtype=dtype, copy=copy
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 272, in _from_sequence
    data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 973, in sequence_to_td64ns
    data = objects_to_td64ns(data, unit=unit, errors=errors)
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 1098, in objects_to_td64ns
    result = array_to_timedelta64(values, unit=unit, er

OverflowError: Python int too large to convert to C long

Exception ignored in: 'pandas._libs.algos.are_diff'
Traceback (most recent call last):
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/xarray/coding/cftimeindex.py", line 433, in __sub__
    return pd.TimedeltaIndex(np.array(self) - np.array(other))
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/indexes/timedeltas.py", line 258, in __new__
    data, freq=freq, unit=unit, dtype=dtype, copy=copy
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 272, in _from_sequence
    data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 973, in sequence_to_td64ns
    data = objects_to_td64ns(data, unit=unit, errors=errors)
  File "/home/leachl/miniconda3/lib/python3.7/site-packages/pandas/core/arrays/timedeltas.py", line 1098, in objects_to_td64ns
    result = array_to_timedelta64(values, unit=unit, er

AttributeError: 'IndexVariable' object has no attribute 'year'

In [62]:
piControl_df_list_esgf = []
for x in piControl_ism_areacella_exist:
    piControl_df_list_esgf += [get_CMIP6_data(x,'piControl','tas')]
# P1=multiprocessing.Pool(processes=8)
# piControl_df = P1.starmap(get_CMIP6_data,[(x,'piControl','tas',False) for x in piControl_ism_areacella_exist])
# P1.close

NASA-GISS_GISS-E2-1-G_r1i1p1f2 complete
NASA-GISS_GISS-E2-1-G_r1i1p1f3 complete


RuntimeError: NetCDF: Access failure