In [1]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import zarr
import gcsfs
from datetime import timedelta 
import metpy.calc as mpcalc
import cftime
import warnings
import climlab.utils.thermo as climlab

xr.set_options(display_style='html')
%matplotlib inline
%config InlineBackend.figure_format = 'retina' 
warnings.filterwarnings("ignore")

In [10]:
df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')

df_list = []
modelfilename_list = ['CanESM5','E3SM-1-0','GFDL-CM4','HadGEM3-GC31-LL','IPSL-CM6A-LR','MIROC-ES2L',
                     'MIROC6','MRI-ESM2-0','UKESM1-0-LL']
member_id = ['r1i1p1f1','r1i1p1f2','r1i1p1f3','r1i1p1f4']
modelvar_list = ['ta','ts','hur','sfcWind','wap','uas','vas']

kernel_path='/data/keeling/a/rytam2/a/kernels/gcms/*CCSM4*.nc'


cmip6 = df[(df['source_id'].isin(modelfilename_list)) & (df['member_id'].isin(member_id)) & (df['variable_id'].isin(modelvar_list)) & \
   (df['experiment_id'] == 'abrupt-4xCO2') & (df['table_id'] == 'Amon') ]

e3sm_var = ['ua','va']
e3sm_uava = df[(df['source_id']=='E3SM-1-0') & (df['member_id'].isin(member_id)) & (df['variable_id'].isin(e3sm_var)) & \
   (df['experiment_id'] == 'abrupt-4xCO2') & (df['table_id'] == 'Amon') ]

zstore = np.append(cmip6.zstore.values,e3sm_uava.zstore.values)

# this only needs to be created once
gcs = gcsfs.GCSFileSystem(token='anon')

In [3]:
dict_ds_t700 = {}
dict_ds_ts = {}
dict_ds_hur700 = {}
dict_ds_sfcWind = {}
dict_ds_uas = {}
dict_ds_vas = {}
dict_ds_wap = {}
dict_ds_tadv = {}

for path in zstore:
        # Open each file 
        ds = xr.open_zarr(path, consolidated=True)
        
        modelname = ds.attrs['parent_source_id']
        print(modelname)

        # Select variable from dataset 
        if 'ta' in path:
            ds = ds.ta.sel(plev=70000) #select 700hPa 
        elif 'ts' in path:
            ds = ds.ts
        elif 'hur' in path:
            ds = ds.hur.sel(plev=70000)
        elif 'sfcWind' in path:
            ds = ds.sfcWind
        elif 'ua' in path:
            if 'E3SM' in modelname:
                ds = ds.ua.sel(plev=1e5)
            else: 
                ds = ds.uas
        elif 'va' in path:
            if 'E3SM' in modelname:
                ds = ds.va.sel(plev=1e5)
            else: 
                ds = ds.vas
        elif 'wap' in path: 
            ds = ds.wap.sel(plev=70000)
        

        # Change lon coords
        ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180)).sortby('lon')

        # Unify time coords to start in year 1850
        ds = ds.isel(time=slice(0,150*12))
        
        #interp grids (selected CCSM4 for coordinates)
        kernel = xr.open_mfdataset(kernel_path)
        kernel_ds = kernel.dRdxi.isel(i=0).expand_dims({'time':ds.time}, axis=2)\
                    .assign_coords({'latitude':kernel.lat,'longitude':kernel.lon})\
                    .rename({'latitude':'lat','longitude':'lon'})
            
        # Interp variables and assign to dict 
        if 'ta' in path:
            # interp ts
            ds = ds.interp_like(kernel_ds)
            dict_ds_t700[modelname] = ds
        elif 'ts' in path:
            ds = ds.interp_like(kernel_ds)
            dict_ds_ts[modelname] = ds
        elif 'hur' in path:
            ds = ds.interp_like(kernel_ds)
            dict_ds_hur700[modelname] = ds
        elif 'sfcWind' in path:
            ds = ds.interp_like(kernel_ds)
            dict_ds_sfcWind[modelname] = ds
        elif 'ua' in path:
            ds = ds.interp_like(kernel_ds)
            dict_ds_uas[modelname] = ds
        elif 'va' in path:
            ds = ds.interp_like(kernel_ds)
            dict_ds_vas[modelname] = ds
        elif 'wap' in path:
            ds = ds.interp_like(kernel_ds)
            dict_ds_wap[modelname] = ds
            
            
        """if 'E3SM' in modelname or 'GFDL' in modelname:
            ds['time']=(ds.indexes['time']+timedelta(days=365*1849)).to_datetimeindex()#.normalize
        elif 'HadGEM3' in modelname or 'UKESM' in modelname: 
            ds['time']= ds.indexes['time'].to_datetimeindex()#.normalize
        elif 'MIROC-ES2L' in modelname or 'MRI-ESM2' in modelname or 'IPSL' in modelname: 
            ds['time']= ds.indexes['time']#.normalize
        elif 'MIROC6' in modelname:
            ds['time']=(ds.indexes['time']-timedelta(days=365*(1350+11/12))).to_datetimeindex()
        elif 'CanESM5' in modelname: 
            ds['time']=(ds.indexes['time']).to_datetimeindex()#.normalize
            """
        
        time_ds=xr.open_zarr(zstore[-12], consolidated=True)
        index=(time_ds.indexes['time'])
        ds['time']=index


for modelname in modelfilename_list:
    ds = mpcalc.advection(dict_ds_ts[modelname],u=dict_ds_uas[modelname], v=dict_ds_vas[modelname]) 
    dict_ds_tadv[modelname] = ds

    
        
# Change dictionaries to Xarray
ts = xr.Dataset(dict_ds_ts)
hur = xr.Dataset(dict_ds_hur700)
ws = xr.Dataset(dict_ds_sfcWind)
wap = xr.Dataset(dict_ds_wap)
tadv = xr.Dataset(dict_ds_tadv)
t700 = xr.Dataset(dict_ds_t700)
uas = xr.Dataset(dict_ds_uas)
vas = xr.Dataset(dict_ds_vas)

GFDL-CM4
GFDL-CM4
GFDL-CM4
GFDL-CM4
GFDL-CM4
GFDL-CM4
GFDL-CM4
IPSL-CM6A-LR
IPSL-CM6A-LR
IPSL-CM6A-LR
IPSL-CM6A-LR
IPSL-CM6A-LR
IPSL-CM6A-LR
IPSL-CM6A-LR
MRI-ESM2-0
MRI-ESM2-0
MRI-ESM2-0
MRI-ESM2-0
MRI-ESM2-0
MRI-ESM2-0
MRI-ESM2-0
UKESM1-0-LL
UKESM1-0-LL
UKESM1-0-LL
UKESM1-0-LL
UKESM1-0-LL
UKESM1-0-LL
UKESM1-0-LL
CanESM5
CanESM5
CanESM5
CanESM5
CanESM5
CanESM5
CanESM5
HadGEM3-GC31-LL
HadGEM3-GC31-LL
HadGEM3-GC31-LL
HadGEM3-GC31-LL
HadGEM3-GC31-LL
HadGEM3-GC31-LL
HadGEM3-GC31-LL
MIROC6
MIROC6
MIROC6
MIROC6
MIROC6
MIROC6
MIROC6
E3SM-1-0
E3SM-1-0
MIROC-ES2L
MIROC-ES2L
MIROC-ES2L
MIROC-ES2L
MIROC-ES2L
MIROC-ES2L
MIROC-ES2L
E3SM-1-0
E3SM-1-0
E3SM-1-0
E3SM-1-0
E3SM-1-0


In [4]:
# Calcualte EIS 
eis = climlab.EIS(ts,t700)

# Change tadv to dask 
tadv=tadv.chunk()

In [5]:
# Slice time period and save as nc files 
path='/data/keeling/a/rytam2/ccf_model_spread/data/preprocessed/'

ts.to_netcdf(path+'ts_4xCO2_CMIP6_185001_199912.nc')
eis.to_netcdf(path+'eis_4xCO2_CMIP6_185001_199912.nc')
hur.drop('plev').to_netcdf(path+'hur_4xCO2_CMIP6_185001_199912.nc')
ws.to_netcdf(path+'ws_4xCO2_CMIP6_185001_199912.nc')
wap.drop('plev').to_netcdf(path+'wap_4xCO2_CMIP6_185001_199912.nc')
tadv.to_netcdf(path+'tadv_4xCO2_CMIP6_185001_199912.nc')

ValueError: All sources must be dask array objects