In [1]:
%reset
import numpy as np 
import pandas as pd 
import xarray as xr 
import cartopy 
import glob
import matplotlib.pyplot as plt
import cartopy
import cartopy.crs as ccrs
import matplotlib.ticker as mticker
import climlab.utils.thermo as climlab
import scipy 
import datetime
import metpy.calc as mpcalc

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


# Load Data

In [2]:
# Both input locations live under the same user directory on the keeling filesystem.
# The trailing slashes matter: later cells build file patterns by plain string concatenation.
_user_dir = '/data/keeling/a/rytam2/'

# Raw AMIP model output (one or more netCDF files per variable and model).
root_path = _user_dir + 'ccf_model_spread/data/raw/amip/'
# Radiative kernel files; only their lat/lon grid is used as the interpolation target.
kernel_path = _user_dir + 'a/model_kernels/gcms/'

Note: The data files were moved to a new location, and shortcuts to them were placed in the `root_path` directory.
TODO: May need to add a snippet of code to select the time period 1982-2008 (see the hist_data notebook for reference).

(Copied from hist_data notebook) 
###### Unify time coords to start in year 185912 and end in 200511
        if modelname != 'hg2':
            ds = ds.isel(time=slice(9*12+11,155*12+11))
        elif modelname == 'hg2':
            ds = ds.isel(time=slice(0,155*12+11))

# Process Data
For each model and each variable (ta, ts, hur, sfcWind, wap, ua, va), 
1. Select all files for that variable 
2. Extract t700 from ta files for EIS calculation and ts from ts files for SST
3. Adjust data lon coordinates from 0:360 to -180:180 (to match kernel coordinates) 
4. Transpose dimensions for interp_like method to work
5. Extract coordinates from Kernels 
6. Interpolate the grid from 1x1 to 5x5 with interp_like 
7. Adjust time coordinates for merging 

In [4]:
# Model identifiers. `modelfilename_list` holds the names used in the raw file names and
# `modelname_list` the short keys used as variable names in the output datasets; the two
# lists are parallel (same order, same length).
modelfilename_list = ['CCSM4','CanAM4','CanESM5','E3SM','GFDL','HadGEM2','HadGEM3','IPSL','MIROC-ES2L','MIROC-ESM',
                     'MIROC5','MIROC6','MPI-ESM','MRI-CGCM3','MRI-ESM2','UKESM']

modelname_list=['ccsm4','canam4','canesm5','e3sm','gfdl','hg2','hg3','ipsl','mies2l','miesm','mi5','mi6','mpi','mrcgcm','mresm','ukesm']

modelvar_list = ['ta','ts','hur','sfcWind','wap','ua','va']

# Common analysis period, expressed as month-start time stamps.
PERIOD_START = "1982-01-01"
PERIOD_END = "2014-12-01"

dict_ds_t700 = {}
dict_ds_ts = {}
dict_ds_hur700 = {}
dict_ds_sfcWind = {}
dict_ds_uas = {}
dict_ds_vas = {}
dict_ds_wap = {}
dict_ds_tadv = {}

# Dictionary that collects the processed field of each entry in modelvar_list.
_dict_for_var = {
    'ta': dict_ds_t700,
    'ts': dict_ds_ts,
    'hur': dict_ds_hur700,
    'sfcWind': dict_ds_sfcWind,
    'ua': dict_ds_uas,
    'va': dict_ds_vas,
    'wap': dict_ds_wap,
}


def _select_field(ds, var, modelname):
    """Return the DataArray of interest for `var` from the opened dataset `ds`.

    ta/hur/wap are taken at plev=70000. For ua/va, ccsm4 and e3sm use the
    pressure-level fields at plev=1e5, all other models use uas/vas.
    Raises ValueError for a variable name that is not handled.
    """
    if var == 'ta':
        return ds.ta.sel(plev=70000)
    if var == 'ts':
        return ds.ts
    if var == 'hur':
        return ds.hur.sel(plev=70000)
    if var == 'sfcWind':
        return ds.sfcWind
    if var == 'wap':
        return ds.wap.sel(plev=70000)
    uses_plev_wind = modelname in ('ccsm4', 'e3sm')
    if var == 'ua':
        return ds.ua.sel(plev=1e5) if uses_plev_wind else ds.uas
    if var == 'va':
        return ds.va.sel(plev=1e5) if uses_plev_wind else ds.vas
    raise ValueError('Unhandled variable: %s' % var)


def _to_month_start(da):
    """Relabel the time axis of `da` with month-start datetime64 stamps.

    The monthly resample is only used to obtain the month-start index; the data
    themselves are not averaged. Indexes that offer `to_datetimeindex` (cftime
    calendars) are converted so that every model ends up on a pandas DatetimeIndex.
    """
    month_starts = da.resample(time="MS").mean().indexes["time"]
    if hasattr(month_starts, "to_datetimeindex"):
        month_starts = month_starts.to_datetimeindex()
    return da.assign_coords(time=month_starts)


def _clip_to_period(da, modelname):
    """Restrict `da` to PERIOD_START..PERIOD_END and name it `modelname`.

    If the data end before PERIOD_END, the missing months are filled with NaN so
    every model covers the full period. The comparison uses the last time stamp
    itself rather than only its year, so a series ending mid-2014 is padded too.
    """
    last_time = pd.Timestamp(da.time.values[-1])
    da = da.sel(time=slice(PERIOD_START, PERIOD_END))
    if last_time < pd.Timestamp(PERIOD_END):
        all_months = pd.date_range(PERIOD_START, PERIOD_END, freq='MS')
        # np.nan instead of np.NaN: the latter alias was removed in NumPy 2.0.
        nans = xr.DataArray(np.full((da.lat.size, da.lon.size, all_months.size), np.nan),
                            [('lat', da.lat.values), ('lon', da.lon.values),
                             ('time', all_months)])
        da = da.combine_first(nans)
    return da.rename(modelname)


# Target grid for the interpolation (CCSM4 kernel selected for its coordinates).
# Opened once here instead of once per model/variable iteration.
kernel = xr.open_mfdataset(kernel_path+'*CCSM4*.nc')

for modelfilename, modelname in zip(modelfilename_list, modelname_list):

    for var in modelvar_list:
        if var == 'sfcWind' and modelname == 'ccsm4':
            # CCSM4 wind speed is calculated from ua/va in the post-processing cell, which
            # overwrites this entry. The original loop reached this case with `ds` left over
            # from the previous variable (hur) -- assuming no sfcWind file matches CCSM4,
            # TODO confirm. The same placeholder is stored explicitly so the resulting
            # dataset keeps the same variables and coordinates as before.
            dict_ds_sfcWind[modelname] = dict_ds_hur700[modelname]
            continue

        # Get all files
        filepath = glob.glob(root_path+'%s*%s*.nc'%(var,modelfilename))
        if not filepath:
            # Without this check the previous iteration's data would be silently reused.
            raise FileNotFoundError('No %s files found for %s in %s' % (var, modelfilename, root_path))

        # Combine all .nc files as one dataset
        if len(filepath) == 1:
            ds = xr.open_mfdataset(filepath)
        else:
            ds = xr.open_mfdataset(filepath,combine="by_coords")

        # Select variable from dataset
        da = _select_field(ds, var, modelname)

        # Change lon coords from 0:360 to -180:180
        da = da.assign_coords(lon=(((da.lon + 180) % 360) - 180)).sortby('lon')
        da.lat.attrs['units'] = 'degrees'
        da.lon.attrs['units'] = 'degrees'

        # Change time coords to Month Start, then to the common time range
        da = _to_month_start(da)
        da = _clip_to_period(da, modelname)

        # Kernel field carrying the target lat/lon and this field's own time axis
        kernel_ds = kernel.dRdxi.isel(i=0).expand_dims({'time':da.time}, axis=2)\
                    .assign_coords({'latitude':kernel.lat,'longitude':kernel.lon})\
                    .rename({'latitude':'lat','longitude':'lon'})

        # Interp variable onto the kernel grid and assign to its dict
        _dict_for_var[var][modelname] = da.interp_like(kernel_ds)

    # Advection of ts by the wind components of this model
    dict_ds_tadv[modelname] = mpcalc.advection(dict_ds_ts[modelname],
                                               u=dict_ds_uas[modelname],
                                               v=dict_ds_vas[modelname])


# Change dictionaries to Xarray
ts_2014 = xr.Dataset(dict_ds_ts)
hur_2014 = xr.Dataset(dict_ds_hur700)
ws_2014 = xr.Dataset(dict_ds_sfcWind)
wap_2014 = xr.Dataset(dict_ds_wap)
tadv_2014 = xr.Dataset(dict_ds_tadv)
t700 = xr.Dataset(dict_ds_t700)
uas = xr.Dataset(dict_ds_uas)
vas = xr.Dataset(dict_ds_vas)


  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds

  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds

  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds

  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds

  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()
  sample = dates.ravel()[0]
  ds['time'] = resampled.indexes["time"].to_datetimeindex()


In [28]:
# Calculate EIS from surface temperature and 700 hPa temperature
eis_2014 = climlab.EIS(ts_2014,t700)

# Calculate ws from ua/va for CCSM4 
# Metpy has Pint quantity units; chunk removes that 
ws_2014['ccsm4'] = mpcalc.wind_speed(uas.ccsm4,vas.ccsm4).rename('ccsm4').chunk()
# drop_vars replaces the deprecated Dataset.drop; errors='ignore' lets this cell be
# re-run after the coordinates have already been removed.
ws_2014 = ws_2014.drop_vars(['plev','height'], errors='ignore')

# Change tadv to dask 
tadv_2014 = tadv_2014.chunk()

In [None]:
# NOTE(review): `ts_2` is not defined in any cell visible in this notebook and this cell has
# no execution count, so it would raise NameError under Restart & Run All. Presumably a
# leftover experiment (possibly meant for ts_2014?) -- confirm the intent or remove the cell.
ts_2.attrs['units'] = 'degrees' 

In [29]:
# Slice time period and save as nc files 
path='/data/keeling/a/rytam2/ccf_model_spread/data/preprocessed/'

# Fields to write, keyed by the prefix used in the output file names. The plev
# coordinate is removed from hur and wap before writing; drop_vars replaces the
# deprecated Dataset.drop.
_fields_2014 = [
    ('ts', ts_2014),
    ('eis', eis_2014),
    ('hur', hur_2014.drop_vars('plev', errors='ignore')),
    ('ws', ws_2014),
    ('wap', wap_2014.drop_vars('plev', errors='ignore')),
    ('tadv', tadv_2014),
]

# Full period 1982-01 .. 2014-12
for _prefix, _field in _fields_2014:
    _field.to_netcdf(path+'%s_amip_CMIP5&6_198201_201412.nc' % _prefix)

# Shorter period 1982-01 .. 2008-12, taken from exactly the fields written above so both
# sets of files share the same coordinates. Previously the 2008 hur/wap files kept plev
# while their 2014 counterparts did not.
_fields_2008 = {_prefix: _field.sel(time=slice('1982-01-01','2008-12-01'))
                for _prefix, _field in _fields_2014}
for _prefix, _field in _fields_2008.items():
    _field.to_netcdf(path+'%s_amip_CMIP5&6_198201_200812.nc' % _prefix)

# Keep the per-variable names available for any later cells.
ts_2008 = _fields_2008['ts']
eis_2008 = _fields_2008['eis']
hur_2008 = _fields_2008['hur']
ws_2008 = _fields_2008['ws']
wap_2008 = _fields_2008['wap']
tadv_2008 = _fields_2008['tadv']

## Comments

### Improvements: 
1. Try using dask to stack? 