In [18]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import cartopy.crs as ccrs
import glob
import os
import scipy.stats
from matplotlib import cm
import pickle

In [2]:
def get_pressure_weighted(x):
    """Return the pressure-weighted vertical mean of ``x`` across its ``plev`` levels.

    The integral of ``x`` from the first to the last ``plev`` value is
    approximated with the trapezoid rule (one trapezoid per pair of
    neighbouring levels, in the order they appear in ``x.plev``) and then
    divided by the total pressure depth ``plev[0] - plev[-1]``.

    Parameters
    ----------
    x : object exposing ``x.plev.values`` and ``x.loc[dict(plev=...)]``
        In this notebook an ``xarray.DataArray`` that has a ``plev`` coordinate.

    Returns
    -------
    The layer-mean field (whatever type arithmetic on ``x.loc[...]`` yields).

    Raises
    ------
    ValueError
        If fewer than two pressure levels are present; the layer depth would
        be zero and the original code silently produced 0/0.
    """
    levels = x.plev.values
    if len(levels) < 2:
        raise ValueError(
            "get_pressure_weighted needs at least two pressure levels, got %d" % len(levels)
        )
    dPref = levels[0] - levels[-1]  # total depth of the layer
    # Area of each trapezoid between consecutive levels: mean of the two
    # level values times the pressure thickness between them.
    integral = [
        ((x.loc[dict(plev=p_here)] + x.loc[dict(plev=p_next)]) / 2) * (p_here - p_next)
        for p_here, p_next in zip(levels[:-1], levels[1:])
    ]
    pw = sum(integral) / dPref
    return pw

In [3]:
def low_pass_weights(window, cutoff):
    """Build symmetric low-pass filter weights.

    Parameters
    ----------
    window : int
        Requested filter length. The returned array has
        ``2 * ((window - 1) // 2) + 1`` entries, i.e. ``window`` itself when
        it is odd and ``window - 1`` when it is even.
    cutoff : float
        Cutoff frequency; the central weight equals ``2 * cutoff``.

    Returns
    -------
    numpy.ndarray
        1-D array of weights, symmetric about its centre.
    """
    half_width = ((window - 1) // 2) + 1
    total = 2 * half_width + 1
    centre = total // 2

    # Work buffer with one padding slot at each end; both are trimmed on return.
    taps = np.zeros([total])
    taps[centre] = 2 * cutoff

    lags = np.arange(1., centre)
    # Sinc response of the ideal low-pass filter ...
    ideal = np.sin(2. * np.pi * cutoff * lags) / (np.pi * lags)
    # ... tapered by a sigma factor that vanishes towards the window edge.
    taper = np.sin(np.pi * lags / centre) * centre / (np.pi * lags)
    side = ideal * taper

    taps[centre-1:0:-1] = side   # left half, mirrored
    taps[centre+1:-1] = side     # right half
    return taps[1:-1]

# 41-point low-pass filter with cutoff 1/10; jettracker applies it over a
# rolling window of the same length.
wgts = low_pass_weights(window=41, cutoff=1/10)
# Wrap the weights along a 'window' dimension so they can be contracted
# against the 'window' dimension produced by rolling(...).construct('window').
weight = xr.DataArray(list(wgts), dims=['window'])

In [4]:
def jettracker(x):
    """Return the seasonal frequency with which each point exceeds the 90th percentile.

    Steps, in order:

    1. NaNs in ``x`` are replaced by 0.
    2. A centred 41-step rolling window along ``time`` is contracted with the
       module-level ``weight`` array (the low-pass filter weights).
    3. Time steps that are entirely NaN after filtering are dropped.
    4. The 0.9 quantile over *all* remaining values is used as threshold.
    5. Points at or above the threshold become 1, everything else 0
       (a value of exactly 0 that passes the threshold also ends up 0,
       because the mask is formed as ``value / value``).
    6. The 0/1 mask is averaged per 'QS-DEC' quarter.

    Parameters
    ----------
    x : xarray.DataArray
        Field with a ``time`` dimension.

    Returns
    -------
    xarray.DataArray
        Quarterly means of the exceedance mask.
    """
    filled = x.fillna(0)
    windows = filled.rolling(time=41, center=True).construct('window')
    smoothed = windows.dot(weight).dropna(dim='time', how='all')

    threshold = np.quantile(smoothed.values, 0.9)
    above = smoothed.where((smoothed >= threshold))

    # value / value -> 1 where kept, NaN where masked out (then filled with 0)
    mask = (above / above).fillna(0)
    return mask.resample(time='QS-DEC').mean(dim='time', skipna=True)

In [5]:
def rainref(pr,x):
    """Extract the seasonal-mean rainfall series at the reference grid point.

    Parameters
    ----------
    pr : dataset-like
        Must support ``sel``/``resample`` and expose a ``pr`` variable
        (in this notebook an ``xarray.Dataset``).
    x : object with ``x.time.values``
        Supplies the time stamps to keep.

    Returns
    -------
    The loaded ``pr`` variable at the grid point nearest lat=-34, lon=18,
    averaged per 'QS-DEC' quarter and restricted to the times found in ``x``.
    """
    # Nearest grid point to the reference location, one coordinate at a time.
    at_point = pr.sel(lat=-34, method='nearest').sel(lon=18, method='nearest')
    seasonal = at_point.resample(time='QS-DEC').mean(dim='time', skipna=True)
    # Keep only the quarters that are present in x.
    wanted_times = list(x.time.values)
    return seasonal.sel(time=wanted_times).pr.load()

In [6]:
def correlation(x,pr):
    """Pearson-correlate every grid point of ``x`` with the series ``pr``.

    Parameters
    ----------
    x : xarray.DataArray
        Field with dimensions ``time``, ``lat`` and ``lon``.
    pr : xarray.DataArray
        Series along ``time``.

    Returns
    -------
    (cor, pv) : tuple of xarray.DataArray
        Correlation coefficient and p-value on the ``lat``/``lon`` grid of ``x``.

    Notes
    -----
    NaN time steps are dropped from ``x`` and ``pr`` independently, so the two
    are re-aligned on their common time stamps before correlating; otherwise
    a NaN in only one of them leaves series of different length.
    """
    x = x.dropna(dim='time')
    pr = pr.dropna(dim='time')
    # Keep only time stamps present in both inputs.
    x, pr = xr.align(x, pr, join='inner')

    # Zero-filled lat/lon templates that inherit the grid coordinates of x.
    cor = (x.isel(time=0) * 0).drop_vars('time')
    pv = cor.copy()

    rain = pr.values
    for i in range(x.sizes['lat']):
        for j in range(x.sizes['lon']):
            series = x.isel(lat=i, lon=j).values
            # One call yields both statistics (previously computed twice).
            r, p = scipy.stats.pearsonr(series, rain)
            # Index by dimension name so the result does not depend on
            # whether lat or lon comes first in the array.
            cor[dict(lat=i, lon=j)] = r
            pv[dict(lat=i, lon=j)] = p
    return cor,pv

In [7]:
def get_files(root="/terra/data/cmip5/global/historical", require_va=False):
    """Find the daily 2-degree ``ua``/``va`` files of every model under ``root``.

    Parameters
    ----------
    root : str, optional
        Directory holding one sub-directory per model. Defaults to the
        CMIP5 historical archive path used throughout this notebook.
    require_va : bool, optional
        When True, models without any ``va_*`` file are skipped as well.
        The default (False) keeps the original behaviour: a model is listed
        as soon as it has at least one ``ua_*`` file, even if its ``va``
        list is empty.

    Returns
    -------
    dict
        ``{model_name: [ua_paths, va_paths]}`` where both entries are lists
        of paths matched under ``<model>/r1i1p1/day/2deg/``.
    """
    avail = {}
    for model in glob.glob(os.path.join(root, "*")):
        run_dir = os.path.join(model, "r1i1p1", "day", "2deg")
        ua = glob.glob(os.path.join(run_dir, "ua_*"))
        va = glob.glob(os.path.join(run_dir, "va_*"))
        if not ua:
            # No zonal wind available: nothing to analyse for this model.
            continue
        if require_va and not va:
            continue
        avail[os.path.basename(model)] = [ua, va]
    return avail

In [8]:
# Map each model directory name to its [ua file list, va file list].
files = get_files()

In [9]:
# EC-EARTH has a ua file but an empty va list (see the cell output), so it
# cannot be used for wind speed. The default makes the cell safe to re-run.
files.pop('EC-EARTH', None)

[['/terra/data/cmip5/global/historical/EC-EARTH/r1i1p1/day/2deg/ua_day_EC-EARTH_historical_r1i1p1_19500101-20051231.nc'],
 []]

In [10]:
# NOAA reanalysis winds: [ua path, va path], given as plain strings.
files['NOAA'] = [
    '/home/pmarsh/NOAA_2deg/NOAA/NOAA_ua_850_700_2deg.nc',
    '/home/pmarsh/NOAA_2deg/NOAA/NOAA_va_850_700_2deg.nc',
]

In [11]:
# ERA5 reanalysis winds: [ua list, va list]; glob yields an empty list if a file is missing.
files['ERA5'] = [
    glob.glob("/home/pmarsh/NOAA_2deg/ERA5/ERA5_ua_850_700_2deg.nc"),
    glob.glob("/home/pmarsh/NOAA_2deg/ERA5/ERA5_va_850_700_2deg.nc"),
]

In [20]:
# Per-model results, keyed by model name:
#   jetdic - seasonal jet-occurrence frequency maps (from jettracker)
#   prdic  - seasonal rainfall series at the reference point (from rainref)
#   cordic - correlation map between the two; pvdic - matching p-values
jetdic={}
prdic={}
cordic={}
pvdic={}
for model in files:
    print(model)
    is_noaa = model == 'NOAA'
    is_era5 = model == 'ERA5'
    # ERA5 is analysed over 1980-2020, everything else over 1950-2005.
    period = slice('1980', '2020') if is_era5 else slice('1950', '2005')

    # --- winds -> jet frequency -------------------------------------------
    # The context managers close the files that were actually opened, also
    # when a step below raises. Everything needed is loaded inside the block.
    with xr.open_mfdataset(files[model][0]) as U_src, xr.open_mfdataset(files[model][1]) as V_src:
        if is_noaa:
            U = U_src.rename({'uwnd':'ua', 'level':'plev'})
            V = V_src.rename({'vwnd':'va', 'level':'plev'})
            levels=[850,700]      # presumably hPa for the reanalysis files - confirm
        elif is_era5:
            U = U_src.rename({'level':'plev'})
            V = V_src.rename({'level':'plev'})
            levels=[850,700]
        else:
            U, V = U_src, V_src
            levels=[85000,70000]  # presumably Pa for the CMIP5 files - confirm
        # Horizontal wind speed from its two components.
        x = np.sqrt(np.square(U.ua) + np.square(V.va))
        x = x.sel(plev=levels)
        x = x.sel(lat = slice(-75,-15))
        x = x.sel(lon = slice(-40,30))
        x = get_pressure_weighted(x)
        x = x.sel(time=period)
        jetdic[model] = jettracker(x).load()

    # --- precipitation at the reference point -----------------------------
    if is_noaa:
        pr_pattern = "/terra/data/reanalysis/global/reanalysis/NOAA/20thC/r1/day/native/pr*"
    elif is_era5:
        pr_pattern = "/terra/data/reanalysis/global/reanalysis/ECMWF/ERA5/day/native/pr*"
    else:
        pr_pattern = "/terra/data/cmip5/global/historical/"+str(model)+"/r1i1p1/day/native/pr*"
    with xr.open_mfdataset(glob.glob(pr_pattern)) as pr_src:
        # Harmonise variable/coordinate names with the CMIP5 convention.
        if is_noaa:
            pr = pr_src.rename({'prate':'pr'})
        elif is_era5:
            pr = pr_src.rename({'latitude':'lat', 'longitude':'lon'})
        else:
            pr = pr_src
        pr = pr.sel(time=period)
        # rainref loads the series, so the files can be closed afterwards.
        prdic[model] = rainref(pr,jetdic[model])

    cordic[model],pvdic[model] = correlation(jetdic[model],prdic[model])

IPSL-CM5A-LR




MPI-ESM-MR




MRI-ESM1




MIROC-ESM-CHEM




HadGEM2-CC




bcc-csm1-1-m




FGOALS-g2




ACCESS1-0




MIROC-ESM




CSIRO-Mk3-6-0




GFDL-CM3




CanESM2




IPSL-CM5B-LR




MPI-ESM-LR




CMCC-CM




GFDL-ESM2G




MIROC5




MPI-ESM-P




CMCC-CMS




CNRM-CM5




IPSL-CM5A-MR




inmcm4




ACCESS1-3




MRI-CGCM3




bcc-csm1-1




CMCC-CESM




HadGEM2-AO




BNU-ESM




NorESM1-M




HadCM3




GFDL-ESM2M




MIROC4h




NOAA


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


ERA5




In [21]:
# Persist the per-model results; `with` guarantees each file is flushed and closed.
for payload, out_path in (
    (jetdic, "../JET_OUT/jettrack_2D.p"),
    (pvdic, "../JET_OUT/jettrack_2D_pv.p"),
    (cordic, "../JET_OUT/jettrack_2D_cor.p"),
):
    with open(out_path, "wb") as fh:
        pickle.dump(payload, fh)

In [45]:
# Cosine-of-latitude weights taken from the NOAA grid (area weighting).
weights = np.cos(np.radians(jetdic['NOAA'].lat))

In [46]:
# Absolute-error score of every model against the NOAA reference.
# For each season the absolute difference between the model's and NOAA's
# seasonal-mean map is area-weighted and summed over lat/lon; the four
# seasonal values are then added together.
# NOTE(review): despite the "mean absolute error" label this is a weighted
# *sum*, and np.mean is applied to the already-added seasonal values (a no-op
# if those are scalars). Left as is so existing scores stay comparable.
def _seasonal_mean(field, season):
    """Time-mean of `field` over the time steps whose season equals `season`."""
    return field.where(field.time.dt.season==season).mean(dim='time')

seasons = ['DJF','MAM','JJA','SON']
# The reference maps do not depend on the model, so compute them once.
ref_by_season = {season: _seasonal_mean(jetdic['NOAA'], season) for season in seasons}

results=[]
for index in jetdic:
    MAE={}
    for season in seasons:
        x = _seasonal_mean(jetdic[index], season)
        MAE[season] = (np.abs(ref_by_season[season] - x)).weighted(weights).sum(('lat','lon'))
    results.append([index,np.mean(MAE['DJF'].values + MAE['MAM'].values + MAE['JJA'].values + MAE['SON'].values)])

In [47]:
# Tabulate the scores and rank the models, lowest score first.
results = pd.DataFrame(results, columns=['Model','score']).sort_values('score')

In [49]:
# Write the ranked scores to disk (the DataFrame index is written as the first column).
results.to_csv('../JET_OUT/scores_2D.csv')