In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import zarr
import math
import glob
import pickle
import statistics
import scipy.stats as stats
from sklearn.neighbors import KernelDensity
import dask
import seaborn as sns
import matplotlib.pyplot as plt
import cartopy.crs as ccrs

In [2]:
def get_files():
    models = glob.glob("/terra/data/cmip5/global/historical/*")
    avail={}
    for model in models:
        zg = glob.glob(str(model)+"/r1i1p1/day/2deg/zg*")
        try:
            test = zg[0]
            avail[model.split('/')[-1]] = zg
        except:
             pass
    return avail

In [13]:
files = get_files()
files['NOAA'] = glob.glob("/home/pmarsh/NOAA_2deg/NOAA_zg/*.nc") #need 2degs!
files['ERA5'] = glob.glob("/home/pmarsh/NOAA_2deg/ERA5_zg/*.nc")
files.pop('MIROC-ESM')

['/terra/data/cmip5/global/historical/MIROC-ESM/r1i1p1/day/2deg/zg_day_MIROC-ESM_historical_r1i1p1_19500101-20051231.nc']

In [14]:
def contourise(x):
    x = x.fillna(0)
    x = x.where((x>=limit))
    x = x/x
    return x

In [15]:
results={}
for model in ['NOAA','GFDL-CM3']:
    print(model)
    x = xr.open_mfdataset(files[model])
    if model == 'NOAA':
        x = x.rename({'hgt':'zg'})
        x = x.rename({'level':'plev'})
        x = x.sel(plev=850)
        x = x.sel(time=slice('1950','2005'))
    elif model == 'ERA5':
        x = x.rename({'level':'plev'})
        x = x.sel(plev=850)
        x = x.sel(time=slice('1979','2005'))
    else:
        x = x.sel(plev=85000)
        x = x.sel(time=slice('1950','2005'))
        x = x.load()
    x = x.sel(lat=slice(-60,0))
    x = x[['zg']]
    x = x.assign_coords(lon=(((x.lon + 180) % 360) - 180))
    with dask.config.set(**{'array.slicing.split_large_chunks': True}):
        x = x.sortby(x.lon)
    x = x.sel(lon=slice(-50,20))
    x = x.resample(time="QS-DEC").mean(dim="time",skipna=True)
    x = x.load()
    limit = np.nanquantile(x.zg.values,0.9)
    results[model]={}
    for seas in ['DJF','MAM','JJA','SON']:
        mean_seas = x.where(x.time.dt.season==str(seas)).dropna(dim='time')
        mean_seas = contourise(mean_seas).zg.fillna(0).mean(dim='time')
        results[model][seas] = mean_seas.fillna(0)
    x.close()

NOAA
GFDL-CM3


In [17]:
scores=[]
for index in results:
    reference = index
    for index in results:
        MAE=[]
        for season in ['DJF','MAM','JJA','SON']:
            ref = results[reference][season]
            x = results[index][season]
            for i in ref.lat.values:
                for j in ref.lon.values:
                    MAE.append(float(np.abs(ref.sel(lat=i).sel(lon=j) - x.sel(lat=i).sel(lon=j)).values))
        scores.append([reference,index,np.mean(MAE)])

KeyError: -24.0

In [None]:
models = list(results.keys())

In [None]:
df = pd.DataFrame(models,columns = ['models'])
start = 0
end = len(models)
for index in models:
    df[index] = np.array(scores[start:end])[:,-1].astype(float)
    start = start + len(models)
    end = end + len(models)

In [None]:
df = df.set_index('models')

In [None]:
df.to_csv('High_2D_Linkage.csv')

In [None]:
df = df.dropna(0,how='all').dropna(1,how='all')

In [16]:
xr.open_mfdataset(files['NOAA'])

Unnamed: 0,Array,Chunk
Bytes,354.92 MiB,354.92 MiB
Shape,"(42368, 2, 18, 61)","(42368, 2, 18, 61)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 354.92 MiB 354.92 MiB Shape (42368, 2, 18, 61) (42368, 2, 18, 61) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",42368  1  61  18  2,

Unnamed: 0,Array,Chunk
Bytes,354.92 MiB,354.92 MiB
Shape,"(42368, 2, 18, 61)","(42368, 2, 18, 61)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
