In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import cartopy.crs as ccrs
import glob
import os
import scipy.stats
from matplotlib import cm
import seaborn as sns
import dask
import pickle
from datetime import datetime
import ast

In [2]:
# Set dask's `array.slicing.split_large_chunks` option to False for this session.
dask.config.set({'array.slicing.split_large_chunks': False})

<dask.config.set at 0x7f19805353d0>

In [3]:
import warnings
warnings.simplefilter("ignore")
#annoying cftime serialization warning 

In [4]:
from dask.distributed import Client, LocalCluster
# Start a four-worker local dask cluster bound to fixed loopback ports and
# attach a client to it. `cluster` and `client` are module-level names.
if __name__ == "__main__":
    cluster = LocalCluster(
        n_workers=4,
        host="tcp://127.0.0.1:2458",
        dashboard_address="127.0.0.1:2469",
    )
    client = Client(cluster)

In [5]:
# Every entry directly under the rcp85 directory is taken to be one model;
# the model name is the last path component.
rcp85_model_dirs = glob.glob("/terra/data/cmip5/global/rcp85/*")
models = [model_dir.split('/')[-1] for model_dir in rcp85_model_dirs]

In [6]:
def _pr_files(experiment, model, frequency="mon"):
    """Return the sorted list of pr_* files for one model, experiment and frequency."""
    pattern = ("/terra/data/cmip5/global/" + experiment + "/" + str(model)
               + "/r1i1p1/" + frequency + "/native/pr_*")
    return sorted(glob.glob(pattern))


def _point_pr(files):
    """Lazily open `files` and return `pr` at the grid cell nearest lat -34, lon 18."""
    ds = xr.open_mfdataset(files, decode_cf=True)
    return ds.sel(lat=-34, method='nearest').sel(lon=18, method='nearest').pr


def _monthly_anomaly(model, rcp85_subset=None, hist_frequency="mon", baseline_from_merged=False):
    """Build the monthly pr anomaly series (historical + rcp85) for one model.

    Parameters
    ----------
    model : str
        Model directory name.
    rcp85_subset : int or slice, optional
        Index applied to the sorted rcp85 file list to restrict which files are
        opened. An int yields a single path string, a slice yields a list.
    hist_frequency : str
        Sub-directory the historical files are read from ("mon" or "day").
        Anything other than "mon" is resampled to monthly means before merging.
    baseline_from_merged : bool
        If True the 1979-2005 baseline is the mean of the merged series,
        otherwise it is the mean of the historical series alone.

    Returns
    -------
    xarray.DataArray
        Merged monthly series minus its 1979-2005 baseline mean.
    """
    rcp85_files = _pr_files("rcp85", model)
    if rcp85_subset is not None:
        rcp85_files = rcp85_files[rcp85_subset]
    rcp85 = _point_pr(rcp85_files).sel(time=slice('2000', '2250'))

    hist = _point_pr(_pr_files("historical", model, hist_frequency))
    if hist_frequency != "mon":
        hist = hist.resample(time='M').mean()

    merged = xr.concat([hist, rcp85], dim='time').load()
    merged = merged.sortby(merged.time)
    merged = merged.resample(time='M').mean()

    baseline_source = merged if baseline_from_merged else hist
    return merged - baseline_source.sel(time=slice('1979', '2005')).mean(dim='time')


# Models for which the default recipe fails, with the settings to retry with.
_FALLBACKS = {
    # no historical monthly data
    'BNU-ESM': dict(hist_frequency="day"),
    # a problem with the later-than-2100 data: only open the first rcp85 file
    'MPI-ESM-LR': dict(rcp85_subset=0, baseline_from_merged=True),
    # a problem with the later-than-2100 data: only open the first two rcp85 files
    'CNRM-CM5': dict(rcp85_subset=slice(None, 2), baseline_from_merged=True),
}

dic = {}
load_errors = {}  # model -> repr of the exception that made the default recipe fail
for model in models:
    try:
        dic[model] = _monthly_anomaly(model)
    except Exception as err:
        # `Exception` rather than a bare except, so Ctrl-C / kernel interrupt
        # still stops the loop instead of being treated as a failed model.
        load_errors[model] = repr(err)
        if model in _FALLBACKS:
            dic[model] = _monthly_anomaly(model, **_FALLBACKS[model])
        else:
            # Skipped model: the name is printed; the reason is in `load_errors`.
            print(model)

EC-EARTH
CESM1-WACCM


In [7]:
# NOAA: monthly prate anomaly at the grid cell nearest lat -34, lon 18,
# restricted to 1940-2016 and referenced to the 1979-2005 mean.
noaa_ds = xr.open_mfdataset('/home/pmarsh/NOAA_2deg/prate.mon.mean.nc', decode_cf=True)
noaa_pr = noaa_ds.sel(lat=-34, method='nearest').sel(lon=18, method='nearest').prate
noaa_monthly = (
    noaa_pr.sortby(noaa_pr.time)
    .resample(time='M').mean()
    .sel(time=slice('1940', '2016'))
)
noaa_baseline = noaa_monthly.sel(time=slice('1979', '2005')).mean(dim='time')
dic['NOAA'] = noaa_monthly - noaa_baseline

In [8]:
# ERA5: daily pr files reduced to monthly means at the grid cell nearest
# latitude -34, longitude 18, referenced to the 1979-2005 mean.
era5_files = sorted(glob.glob('/terra/data/reanalysis/global/reanalysis/ECMWF/ERA5/day/native/pr_*'))
era5_ds = xr.open_mfdataset(era5_files, decode_cf=True)
era5_pr = era5_ds.sel(latitude=-34, method='nearest').sel(longitude=18, method='nearest').pr
era5_monthly = era5_pr.sortby(era5_pr.time).resample(time='M').mean()
era5_baseline = era5_monthly.sel(time=slice('1979', '2005')).mean(dim='time')
dic['ERA5'] = era5_monthly - era5_baseline

In [9]:
# Persist the anomaly dictionary.
# Each value is computed first: a lazily evaluated, file-backed array would be
# pickled as a reference to its source files rather than as values.
# The `with` block guarantees the output file is flushed and closed.
with open("monthly_pr_dic.p", "wb") as pickle_file:
    pickle.dump({name: series.compute() for name, series in dic.items()}, pickle_file)

In [11]:
# Disconnect the client, then shut down the LocalCluster that was created
# explicitly alongside it; closing the client alone leaves the workers running
# and the fixed scheduler/dashboard ports bound.
client.close()
cluster.close()