In [1]:
# Standard-library / third-party modules used directly in this notebook.
import sys
import dask
import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used
# in later cells -- confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')
# Add the project's scripts directory to the import path; this runs before the
# star import below, presumably so that `preprocessing_utils` can be found there.
sys.path.append('/ocean/projects/atm200007p/sferrett/Repos/monsoon-pr/scripts/')
# Star import: the helper functions called in later cells (get_era5, get_imerg,
# standardize_dims, subset_dims, resample_time, create_dataset, save_dataset)
# and the name `xr` are not defined anywhere in the visible cells, so they
# presumably all come from this module -- verify against preprocessing_utils.
from preprocessing_utils import *

In [2]:
# --- Metadata and output location -------------------------------------------
AUTHOR  = 'Savannah L. Ferretti'
EMAIL   = 'savannah.ferretti@uci.edu'
SAVEDIR = '/ocean/projects/atm200007p/sferrett/Repos/monsoon-pr/data/raw'

# --- Temporal selection -----------------------------------------------------
YEARS     = list(range(2000,2015))  # 2000 through 2014 inclusive
MONTHS    = [6,7,8]                 # presumably calendar month numbers (June-August)
# Frequency string that preprocess() compares against xr.infer_freq().
# NOTE(review): pandas >= 2.2 spells the hourly alias 'h' rather than 'H';
# confirm which spelling the installed infer_freq returns.
FREQUENCY = 'H'

# --- Spatial / vertical selection, each as a (lower, upper) pair ------------
LATRANGE = (5.0,25.0)
LONRANGE = (60.0,90.0)
LEVRANGE = (500.0,1000.0)

In [3]:
# Obtain the two source datasets from the project helpers (called with no arguments).
era5 = get_era5()
imerg = get_imerg()

In [5]:
# ERA5 fields taken as-is.
qdata = era5.specific_humidity
tdata = era5.temperature

# Fields that need a unit conversion before preprocessing.
psdata = era5.surface_pressure / 100   # Pa -> hPa
prdata = imerg.precipitationCal * 24   # mm/hr -> mm/day

In [6]:
# One record per variable, laid out as (data, shortname, longname, units, source);
# the order matches the first five positional parameters of preprocess().
variables = [
    (qdata,  'q',  'Specific humidity',  'kg/kg',  'ERA5'),
    (tdata,  't',  'Air temperature',    'K',      'ERA5'),
    (psdata, 'ps', 'Surface pressure',   'hPa',    'ERA5'),
    (prdata, 'pr', 'Precipitation flux', 'mm/day', 'IMERG V06'),
]

In [7]:
@dask.delayed
def preprocess(data, shortname, longname, units, source,
               years=YEARS, months=MONTHS, frequency=FREQUENCY,
               latrange=LATRANGE, lonrange=LONRANGE, levrange=LEVRANGE,
               author=AUTHOR, email=EMAIL, savedir=SAVEDIR):
    """Run the preprocessing chain for one variable and save the result.

    The function is wrapped in ``dask.delayed``, so calling it only creates a
    delayed task; the steps below execute when that task is computed.

    Steps, in order: ``standardize_dims`` -> ``subset_dims`` ->
    ``resample_time`` (only when the inferred time frequency differs from
    ``frequency``) -> ``create_dataset`` -> ``save_dataset``.

    Parameters
    ----------
    data
        Input field. After ``subset_dims`` the object must expose a ``time``
        attribute, which is passed to ``xr.infer_freq``.
    shortname, longname, units, source
        Descriptive metadata forwarded unchanged to ``create_dataset``.
    years, months, latrange, lonrange, levrange
        Selection arguments forwarded unchanged to ``subset_dims``.
    frequency
        Frequency string compared with the inferred frequency of the time
        axis and, on mismatch, forwarded to ``resample_time``.
    author, email
        Forwarded unchanged to ``create_dataset``.
    savedir
        Forwarded unchanged to ``save_dataset``.

    Returns
    -------
    None
        The function has no return statement; its effect is whatever
        ``save_dataset`` does.
    """
    # NOTE(review): dask.delayed computes any dask collection passed as an
    # argument before calling the function. If `data` is dask-backed, the full
    # unsubsetted field may be loaded here -- confirm against get_era5/get_imerg.
    standardized = standardize_dims(data)
    subset = subset_dims(standardized, years, months, latrange, lonrange, levrange)

    # NOTE(review): `xr` is presumably xarray; its infer_freq returns None for
    # a time axis without a regular frequency. Selecting only some months per
    # year may leave gaps, so this branch could always resample -- confirm.
    inferred = xr.infer_freq(subset.time)
    if inferred != frequency:
        prepared = resample_time(subset, frequency)
    else:
        prepared = subset

    save_dataset(
        create_dataset(prepared, shortname, longname, units, source, author, email),
        savedir,
    )

In [9]:
# Build one delayed task per record; each 5-tuple unpacks onto the five
# positional parameters of preprocess(), leaving the keyword defaults in place.
tasks = [preprocess(*record) for record in variables]
# Execute all tasks in a single dask.compute call.
dask.compute(*tasks)


KeyboardInterrupt

