In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import xesmf as xe

from mlprecip_utils import *
from mlprecip_datasets import *
import mlprecip_datasets

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Calendar month numbers that make up each season
winter, summer = [12, 1, 2], [6, 7, 8]

# First and last day of the analysis period (ISO date strings, used in time slices)
sdate, edate = '1979-01-01', '2019-08-31'

# Southeast US (SEUS) box, each given as a [lower, upper] pair:
# latitude bounds first, then longitude bounds
lat_reg = [24.0, 36.0]
lon_reg = [269.0, 283.0]

### Get Precip Target

# ERA-Interim alternative, kept for reference:
#files='/shared/ccsm4/khuang/obs/era-interim/era-interim.daily.precip.*.nc'
#dataset='era-interim-precip'
#ds=xr.open_mfdataset(files,combine='by_coords')

files='/shared/ccsm4/kpegion/mlso/era5_precip_daily.1979-01-01.2020-12-31.nc'
dataset='era5-precip'
ds=xr.open_mfdataset(files)

# Rename the coordinates to lat/lon, cut out the SEUS box padded by 5 degrees
# on every side, and load the subset into memory so the file can be closed.
ds_seus=ds.rename({'latitude':'lat',
                   'longitude':'lon'}).sel(lat=slice(lat_reg[0]-5,lat_reg[1]+5),
                                           lon=slice(lon_reg[0]-5,lon_reg[1]+5)).compute()
ds_seus

# ds_seus no longer depends on the open file handle
ds.close()

#ds_seus=ds_seus.chunk({'lat':10,'lon':10,'time':1000})
ds_climo=daily_climo(ds_seus['precip'],'precip')['precip'].chunk({'dayofyear':366,'lat':78,'lon':86})

# Keep the computed climatology (the result was previously discarded, which
# meant the climatology was evaluated again when forming the anomalies).
ds_climo=ds_climo.compute()

ds_climo

# Daily anomalies: subtract the day-of-year climatology from each day
ds_anoms=(ds_seus['precip'].groupby('time.dayofyear')-ds_climo).compute()

# Free names that are no longer needed. ds was already closed above; calling
# ds.close() again after this del would raise NameError, so it is not repeated.
del ds_climo, ds

In [3]:
ds_anoms

ds_anoms=ds_anoms.chunk({'lon':86,'lat':78})
ds_anoms

# Coarse 5-degree target grid, defined globally first.
# The stop value 92 makes np.arange include the 90-degree latitude point.
new_lon=np.arange(0,360,5)
new_lat=np.arange(-90,92,5)

# Keep only the target points that fall between the first and last
# lat/lon values of the anomaly field.
ds_out = xr.Dataset({'lat': (['lat'], new_lat),
                     'lon': (['lon'], new_lon)}).sel(lon=slice(ds_anoms['lon'][0],ds_anoms['lon'][-1]),
                                                     lat=slice(ds_anoms['lat'][0],ds_anoms['lat'][-1]))
ds_out

# The source field is a regional subset, not a global grid, so longitude must
# not be treated as periodic (periodic=True is only meant for global grids).
regridder = xe.Regridder(ds_anoms,ds_out,'bilinear',periodic=False)

# NOTE: ds_anoms is rebound to the regridded field, so re-running this section
# without re-running the anomaly computation would regrid already-coarse data.
ds_anoms=regridder(ds_anoms)

ds_anoms

In [4]:
# Predictor definitions: iterated below as a sequence of dict-like entries
# that are read via the keys 'seas', 'ptype', 'freq', 'name', 'readfunc', 'file'.
indices_dict=init_predictors_dict()

seas_list=[]

# Loop over seasons
# (seas, the month list, is only needed by the disabled season selection below)
for seas,slabel,seas_abbrv in zip([winter,summer],['Winter','Summer'],['DJF','JJA']):

    print(slabel)

    features_list=[]

    # Loop over all predictor definitions
    for f in indices_dict:

        # Skip predictors that are not valid for this season. Previously the
        # append below ran even for skipped predictors, re-adding whatever
        # dataset was left in ds from an earlier iteration (or failing with
        # NameError when no dataset had been read yet).
        if seas_abbrv not in f['seas']:
            continue

        # Reader function is looked up by name in mlprecip_datasets
        reader=getattr(mlprecip_datasets,f['readfunc'])
        name=f['name']

        # Read the predictor; the reader call signature depends on the predictor
        if (f['ptype']=='index' and f['freq']=='mon'):
            ds=reader(f['file'],name,sdate,edate)
            # Linearly interpolate monthly indices to daily
            ds=ds.resample(time='1D').interpolate("linear").sel(time=slice(sdate,edate))
        elif name in ('RMM_amp','nash_amp'):
            # These readers return a pair; the amplitude is the second element
            _,ds=reader(f['file'],sdate,edate)
        elif name in ('RMM_phase','nash_phase'):
            # Same readers; the phase is the first element
            ds,_=reader(f['file'],sdate,edate)
        elif name=='pnaregimes':
            ds=reader(f['file'],seas_abbrv,sdate,edate)
        elif name=='z500_local':
            ds=reader(f['file'],lat_reg,lon_reg,sdate,edate)
        else:
            ds=reader(f['file'],sdate,edate)

        # Standardize (remove the time mean, divide by the time standard
        # deviation) predictors of type 'index'; no detrending is applied here.
        if (f['ptype']=='index'):
            ds[name]=(ds[name]-ds[name].mean(dim='time'))/ds[name].std(dim='time')

        # Season selection is currently disabled, so each predictor keeps
        # every time step returned by its reader:
        #ds_i=ds.sel(time=ds['time.month'].isin(seas))

        # Append this predictor to the list of all predictors for the season
        features_list.append(ds)

    ds_f=xr.merge(features_list)
    seas_list.append(ds_f)

ds_features=xr.combine_by_coords(seas_list)
ds_features=ds_features.sortby('time')

Winter
Summer


ds_features

In [5]:
# Features: train/validation period and held-out test period
ds_features_tval=ds_features.sel(time=slice('1979-01-01','2016-12-31'))
ds_features_test=ds_features.sel(time=slice('2017-01-01','2019-09-01'))

# Target: mean of the precip anomalies over the SEUS box, split over the same
# two periods as the features. These definitions were commented out while the
# del and display below still referenced them, which raised NameError.
# NOTE(review): this is an unweighted box mean with skipna=True; a cos(lat)
# scaling of ds_anoms existed only as disabled code and is not applied. Confirm
# this is the intended target definition.
ds_anoms_tval=ds_anoms.sel(time=slice('1979-01-01','2016-12-31'))
ds_target_tval=ds_anoms_tval.sel(lat=slice(lat_reg[0],lat_reg[1]),
                                 lon=slice(lon_reg[0],lon_reg[1])).mean(dim=['lat','lon'],skipna=True)

ds_anoms_test=ds_anoms.sel(time=slice('2017-01-01','2019-09-01'))
ds_target_test=ds_anoms_test.sel(lat=slice(lat_reg[0],lat_reg[1]),
                                 lon=slice(lon_reg[0],lon_reg[1])).mean(dim=['lat','lon'],skipna=True)

# Drop the gridded anomaly fields now that the box-mean targets exist.
# NOTE: after this, the section cannot be re-run without recomputing ds_anoms.
del ds_anoms, ds_anoms_tval, ds_anoms_test

ds_target_tval

### Write to fnn and lr data directory

In [6]:
#ds_features_tval.to_netcdf('../data/fnn/features_trainval.nc')
#ds_features_test.to_netcdf('../data/fnn/features_test.nc')
#ds_target_tval.to_dataset(name='precip').to_netcdf('../data/fnn/target_'+dataset+'_trainval.nc')
#ds_target_test.to_dataset(name='precip').to_netcdf('../data/fnn/target_'+dataset+'_test.nc')