In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
#import seaborn as sns
from scipy import signal

import cartopy.feature as cfeature
import cartopy.crs as ccrs
import cartopy.mpl.ticker as cticker
from cartopy.util import add_cyclic_point
 
from mlprecip_utils import *
from mlprecip_models import *
from mlprecip_datasets import *
from mlprecip_xai import *
from mlprecip_plot import *

import mlprecip_datasets

import warnings

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Suppress every Python warning for the rest of the session: action "ignore"
# with no message/category/module restriction matches all warnings.
warnings.filterwarnings("ignore")

### Set Variables for plotting, region, etc.

In [3]:
# ---- Target variable ---------------------------------------------------
# Name of the data variable used as the prediction target in the main loop.
varname = 'precip'
#varname = 'z500'

# ---- Models ------------------------------------------------------------
# Model identifiers handed one at a time to testModelsCat in the main loop.
model_types = ['logmodel_med', 'nnmodel_med']
cat_labels = ['Lower', 'Upper']

# ---- Region ------------------------------------------------------------
# SEUS box as [min, max]; both lists are used as slice bounds when subsetting.
lat_reg = [24.0, 36.0]
lon_reg = [269.0, 283.0]

# US box (alternative; names aligned with lat_reg/lon_reg used downstream)
#lon_reg = [230, 310]
#lat_reg = [25, 55]

# ---- Time --------------------------------------------------------------
# Calendar months that make up each season, and the analysis period.
winter = [12, 1, 2]
summer = [6, 7, 8]
sdate = '1979-01-01'
#edate = '2019-12-31'
edate = '2018-12-31'

# ---- Composite / plotting settings ---------------------------------------
# Only referenced by the composite and ratio diagnostics, which are currently
# commented out in the main loop.
cmap = 'DryWet'
clevs = np.arange(-0.8, 0.9, 0.1)
labels = ['Positive', 'Neutral', 'Negative']
bins = [-100, -0.5, 0.5, 100]

### Read in Precip Data, Subset to Region, & Calculate Anomalies

In [4]:
# Open all files matching the glob as a single dataset, then keep sdate..edate.
fnames = '/shared/ccsm4/kpegion/obs2/CPC-UNI/precip.V1.0.*.nc'
ds = xr.open_mfdataset(fnames, combine='by_coords')
ds = ds.sel(time=slice(sdate, edate))

# Cut out the SEUS box defined by lat_reg / lon_reg.
ds_seus = ds.sel(
    lat=slice(lat_reg[0], lat_reg[1]),
    lon=slice(lon_reg[0], lon_reg[1]),
)

# Anomalies: subtract the mean for each day of the year from every day.
doy_groups = ds_seus.groupby('time.dayofyear')
ds_anoms = doy_groups - doy_groups.mean()

# Optional smoothing (disabled)
#ds_anoms=ds_anoms.rolling(time=7,center=False).mean() #.dropna(dim='time')

# Put the anomalies in chronological order.
ds_anoms = ds_anoms.sortby('time')

### ERAI Z500

# Alternative target: day-of-year anomalies computed from the ERA-Interim Z500 file.
# NOTE(review): this snippet has no "In [n]:" prompt, so it appears not to be an
# executed cell. If it is run, it rebinds ds and ds_anoms (replacing the precip
# anomalies computed above), and, as written, applies no sdate/edate or
# lat_reg/lon_reg selection -- confirm before enabling together with varname='z500'.
fname='/project/predictability/kpegion/wxregimes/era-interim/erai_z500_1979-2019.nc'
ds=xr.open_dataset(fname)
ds_anoms=ds.groupby('time.dayofyear')-ds.groupby('time.dayofyear').mean()

### Main Program

In [None]:
def _read_predictor(f, seas_abbrv):
    """Read one predictor using the reader function named in its description.

    Parameters
    ----------
    f : mapping
        One entry from ``init_predictors_dict()``. Keys read here: 'name',
        'ptype', 'readfunc', 'file', and 'freq' (only when ptype is 'index').
    seas_abbrv : str
        Season abbreviation; only forwarded to the 'pnaregimes' reader.

    Returns
    -------
    Whatever the reader returns. The RMM and nash readers return a pair; the
    second element is kept for the '*_amp' predictors and the first for the
    '*_phase' predictors.
    """
    reader = getattr(mlprecip_datasets, f['readfunc'])
    name = f['name']

    if f['ptype'] == 'index' and f['freq'] == 'mon':
        ds_mon = reader(f['file'], name, sdate, edate)
        # Linearly interpolate monthly indices to daily
        return ds_mon.resample(time='1D').interpolate("linear").sel(time=slice(sdate, edate))
    if name in ('RMM_amp', 'nash_amp'):
        _, ds_amp = reader(f['file'], sdate, edate)
        return ds_amp
    if name in ('RMM_phase', 'nash_phase'):
        ds_phase, _ = reader(f['file'], sdate, edate)
        return ds_phase
    if name == 'pnaregimes':
        return reader(f['file'], seas_abbrv, sdate, edate)
    if name == 'z500_local':
        return reader(f['file'], lat_reg, lon_reg, sdate, edate)
    return reader(f['file'], sdate, edate)


indices_dict=init_predictors_dict()

# Loop over seasons
#for seas,slabel,seas_abbrv in zip([winter,summer],['Winter','Summer'],['DJF','JJA']):
for seas,slabel,seas_abbrv in zip([summer],['Summer'],['JJA']):

    print(slabel)

    # Target anomalies for this season. This depends only on the season and the
    # date range, so it is selected once per season rather than once per
    # predictor; it is also defined even if no predictor matches the season.
    ds_p=ds_anoms.sel(time=ds_anoms['time.month'].isin(seas)).sel(time=slice(sdate,edate)) #.dropna(dim='time')

    features_list=[]

    # Loop over all predictor descriptions
    for f in indices_dict:

        # Skip predictors that are not flagged as valid for this season
        if seas_abbrv not in f['seas']:
            continue

        # Read the predictor
        ds=_read_predictor(f,seas_abbrv)

        # Standardize non-categorical indices to zero mean and unit standard
        # deviation over time. No detrending is done here, and the statistics
        # are taken over all times in ds, before the season is selected below.
        if (f['ptype']=='index'):
            v=f['name']
            ds[v]=(ds[v]-ds[v].mean(dim='time'))/ds[v].std(dim='time')

        # Select season from predictor index
        ds_i=ds.sel(time=ds['time.month'].isin(seas)) #.dropna(dim='time')

        # Append this predictor index to list of all predictors
        features_list.append(ds_i[f['name']].sortby('time'))

        # Categorize predictor indices for composites and ratios
        #ds_bins=[]
        #if (f['ptype']=='index'):
        #    ds_bins=makeCategories(ds_i,bins,f['name'])
        #    labels=['Positive','Neutral','Negative']
        #else:
        #    ds_bins=xr.DataArray(ds[f['name']],
        #                         coords={'time':ds['time'].values},
        #                         dims=['time'])  
        #    ds_bins=ds_bins.to_dataset(name=f['name']+'_bins')
        #    nbins=int(np.nanmax(ds[f['name']].values)+1)
        #    labels=np.arange(nbins).astype(str)  
        #ds=xr.merge([ds_p,ds_bins])

        #---------- COMPOSITES ------------#
        #ds_comp,totals=calcComposites(ds,f['name'],labels)
        #ds_totals=xr.concat(totals,dim=f['name']+'_bins').to_dataset(name='bins_count')
        #suptitle=slabel+' '+' Anomalies '+f['name'].upper()
        #compfile='../figs/comp_anomaly.daily.'+slabel+'.'+f['name']+'.png'
        #plotComposites(ds_comp,f['name'],totals,suptitle,labels,clevs,cmap,compfile)

        #---------- % DAYS ABOVE/BELOW NORMAL -------#   
        #da_above,da_below,da_ratio=calcRatios(ds,f['name'],'precip',0.0)
        #clevs_ratio=np.arange(0,100,5)

        # Above
        #suptitle=f['name'].upper()+' % Days Above Normal '+slabel
        #compfile='../figs/comp_above.daily.'+slabel+'.'+f['name']+'.png'
        #plotRatios((da_above/(da_above+da_below)*100),f['name'],suptitle,labels,clevs_ratio,'Greens1',compfile)

        # Below
        #suptitle=f['name'].upper()+' % Days Below Normal '+slabel
        #compfile='../figs/comp_below.daily.'+slabel+'.'+f['name']+'.png'
        #plotRatios((da_below/(da_above+da_below)*100),f['name'],suptitle,labels,clevs_ratio,'Browns3',compfile)

    #----- FIT MODELS AND TEST  -----------#

    # Fail early with a clear message instead of an obscure error further down
    if not features_list:
        raise ValueError('No predictors are flagged as valid for season '+seas_abbrv)

    # Define Features: one dataset of all predictors, keeping only times where
    # every predictor has a value
    ds_features=xr.merge(features_list).dropna(dim='time')

    # Define Index Target as Mean precip in region
    # Note this is currently designed as a diagnostic not a real prediction; add target lead for prediction
    ds_target=ds_p.mean(dim=['lat','lon'])

    # Make sure we have the same times for target and features
    # (ds_features already had its missing times dropped above)
    ds_target,ds_features=xr.align(ds_target.dropna(dim='time'),ds_features,join='inner')

    # Center the target on its median so that zero splits the samples in half
    median=np.percentile(ds_target[varname],50)
    ds_target[varname]=ds_target[varname]-median

    # Test the prediction of daily preciptation anomaly
    #print('============= Regression ==========================')
    #testModelsRegr(ds_features,ds_target)

    # Test the prediction of precipitation in categories by different ML models
    print('============= Categorical Models  ================')
    nmodels=100

    ds_model_list=[]
    for m_function in model_types:
        print(m_function)
        fname='../data/fnn/seus.median.'+seas_abbrv+'.'+m_function
        ds_model_list.append(testModelsCat(m_function,ds_features,ds_target,varname,nmodels,fname=fname))

    # Create Dataset of different kinds of ML models (logistic, NNs, etc)
    ds_model=xr.concat(ds_model_list,dim='model_type')
    ds_model['model_type']=model_types

    # Save model output
    ds_model.to_netcdf('../data/fnn/model_output.'+seas_abbrv+'.nc')

Summer
logmodel_med
Upper Cat:  1502
Lower Cat:  1502
Check Features and Target Dimensions
Features (X):  (3004, 10)
Target (Y):  (3004, 2)
Samples:  3004
Features:  10
Training Size:  2403
Testing Size:  601
Epoch 00160: early stopping
Training set accuracy score: 0.5796920524083026
Test set accuracy score: 0.5424292882448821
Test ROC AUC score: 0.5491200070931419
Epoch 00120: early stopping
Training set accuracy score: 0.5730337094402392
Test set accuracy score: 0.532445923684044
Test ROC AUC score: 0.5529436538546793
Epoch 00090: early stopping
Training set accuracy score: 0.5455680376432659
Test set accuracy score: 0.5623960086886776
Test ROC AUC score: 0.5641707673892804
Training set accuracy score: 0.5767790267009116
Test set accuracy score: 0.5307820348592844
Test ROC AUC score: 0.551137119297779
Epoch 00083: early stopping
Training set accuracy score: 0.5638784854562393
Test set accuracy score: 0.5191347727958057
Test ROC AUC score: 0.5514917763887042
Epoch 00215: early stoppin

In [None]:
# Display the aligned predictor dataset left over from the last season
# processed by the main loop.
ds_features

In [None]:
# Display the median-centered, area-mean target left over from the last season
# processed by the main loop.
ds_target