In [10]:
import os, gc, sys
import pygrib
import regionmask
import cartopy
import cartopy.crs as ccrs
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
import multiprocessing as mp
import matplotlib.pyplot as plt 
import matplotlib as mpl

from scipy import stats
from glob import glob
from numpy import trapz
from scipy.integrate import simps
from functools import partial
from matplotlib import gridspec
from datetime import datetime, timedelta
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib import colors

import warnings
warnings.filterwarnings('ignore')

os.environ['OMP_NUM_THREADS'] = '1'

In [20]:
# CONFIG # # CONFIG # # CONFIG # # CONFIG # # CONFIG # 

cwa = 'SEW'
site = 'KSEA'
lat = 47.4436
lon = -122.2961

# site = 'KOLM'
# lat = 46.9753
# lon = -122.8971

# site = 'KPDX'
# lat = 45.5898
# lon = -122.5951

# cwa = 'SLC'
# site = 'KSLC'
# lat = 40.7608
# lon = -111.8910

# site = 'KSFO'
# lat = 37.6213
# lon = -122.3790

# site = 'KMSO'
# lat = 46.9188
# lon = -114.0837

fhr_start, fhr_end, fhr_step = 24, 168, 24#108, 6

ver = '4p0'
start_date = datetime(2020, 10, 1, 0)
end_date = datetime(2021, 2, 1, 0)

interval = 24

produce_thresholds = [0.01, 0.1, 0.25, 0.50, 1.0]
bint, bins_custom = 10, None

n_events = 3
cx = 8 if cwa == 'WESTUS' else 4
cy = cx
# CONFIG # # CONFIG # # CONFIG # # CONFIG # # CONFIG # 

In [21]:
nbm_dir = '/scratch/general/lustre/u1070830/nbm/'
urma_dir = '/scratch/general/lustre/u1070830/urma/'
tmp_dir = '/scratch/general/lustre/u1070830/tmp/'
fig_dir = '/uufs/chpc.utah.edu/common/home/steenburgh-group10/mewessler/nbm/'
os.makedirs(tmp_dir, exist_ok=True)

In [13]:
extract_dir = nbm_dir + 'extract/'
extract_flist = sorted(glob(extract_dir + '*'))

if not os.path.isfile(urma_dir + 'agg/urma_agg.nc'):
    pass 
    #print('URMA aggregate not found')

else:
    #print('Getting URMA aggregate from file')
    urma = xr.open_dataset(urma_dir + 'agg/urma_agg.nc')['apcp24h_mm']

# urma = urma/25.4
# urma = urma.rename('apcp24h_in')
lons, lats = urma.lon, urma.lat

In [14]:
geodir = '../forecast-zones/'
zones_shapefile = glob(geodir + '*.shp')[0]

# Read the shapefile
zones = gpd.read_file(zones_shapefile)

# Prune to Western Region using TZ
zones = zones.set_index('TIME_ZONE').loc[['M', 'Mm', 'm', 'MP', 'P']].reset_index()
cwas = zones.dissolve(by='CWA').reset_index()[['CWA', 'geometry']]
_cwas = cwas.copy()

if cwa == 'WESTUS':
    _cwas['CWA'] = 'WESTUS'
    _cwas = _cwas.dissolve(by='CWA').reset_index()
    bounds = _cwas.total_bounds
else:
    bounds = _cwas[_cwas['CWA'] == cwa].bounds.values[0]
    
print(bounds)
    
lons, lats = urma.lon, urma.lat
mask = regionmask.mask_3D_geopandas(_cwas, lons, lats).rename({'region':'cwa'})
mask['cwa'] = _cwas.iloc[mask.cwa]['CWA'].values.astype(str)
mask = mask.sel(cwa=cwa)
mask

[-124.762578     46.38421249 -120.65499878   49.00241089]


In [15]:
idx = np.where(
    (urma.lat >= bounds[1]) & (urma.lat <= bounds[3]) &
    (urma.lon >= bounds[0]) & (urma.lon <= bounds[2]))

mask = mask.isel(y=slice(idx[0].min(), idx[0].max()), x=slice(idx[1].min(), idx[1].max()))
urma = urma.isel(y=slice(idx[0].min(), idx[0].max()), x=slice(idx[1].min(), idx[1].max()))
urma = urma.transpose('valid', 'y', 'x')

In [18]:
def extract_perc(_fhr, _urma):

    nbm_file = glob(nbm_dir + 'extract_new/nbm_perc_fhr%03d.nc'%_fhr)[0]
    
    # Subset the threshold value
    nbm = xr.open_dataset(nbm_file).sel(
    y=slice(idx[0].min(), idx[0].max()),
    x=slice(idx[1].min(), idx[1].max()))

    # Subset the times
    nbm_time = nbm.valid
    urma_time = _urma.valid
    time_match = nbm_time[np.in1d(nbm_time, urma_time)].values
    time_match = np.array([t for t in time_match if pd.to_datetime(t) >= start_date])
    time_match = np.array([t for t in time_match if pd.to_datetime(t) <= end_date])
    date0 = pd.to_datetime(time_match[0]).strftime('%Y/%m/%d %H UTC')
    date1 = pd.to_datetime(time_match[-1]).strftime('%Y/%m/%d %H UTC')

    _nbm = nbm.sel(valid=time_match)
    _urma = _urma.sel(valid=time_match)
    nbm_mask, _nbm = xr.broadcast(mask, _nbm)
    urma_mask, _urma = xr.broadcast(mask, _urma)

    _nbm_masked = xr.where(nbm_mask, _nbm, np.nan)
    _urma_masked = xr.where(urma_mask, _urma, np.nan)
    
    _urma_masked['fhr'] = _fhr
    
    return _nbm_masked, _urma_masked

In [19]:
fhrs = np.arange(fhr_start, fhr_end+1, fhr_step)
extract_perc_mp = partial(extract_perc, _urma=urma)

with mp.get_context('fork').Pool(len(fhrs)) as p:

    returns = p.map(extract_perc_mp, fhrs, chunksize=1)
    p.close()
    p.join()

IndexError: list index out of range

In [None]:
returns = np.array(returns)

In [None]:
nbm_pqpf = xr.concat(returns[:, 0], dim='fhr')['perc']
urma_tp = xr.concat(returns[:, 1], dim='fhr')

In [None]:
nbm_det = nbm_pqpf.isel(percentile=-1)
nbm_pqpf = nbm_pqpf.isel(percentile=slice(0, -1))

a = abs(urma.lat-lat)+abs(urma.lon-lon)
i, j = np.unravel_index(a.argmin(),a.shape)

nbm_pqpf = nbm_pqpf.isel(y=i, x=j)
nbm_det = nbm_det.isel(y=i, x=j)
urma_tp = urma_tp.isel(y=i, x=j)

In [None]:
fig, axs = plt.subplots(2, 4, facecolor='w', figsize=(30, 16))
axs = axs.flatten()

for icrit, ax in enumerate(axs):
    
    print('%d/%d'%(icrit+1, len(axs)))

    det_rank, cdf_rank = [], []
    for fhr in fhrs:

        _det_rank, _cdf_rank = [], []
        for valid in nbm_pqpf.valid:

            _nbm_pqpf = nbm_pqpf.sel(fhr=fhr, valid=valid)
            _nbm_det = nbm_det.sel(fhr=fhr, valid=valid)
            _urma_tp = urma_tp.sel(fhr=fhr, valid=valid)
            
            if icrit == 0:
                # No Criteria
                critA, critA_label = True, 'None'
                critB, critB_label = True, ''
                critC, critC_label = True, ''

            elif icrit == 1:
                # Observed Precip (URMA > 0)
                critA, critA_label = (_urma_tp >= 0.01), '(urma_tp >= 0.01)'
                critB, critB_label = True, ''
                critC, critC_label = True, ''

            elif icrit == 2:
                # Deterministc > 0
                critA, critA_label = True, ''
                critB, critB_label = (_nbm_det >= 0.01), '(nbm_det >= 0.01)'
                critC, critC_label = True, ''

            elif icrit == 3:
                # Observed Precip (URMA > 0) & Deterministc > 0
                critA, critA_label = (_urma_tp >= 0.01), '(urma_tp >= 0.01)&'
                critB, critB_label = (_nbm_det >= 0.01), '(nbm_det >= 0.01)'
                critC, critC_label = True, ''

            elif icrit == 4:
                # Deterministc > 0 & POP > 0
                critA, critA_label = (_nbm_det >= 0.01), '(_nbm_det >= 0.01)&'
                critB, critB_label = (_nbm_pqpf.sel(percentile=99) >= 0.01), '(_nbm_pqpf.sel(percentile=99) >= 0.01)'
                critC, critC_label = True, ''

            elif icrit == 5:
                # Deterministc > 0 & POP > 0 & Observed Precip (URMA > 0)
                critA, critA_label = (_urma_tp >= 0.01), '(urma_tp >= 0.01)&'
                critB, critB_label = (_nbm_det >= 0.01), '(_nbm_det >= 0.01)&\n'
                critC, critC_label = (_nbm_pqpf.sel(percentile=99) >= 0.01), '(_nbm_pqpf.sel(percentile=99) >= 0.01)'

            elif icrit == 6:
                # Deterministc > 0 & POP ≥ 50
                critA, critA_label = (_nbm_det >= 0.01), '(_nbm_det >= 0.01)&'
                critB, critB_label = (_nbm_pqpf.sel(percentile=50) >= 0.01), '(_nbm_pqpf.sel(percentile=50) >= 0.01)'
                critC, critC_label = True, ''

            elif icrit == 7:
                # Deterministc > 0 & POP ≥ 99
                critA, critA_label = (_nbm_det >= 0.01), '(_nbm_det >= 0.01)&'
                critB, critB_label = (_nbm_pqpf.sel(percentile=1) >= 0.01), '(_nbm_pqpf.sel(percentile=1) >= 0.01)'
                critC, critC_label = True, ''

            if (critA & critB & critC):
                
                cdf_vals = _nbm_pqpf.values
                cdf_mean = np.average(cdf_vals, weights=1-_nbm_pqpf.percentile/100)
                cdf_mean_index = _nbm_pqpf.searchsorted(cdf_mean)
                
                if cdf_mean_index > 0:
                    cdf_rank_bracket = _nbm_pqpf.isel(percentile=slice(cdf_mean_index-1, cdf_mean_index+1))
                    cdf_rank_value = np.interp(cdf_mean, cdf_rank_bracket, [cdf_mean_index-1, cdf_mean_index])*10
                    
                else:
                    cdf_rank_value = np.nan
                                
                _cdf_rank.append(cdf_rank_value)
                _det_rank.append(_nbm_pqpf.percentile[np.searchsorted(_nbm_pqpf, _nbm_det, 'right')-1])
                
            else:
                _det_rank.append(np.nan)
                _cdf_rank.append(np.nan)
                
        det_rank.append(_det_rank)
        cdf_rank.append(_cdf_rank)

    det_rank = np.array(det_rank)
    cdf_rank = np.array(cdf_rank)

    bins = np.arange(0, 101, 10)

    for i, fhr in enumerate(fhrs):

        try:
            x = det_rank[i, :]
            x = x[~np.isnan(x)]

            xx = np.linspace(0, 101, 1000)

            kde = stats.gaussian_kde(x)

            label = 'FHR%d'%fhr
            shades = np.linspace(.2, .8, len(fhrs))[::-1]
            ax.axvline(50, linestyle=(0, (5, 10)), color='k', linewidth=1.5)
            ax.plot(xx, kde(xx), linewidth=1.5, label=label, color='k', alpha=shades[i])
            ax.axvline(np.nanmean(cdf_rank[i, :]), color='brown', alpha=shades[i])
            
        except:
            pass

    ax.set_xlim([0, 100])
    ax.set_xticks(bins)

    ax.set_title('%s%s%s\n\nNBM4.0 | %s\n%s - %s\nDeterministic Rank in PQPF'%(
        critA_label, critB_label, critC_label, site, 
        pd.to_datetime(nbm_det.valid[0].values).strftime('%Y-%m-%d'),
        pd.to_datetime(nbm_det.valid[-1].values).strftime('%Y-%m-%d')))

    if icrit == 3:
        ax.plot(0, 0, c='brown', label='PQPF Rank')
        ax.legend(loc='center right', bbox_to_anchor=(1.30, 0.5)) #-0.25
    
    ax.grid()

fig.subplots_adjust(hspace=0.40)
plt.show()