In [None]:
import numpy as np
import pygrib
from scipy import optimize

import matplotlib as mpl
import matplotlib.pyplot as plt 
import matplotlib.colors as colors
from mpl_toolkits.basemap import Basemap, cm

import time

In [None]:
# --- run configuration -------------------------------------------------------
# Case to process; the GRIB-reading cell maps this key to a forecast file.
month = 'june'

# Which spline approximation(s) to work with.
approx_type = 'both' # 'unif', 'var', or 'both'

# Optional inputs.
include_obs = True    # also read the observed-precipitation GRIB file
include_temp = False  # does not approximate temperature data yet

# NOTE(review): `error_calc` and `save` are not read by any cell visible here;
# presumably they toggle error computation and saving of results -- confirm.
error_calc = True
save = True

# Symmetric colour limits for the difference maps; None lets the plotting
# cells derive the limit from the data.
boundary = boundary_crps = None

In [None]:
# read in grib file

# Forecast runs processed so far, keyed by the `month` option.
# Each entry is (date, lead_time, forecast_time) as used in the file name.
forecast_runs = {
    'jan':   ('20220111', '00', '018'),
    'march': ('20220324', '12', '018'),
    'april': ('20220425', '12', '018'),
    'june':  ('20220626', '12', '006'),
    'july':  ('20220703', '00', '018'),
    'sept':  ('20210920', '12', '018'),
    'oct':   ('20211015', '00', '018'),
    'dec':   ('20211205', '12', '018'),
    'wet':   ('', '00', '012'),
}

# NOTE(review): this assignment overrides the `month` set in the configuration
# cell. It is kept because the observation cell hard-codes the June 2022 URMA
# file, so other months would pair a forecast with the wrong observations.
month = 'june'

# Fail loudly on a bad key: merely printing a message would let the cell carry
# on with undefined (or stale, from an earlier run) date/lead/forecast values.
if month not in forecast_runs:
    raise ValueError(
        f'Issue with month input: {month!r} is not one of {sorted(forecast_runs)}.'
    )
date, lead_time, forecast_time = forecast_runs[month]

fn_grb = 'data/blend' + date + '.t' + lead_time + 'z.qmd.f' + forecast_time + '.co.grib2'
ds_grb = pygrib.open(fn_grb)
try:
    # latitude and longitude grid values
    lat, long = ds_grb.message(2).data()[1:]

    # extracting data: GRIB messages 2..100 fill the 99 slices of `precip`
    # (treated as the 1%..99% quantile levels by the cells further down)
    precip_shape = lat.shape
    precip = np.zeros(shape=(99,)+precip_shape)
    for i in range(99):
        precip[i,:,:] = ds_grb.message(i+2).data()[0] # ACPC:surface:12-18 hour acc fcst
finally:
    # release the file handle even if a message is missing or unreadable
    ds_grb.close()

In [None]:
if include_obs:
    # Use names distinct from the forecast file's: the temperature cell reopens
    # `fn_grb` expecting the forecast GRIB file, so overwriting `fn_grb` here
    # would make it read the observation file instead.
    fn_obs = 'data/urma2p5.2022062612.pcp_06h.wexp.grb2'
    ds_obs = pygrib.open(fn_obs)
    try:
        # first GRIB message holds the observed field used as `obs`
        obs = ds_obs.message(1).data()[0]
    finally:
        # close the handle even when the message cannot be read
        ds_obs.close()

In [None]:
# Temperature is switched off here regardless of the configuration cell.
include_temp = False
if include_temp:
    ds_grb = pygrib.open(fn_grb)
    temp_shape = lat.shape
    temp = np.zeros((99,) + temp_shape)
    # GRIB messages 215..313 fill the 99 slices of `temp`
    for slot, msg_number in enumerate(range(215, 215 + 99)):
        temp[slot, :, :] = ds_grb.message(msg_number).data()[0] # TMP:2 m above ground:0-18 hour max
    ds_grb.close()

In [None]:
# masking precip at grid points that are not monotonic
#
# A grid point is flagged when any level holds a larger value than the level
# above it; every level of a flagged point is masked.
#
# Work on the raw values so the cell can be re-run: once `precip` is a masked
# array, comparing masked entries no longer yields plain booleans.
precip_values = np.ma.getdata(precip)

# One vectorised comparison per pair of adjacent levels replaces the
# per-grid-point Python loops and keeps the temporary arrays 2-D.
non_monotonic = np.zeros(precip_values.shape[1:], dtype=bool)
for level in range(precip_values.shape[0] - 1):
    non_monotonic |= precip_values[level] > precip_values[level + 1]

# Same layout as before: float array, 1.0 where masked, 0.0 elsewhere.
mask = np.zeros(precip_values.shape)
mask[:, non_monotonic] = 1.0

precip = np.ma.masked_array(precip_values, mask)

In [None]:
# `obs` only exists when the observation file was read; without this guard the
# cell raises NameError whenever include_obs is False.
if include_obs:
    # keep a handle on the field as read, before the forecast mask is applied
    obs_unmasked = obs
    # mask[-1] is the mask of the last level; flagged grid points are masked
    # at every level, so any level's slice gives the same 2-D mask
    obs = np.ma.masked_array(obs, mask[-1,:,:])

In [None]:
# quantile levels 1..99 and the matching probabilities 0.01..0.99
levels = np.linspace(1, 99, 99)
qs = levels / 100

# number of points in the evaluation grid `xs`
N = int(1e3)

# upper end of the grid: largest value of the last forecast level, widened to
# include the largest observation when observations are in use
data_max = precip[-1,:,:].max()
if include_obs:
    data_max = max(obs.max(), data_max)

# evenly spaced values on which CDFs are compared
xs = np.linspace(0.0, data_max, N)

# linear spline functions

def linear_splines_unif(data, num_knots=10, zero_inflated=True):
    '''
    Approximates quantile data with a piecewise linear spline through uniformly
    spaced knots and returns the spline evaluated at every quantile level.

    Parameters
    ----------
    data : 1-D array
        Quantile values ordered by level; data[k] belongs to level k+1
        (99 values in this notebook, but any length is accepted).
    num_knots : int
        Number of knots requested. Fewer may be used, because knot indices
        are rounded to integers and duplicates are dropped.
    zero_inflated : bool
        If True and the data start with a run of zeros, one knot is pinned at
        the first level and the remaining num_knots-1 knots are spread from
        just before the first positive value to the last level.

    Returns
    -------
    1-D float array with the same length as `data`.
    '''

    n_levels = len(data)
    last = n_levels - 1

    knots = None
    if zero_inflated:
        positive = np.where(data > 0)[0]
        # An all-zero input has no positive entry; fall through to the plain
        # uniform knots (which reproduce the zeros) instead of failing on the
        # minimum of an empty array.
        if positive.size:
            # index of the last zero before the first positive value
            knot_ = positive.min() - 1
            if knot_ > 1:
                knots = np.unique(np.linspace(knot_-1, last, num_knots-1, dtype=int))
                knots = np.insert(knots, 0, 0)

    if knots is None:
        knots = np.unique(np.linspace(0, last, num_knots, dtype=int))

    # knots are 0-based indices; levels are 1-based, hence knots+1
    return np.interp(np.arange(1, n_levels + 1), knots+1, data[knots])

def linear_splines(x, num_knots, *params):
    '''
    Piecewise linear model evaluated at x; used as the model function for
    scipy.optimize.curve_fit inside linear_splines_var.

    The first extra positional argument is a flat sequence: its first num_knots
    entries are the spline values at the knots, the rest are the knot positions.
    '''

    packed = params[0]
    values_at_knots = list(packed[:num_knots])
    knot_positions = list(packed[num_knots:])
    return np.interp(x, knot_positions, values_at_knots)

def linear_splines_var(data, num_knots=5, zero_inflated=True):
    '''
    Approximates quantile data with a piecewise linear spline whose knot
    positions and knot values are fitted with scipy.optimize.curve_fit, and
    returns the spline evaluated at every quantile level (1..99).

    Parameters
    ----------
    data : 1-D array of 99 quantile values, data[k] belonging to level k+1.
    num_knots : int
        Number of knots to fit.
    zero_inflated : bool
        If True, the leading run of zeros is left out of the fit and returned
        as zeros; only the part from the last zero onwards is fitted.

    Returns
    -------
    1-D array of 99 values. Falls back to linear_splines_unif (same num_knots)
    when too few levels remain for the fit, when curve_fit does not converge,
    or when the fitted knots are not ordered.
    '''

    data_ = data # saving full set of data in case optimize.curve_fit fails

    if zero_inflated:
        positive = np.where(data > 0)[0]
        # all-zero cdf: nothing to fit, the approximation is zero as well
        if positive.size == 0:
            return np.zeros(len(data_))
        # Index of the last zero before the first positive value. Clamp at 0:
        # when data[0] is already positive the unclamped value is -1, which
        # would slice off all but the last element and make np.zeros(-1) fail.
        idx_start = max(int(positive.min()) - 1, 0)
        data = data[idx_start:]
        if 99-idx_start < num_knots*2:
            # too few levels left for the fit; use uniform knots with the
            # requested knot count (previously hard-coded to 5)
            return linear_splines_unif(data_, num_knots=num_knots, zero_inflated=zero_inflated)
    else:
        idx_start = 0

    # levels (1-based) covered by the fitted part of the data
    levels_ = np.arange(idx_start+1, 100)

    # setting up initial value of parameters: evenly spaced knots with values
    # interpolated from the data, packed as [knot values..., knot positions...]
    p_0 = np.linspace(idx_start,98,num_knots)
    p_0 = np.hstack([np.interp(p_0, levels_, data), p_0])

    # curve_fit raises RuntimeError when the least-squares fit fails; fall back
    # to uniformly spaced knots in that case
    try:
        fit, _ = optimize.curve_fit(lambda x, *params : linear_splines(x, num_knots, params),
                levels_, data, p_0)
    except RuntimeError:
        return linear_splines_unif(data_, num_knots, zero_inflated)

    knot_vals, knots = fit[:num_knots], fit[num_knots:]
    # Reject fits whose knot values decrease (not a valid quantile function) or
    # whose knot positions decrease (np.interp requires increasing x-coordinates
    # and silently returns nonsense otherwise).
    if np.any(np.diff(knot_vals) < 0) or np.any(np.diff(knots) < 0):
        return linear_splines_unif(data_, num_knots, zero_inflated)

    return np.hstack([np.zeros(idx_start), np.interp(levels_, knots, knot_vals)])

def calc_errors(orig, approx):
    '''
    Compares two sets of quantile values through their CDFs on the grid xs.

    Each set is turned into a CDF by interpolating the probabilities qs against
    the quantile values (0 below the first value, 1 above the last). Returns
    [max, mean, mean of squares] of the absolute CDF difference, labelled
    [KS, mean L_1, CRPS] in this notebook, or three zeros when the last value
    of both inputs is zero.
    '''
    if not (orig[-1] == 0 and approx[-1] == 0):
        cdf_orig = np.interp(xs, orig, qs, left=0.0, right=1.0)
        cdf_approx = np.interp(xs, approx, qs, left=0.0, right=1.0)
        gap = np.abs(cdf_orig - cdf_approx)
        return [gap.max(), np.mean(gap), np.mean(gap**2)] # [KS, mean L_1, CRPS]
    return np.zeros(3)

def obs_CRPS(obs, approx):
    '''
    Mean squared difference, over the grid xs, between the CDF implied by the
    quantile values `approx` and the step function that is 0 below the
    observation `obs` and 1 from `obs` upwards.
    '''
    # step CDF of the observation on the grid
    step_cdf = np.zeros(N)
    at_or_above = np.where(xs >= obs)[0]
    step_cdf[at_or_above] = np.ones(at_or_above.shape[0])

    # forecast CDF: probabilities qs interpolated against the quantile values
    forecast_cdf = np.interp(xs, approx, qs, left=0.0, right=1.0)
    return np.mean((forecast_cdf - step_cdf)**2)

In [None]:
# Load stored arrays for this forecast date (presumably written by an earlier
# run of the approximation step -- confirm).
# NOTE: allow_pickle=True lets np.load unpickle arbitrary objects, which can
# execute code; only point this at result files you produced yourself.
results_dir = 'results/' + date + '/'

# spline approximations of the forecast
precip_unif = np.load(results_dir + 'precip_unif', allow_pickle=True)
precip_var = np.load(results_dir + 'precip_var', allow_pickle=True)

# approximation errors (plotted below as KS, L_1 norm and CRPS)
errors_unif = np.load(results_dir + 'errors_unif', allow_pickle=True)
errors_var = np.load(results_dir + 'errors_var', allow_pickle=True)

# CRPS against observations: original forecast, uniform nodes, variable nodes
obs_crps = np.load(results_dir + 'obs_crps', allow_pickle=True)
obs_unif_crps = np.load(results_dir + 'obs_unif_crps', allow_pickle=True)
obs_var_crps = np.load(results_dir + 'obs_var_crps', allow_pickle=True)

In [None]:
# graphing using basemap

def Basemap_plot(data, long, lat, diff=False, name=None, color_label='mm of precipiation', boundary=None):
    '''
    Draws a gridded field on a Lambert conformal map and shows the figure.

    Parameters
    ----------
    data : 2-D array of values on the (long, lat) grid.
    long, lat : 2-D arrays of grid longitudes and latitudes in degrees.
    diff : bool
        False: filled contours with 16 levels and the default colour map.
        True: pcolormesh with the diverging 'seismic' colour map and colour
        limits symmetric about zero.
    name : str or None
        Figure title; no title is set when None.
    color_label : str
        Label of the colour bar.
    boundary : number or None
        Only used when diff is True: colour limits are [-boundary, boundary].
        When None, the largest absolute value in `data`, rounded up to an
        integer, is used.
    '''

    # named bmap rather than map so the builtin is not shadowed
    bmap = Basemap(llcrnrlon=-123.,llcrnrlat=20., urcrnrlon=-59., urcrnrlat=48., projection='lcc', lat_1=38.5, lat_0=38.5, lon_0=-97.5, resolution='l')

    # draw coastlines, country boundaries, fill continents
    bmap.drawcoastlines(linewidth=0.25)
    bmap.drawcountries(linewidth=0.25)
    bmap.fillcontinents(color='xkcd:white',lake_color='xkcd:white')

    # draw the edge of the map projection region (the projection limb)
    bmap.drawmapboundary(fill_color='xkcd:white')
    bmap.drawstates()

    # draw lat/lon grid lines every 30 degrees.
    bmap.drawmeridians(np.arange(-180,180,30))
    bmap.drawparallels(np.arange(-90,90,30))

    # project grid coordinates into map coordinates
    x, y = bmap(long, lat)

    if diff:
        if boundary is None:
            boundary = int(np.ceil(max(np.abs(data.min()), np.abs(data.max()))))
        # Same limits for integer and float boundaries. Integer boundaries
        # previously produced limits of +/- boundary/2, which saturated the
        # upper half of the data range.
        mappable = plt.pcolormesh(x, y, data, norm=colors.Normalize(vmin=-boundary, vmax=boundary), cmap='seismic')
    else:
        mappable = bmap.contourf(x, y, data, 16, linewidths=1.5)

    # One colour bar, created once and then labelled; calling colorbar() a
    # second time to set the label drew a second bar on the figure.
    cbar = bmap.colorbar(mappable)
    cbar.set_label(color_label)

    if name is not None:
        plt.title(name)

    plt.show()

In [None]:
# quantile level (in percent) to display
level = 95

# precip_unif / precip_var are indexed by the position of `level` in this list
saved_levels = np.array([5,25,50,75,95])
level_idx = np.where(saved_levels == level)[0]
if level_idx.size == 0:
    # without this check the failure is an opaque IndexError further down
    raise ValueError(f'level must be one of {saved_levels.tolist()}, got {level}')

# approximation minus original forecast at the chosen level
diff_unif = (precip_unif[level_idx,:,:] - precip[level-1,:,:])[0]
diff_var = (precip_var[level_idx,:,:] - precip[level-1,:,:])[0]

# Shared colour limit for both difference maps. Kept in its own variable so the
# configured `boundary` is not overwritten: re-running this cell with another
# `level` then recomputes the limit instead of reusing a stale one.
diff_boundary = boundary
if diff_boundary is None:
    diff_boundary = int(np.ceil(max(np.abs(diff_unif.min()), np.abs(diff_unif.max()), np.abs(diff_var.min()), np.abs(diff_var.max()))))

Basemap_plot(data=precip[level-1,:,:], long=long, lat=lat, name=f'Precipitation at {level}% quantile')
Basemap_plot(data=diff_unif, long=long, lat=lat, diff=True, name=f'Uniform node error at {level}% quantile', boundary=diff_boundary)
Basemap_plot(data=diff_var, long=long, lat=lat, diff=True, name=f'Variable node error at {level}% quantile', boundary=diff_boundary)

# error maps: rows 0, 1, 2 of the error arrays are KS, L_1 norm and CRPS
error_rows = (('KS statistic', 'KS'), ('L_1 norm', 'L_1 norm'), ('CRPS', 'CRPS'))
for node_kind, error_field in (('uniform', errors_unif), ('variable', errors_var)):
    for row, (title_prefix, bar_label) in enumerate(error_rows):
        Basemap_plot(data=error_field[row,:,:], long=long, lat=lat, name=f'{title_prefix} for {node_kind} nodes', color_label=bar_label)

# CRPS against observations
for crps_field, crps_title in (
    (obs_crps, 'CRPS for original forecast compared to observed'),
    (obs_unif_crps, 'CRPS for uniform nodes compared to observed'),
    (obs_var_crps, 'CRPS for variable nodes compared to observed'),
):
    Basemap_plot(data=crps_field, long=long, lat=lat, name=crps_title, color_label='CRPS')

In [None]:
# change in CRPS against observations caused by each approximation
diff_unif_crps = obs_unif_crps - obs_crps
diff_var_crps = obs_var_crps - obs_crps

# Fixed colour limit for both maps. Because it is set right here, the
# data-driven fallback below only runs if this line is changed to None.
boundary_crps = 0.001
if boundary_crps is None:
    crps_extremes = (diff_unif_crps.min(), diff_unif_crps.max(), diff_var_crps.min(), diff_var_crps.max())
    boundary_crps = int(np.ceil(max(np.abs(extreme) for extreme in crps_extremes)))

for crps_diff, crps_diff_title in (
    (diff_unif_crps, 'Difference CRPS for uniform nodes'),
    (diff_var_crps, 'Difference CRPS for variable nodes'),
):
    Basemap_plot(data=crps_diff, long=long, lat=lat, diff=True, name=crps_diff_title, boundary=boundary_crps, color_label='CRPS')