In [None]:
import copy
import csv
import os
import urllib.request as req
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xarray as xr

warnings.filterwarnings('ignore')

In [None]:
def get_1d_csv(get_req, this, total):
    """Download one CSV file and parse it into a header and a list of rows.

    Parameters
    ----------
    get_req : sequence
        ``(date, init_hour, url)``. ``date`` must expose ``year``, ``month``
        and ``day``; ``init_hour`` is the integer hour of the model
        initialisation; ``url`` is the address that is fetched.
    this, total : int
        Position of this request and the total number of requests. Only used
        for the single-line progress display.

    Returns
    -------
    tuple or None
        ``(header, lines)`` on success. ``header`` is a numpy array holding
        ``'InitTime'`` followed by the comma-separated fields of the first
        line of the file. ``lines`` is a list of object arrays, one per data
        row whose first field parses as ``%Y%m%d%H``: the initialisation
        datetime, the parsed first field, then the remaining fields as raw
        strings. ``None`` is returned when the download or decoding fails.
    """
    _date, _init_hour, _url = get_req

    try:
        # The context manager releases the connection even if read() fails
        with req.urlopen(_url) as conn:
            response = conn.read().decode('utf-8')

    except Exception:
        # Best effort: a missing or unreadable file is reported and skipped.
        # `Exception` (not a bare except) so Ctrl-C can still stop a long run.
        print('\r[%d/%d] NOT FOUND %s %s'%(this, total, _date, _init_hour), end='')
        return None

    print('\r[%d/%d] %s %s'%(this, total, _date, _init_hour), end='')

    init = datetime(_date.year, _date.month, _date.day, _init_hour, 0)

    response = response.split('\n')
    header = np.append('InitTime', response[0].split(','))

    lines = []
    for line in response[1:]:
        line = line.split(',')

        try:
            line[0] = datetime.strptime(line[0], '%Y%m%d%H')
        except ValueError:
            # Blank or malformed rows carry no parseable timestamp; skip them
            continue

        lines.append(np.append(init, line))

    return header, lines

In [None]:
def get_precip_obs(s, d0, d1):
    """Fetch interval precipitation totals for one station from the Synoptic API.

    For each accumulation interval (6, 12 and 24 h) the ``stations/precip``
    endpoint is queried once per 6-hourly offset of the start time, so the
    12 h interval is requested twice and the 24 h interval four times.

    Parameters
    ----------
    s : str
        Station identifier, sent as the ``stid`` query parameter.
    d0, d1 : datetime
        Start and end of the requested period. The end is padded by one hour
        and both are shifted by 6 h for every additional offset.

    Returns
    -------
    list of pandas.DataFrame
        One frame per interval, in the order 6, 12, 24 h. Each is indexed by
        ``ValidTime`` (sorted) with the columns ``last_report``,
        ``<N>h_lag_mins`` and ``<N>h_precip_mm``.

    Notes
    -----
    The token is read from the ``SYNOPTIC_API_TOKEN`` environment variable
    when it is set; otherwise the token previously embedded here is used.
    """
    import requests

    # FIXME(security): a credential is embedded in the notebook. It is kept
    # only as a fallback so existing runs keep working; set SYNOPTIC_API_TOKEN
    # and remove the literal (and rotate the token) when possible.
    # Fallback token registered to michael.wessler@noaa.gov
    api_token = os.environ.get('SYNOPTIC_API_TOKEN', 'a2386b75ecbc4c2784db1270695dde73')
    base = 'https://api.synopticdata.com/v2/stations/precip?'

    allints = []

    forecast_interval = 6
    for interval in [6, 12, 24]:

        # Limit how big the observation lag can be (minutes)
        lag_limit = (interval/2)*60
        repeat = int((interval-forecast_interval)/6)+1

        df = []
        while repeat > 0:
            print('Working: Interval {}h Iteration {}'.format(interval, repeat))

            offset_hours = forecast_interval*(repeat-1)
            _d0 = d0+timedelta(hours=offset_hours)
            _d1 = d1+timedelta(hours=1+offset_hours)

            url = base + 'stid={}&start={}&end={}&pmode=intervals&interval={}&token={}'.format(
                s,
                _d0.strftime('%Y%m%d%H%M'),
                _d1.strftime('%Y%m%d%H%M'),
                interval, api_token)

            # A timeout keeps a stalled connection from hanging the notebook
            api_data_raw = requests.get(url, timeout=60).json()

            vdates = pd.date_range(_d0, _d1, freq='%dh'%interval)

            for ob in api_data_raw['STATION'][0]['OBSERVATIONS']['precipitation']:

                if ob.get('last_report') is None:
                    continue

                try:
                    last_rep = datetime.strptime(ob['last_report'], '%Y-%m-%dT%H:%M:%SZ')
                    vtime = vdates[np.argmin(np.abs(vdates - last_rep))]
                    # `.seconds` wraps a negative difference to just under a
                    # day, so a report later than its nearest valid time
                    # always exceeds lag_limit and is stored as NaN.
                    lag_mins = (vtime - last_rep).seconds/60
                    value = float(ob['total']) if lag_mins < lag_limit else np.nan
                except (KeyError, TypeError, ValueError):
                    # Unparseable timestamp or missing/non-numeric total: skip
                    continue

                df.append([vtime, last_rep, lag_mins, value])

            repeat -= 1

        allints.append(pd.DataFrame(df,
            columns=['ValidTime', 'last_report', '%sh_lag_mins'%interval, '%sh_precip_mm'%interval]
            ).set_index('ValidTime').sort_index())

    return allints

In [None]:
# Station whose NBM 1D Viewer output is analysed
site = 'KMSO'
vsite = 'KMSO'  # station used for observations; expand to a list of sites later on?
init_hours = [1, 7, 13, 19] #[4, 16]

# Output folders for cached data and figures (created if missing)
datadir = './{}/data/'.format(site)
figdir = './{}/figures/'.format(site)
os.makedirs(datadir, exist_ok=True)
os.makedirs(figdir, exist_ok=True)

# Data range: one entry per day from date0 through date1
date0, date1 = datetime(2020, 2, 20), datetime(2020, 6, 20)
dates = pd.date_range(start=date0, end=date1, freq='1D')

# Hours added to date1 to get the end of the observation window
# (original note: "10d @ 240h")
lead = 263
date2 = date1 + timedelta(hours=lead)

date0, date1, date2

In [None]:
# Cache file for the observations, named after the site and the date range
obfile = datadir + '%s_obs_%s_%s.pd'%(site, date0.strftime('%Y%m%d'), date1.strftime('%Y%m%d'))

if not os.path.isfile(obfile):
    # Nothing cached yet: download, combine the per-interval frames and save
    obs_by_interval = get_precip_obs(vsite, date0, date2)

    obs = obs_by_interval[0].merge(obs_by_interval[1], how='inner', on='ValidTime')
    obs = obs.merge(obs_by_interval[2], how='inner', on='ValidTime')

    # Only the precipitation totals are kept
    precip_columns = [k for k in obs.keys() if 'precip' in k]
    obs = obs[precip_columns].sort_index()

    obs.to_pickle(obfile)
    print('Saved obs to file %s'%obfile)

else:
    # Reuse the cached frame
    obs = pd.read_pickle(obfile)
    print('Loaded obs from file %s'%obfile)

obs

In [None]:
# Cache file for the NBM forecasts, named after the site and the date range
nbmfile = datadir + '%s_nbm_%s_%s.pd'%(site, date0.strftime('%Y%m%d'), date1.strftime('%Y%m%d'))

if os.path.isfile(nbmfile):
    # Load file
    nbm = pd.read_pickle(nbmfile)
    print('Loaded NBM from file %s'%nbmfile)

else:
    # For now pull from the csv generator
    # Best to get API access or store locally later
    base = 'https://hwp-viz.gsd.esrl.noaa.gov/wave1d/data/archive/'

    url_list = []
    for date in dates:
        for init_hour in init_hours:
            datestr = '{:04d}/{:02d}/{:02d}'.format(date.year, date.month, date.day)
            sitestr = '/NBM/{:02d}/{:s}.csv'.format(init_hour, site)
            url_list.append([date, init_hour, base + datestr + sitestr])

    # Try multiprocessing this for speed?
    # Plain lists are used here: the results are ragged (None or a
    # (header, rows) pair), which numpy refuses to pack into an array.
    fetched = [get_1d_csv(url, this=i+1, total=len(url_list)) for i, url in enumerate(url_list)]
    fetched = [result for result in fetched if result is not None]

    if not fetched:
        raise RuntimeError('No NBM files could be retrieved for %s'%site)

    header = fetched[0][0]

    # This drops days with incomplete collections. There may be some use
    # to keeping this data, can fix in the future if need be
    # May also want to make the 100 value flexible!
    nbm = np.array([np.array(rows) for _, rows in fetched if len(rows) == 100])

    # Stack every run into one long table and blank out empty fields
    nbm = nbm.reshape(-1, nbm.shape[-1])
    nbm[nbm == ''] = np.nan

    # Aggregate to a clean dataframe
    nbm = pd.DataFrame(nbm, columns=header).set_index(
        ['InitTime', 'ValidTime']).sort_index()

    # Drop the last two columns (original note: "misc metadata?")
    nbm = nbm.iloc[:, :-2].astype(float)
    header = nbm.columns

    # variables = np.unique([k.split('_')[0] for k in header])
    # levels = np.unique([k.split('_')[1] for k in header])

    init = pd.to_datetime(nbm.index.get_level_values(0))
    valid = pd.to_datetime(nbm.index.get_level_values(1))

    # Note the 1h 'fudge factor' in the lead time here.
    # Dividing by a Timedelta is independent of the index time resolution, and
    # inserting an array (positional) avoids index alignment and does not
    # overwrite the `lead` constant from the configuration cell.
    lead_hours = ((valid - init) / pd.Timedelta(hours=1)).astype(int) + 1
    nbm.insert(0, 'LeadTime', np.asarray(lead_hours))

    # Keep the lead time plus every accumulated-precip column except the 1h ones
    klist = sorted({k for k in nbm.keys() if ('APCP' in k) and ('1hr' not in k)})
    klist = ['LeadTime'] + klist
    nbm = nbm.loc[:, klist]

    # Nix values where lead time shorter than acc interval.
    # A single .loc assignment is used because chained indexing
    # (nbm[k][mask] = ...) may write to a temporary copy.
    for acc_hours in (24, 12, 6):
        acc_columns = [k for k in nbm.keys() if 'APCP%dhr'%acc_hours in k]
        if acc_columns:
            nbm.loc[nbm['LeadTime'] < acc_hours, acc_columns] = np.nan

    nbm.to_pickle(nbmfile)
    print('\nSaved NBM to file %s'%nbmfile)

nbm

In [None]:
# Forecasts issued with exactly 12 h lead, indexed by valid time only
at_lead_12 = nbm['LeadTime'] == 12
lead12 = nbm.loc[at_lead_12].reset_index(level='InitTime', drop=True)
lead12['APCP12hr_surface'].to_frame()

In [None]:
# Pair one forecast column of `lead12` with the 6 h observed totals on valid time
forecast_p70 = lead12['APCP6hr_surface_70% level'].to_frame()
test = forecast_p70.merge(obs['6h_precip_mm'], on='ValidTime')
test

In [None]:
# Quick look: plot the merged forecast and observation columns of `test`
test.plot()

In [None]:
# Accumulation intervals [h] (grid rows) and lead times [h] (grid columns)
intervals = [6, 12, 24]
lead_times = np.arange(6, 240+1, 6)

for plev in [5, 30, 50, 70, 95]:

    # Mean error per (interval, lead time); cells without data stay NaN
    me = np.full((len(intervals), len(lead_times)), np.nan)

    for row, fi in enumerate(intervals):
        fk = 'APCP%dhr_surface_%d%% level'%(fi, plev) #'APCP%dhr_surface'%fi
        vk = '%sh_precip_mm'%fi

        for col, lt in enumerate(lead_times):
            fx = nbm.loc[nbm['LeadTime'] == lt, fk].reset_index().drop(columns='InitTime').set_index('ValidTime')
            vx = fx.merge(obs[vk], on='ValidTime').dropna()

            # Non-positive values become NaN and therefore drop out of the mean
            vx = vx[vx > 0]

            # Mean error
            me[row, col] = (vx[fk] - vx[vk]).mean()

    fig, ax = plt.subplots(1, figsize=(30, 6), facecolor='w')

    # Copy first: set_bad on plt.cm.bwr_r itself would alter the shared colormap
    cmap = copy.copy(plt.cm.bwr_r)
    cmap.set_bad('black',1.)

    cbd = ax.pcolormesh(me, cmap=cmap, vmin=-10, vmax=10)
    # The plotted quantity is the signed mean error, so label it as such
    cb = plt.colorbar(cbd, ax=ax, orientation='horizontal', label='\nMean Error [mm]\n\nNaN Blacked Out')

    # Ticks sit at the centre of each cell
    ax.set_xlabel('\nLead Time')
    ax.set_xticks(np.arange(len(lead_times)) + .5)
    ax.set_xticklabels(lead_times)

    ax.set_ylabel('\nAccumulation Interval')
    ax.set_yticks(np.arange(len(intervals)) + .5)
    ax.set_yticklabels(intervals)

    plt.title('QPF Forecast Error for %s\n\np%d'%(site, plev))

    figname = '%s_p%s_meanerr.png'%(site, plev)
    plt.savefig(figdir + figname)

    plt.show()
    # Release the figure so repeated iterations do not accumulate open figures
    plt.close(fig)

In [None]:
# Accumulation intervals [h] (grid rows) and lead times [h] (grid columns)
intervals = [6, 12, 24]
lead_times = np.arange(6, 240+1, 6)

# Mean error per (interval, lead time); cells without data stay NaN
me = np.full((len(intervals), len(lead_times)), np.nan)

for row, fi in enumerate(intervals):
    fk = 'APCP%dhr_surface'%fi
    vk = '%sh_precip_mm'%fi

    for col, lt in enumerate(lead_times):
        fx = nbm.loc[nbm['LeadTime'] == lt, fk].reset_index().drop(columns='InitTime').set_index('ValidTime')
        vx = fx.merge(obs[vk], on='ValidTime').dropna()

        # Non-positive values become NaN and therefore drop out of the mean
        vx = vx[vx > 0]

        # Mean error
        me[row, col] = (vx[fk] - vx[vk]).mean()

fig, ax = plt.subplots(1, figsize=(30, 6), facecolor='w')

# Copy first: set_bad on plt.cm.bwr_r itself would alter the shared colormap
cmap = copy.copy(plt.cm.bwr_r)
cmap.set_bad('black',1.)

cbd = ax.pcolormesh(me, cmap=cmap, vmin=-10, vmax=10)
# The plotted quantity is the signed mean error, so label it as such
cb = plt.colorbar(cbd, ax=ax, orientation='horizontal', label='\nMean Error [mm]\n\nNaN Blacked Out')

# Ticks sit at the centre of each cell
ax.set_xlabel('\nLead Time')
ax.set_xticks(np.arange(len(lead_times)) + .5)
ax.set_xticklabels(lead_times)

ax.set_ylabel('\nAccumulation Interval')
ax.set_yticks(np.arange(len(intervals)) + .5)
ax.set_yticklabels(intervals)

plt.title('QPF Forecast Error for %s\n'%(site))

figname = '%s_meanerr.png'%(site)
plt.savefig(figdir + figname)

plt.show()
plt.close(fig)