In [1]:
import os, sys, gc

# Cap OpenMP threading at one thread per process. This has to be in the
# environment *before* numpy (or anything that pulls it in, such as pygrib,
# xarray or pandas) is imported, because the threaded numerical back-ends
# typically read the variable when they are first loaded.
os.environ['OMP_NUM_THREADS'] = '1'

import pygrib, cfgrib

import numpy as np
import xarray as xr
import pandas as pd
import multiprocessing as mp
import matplotlib.pyplot as plt

from glob import glob
from datetime import datetime, timedelta

In [2]:
# Initializations earlier than this timestamp are treated as pre-upgrade
# when the file list is built.
upgrade_date = datetime(year=2020, month=9, day=29, hour=6)

# Root directory that is searched for per-day sub-folders of GRIB2 files.
nbm_dir = '/scratch/general/lustre/u1070830/nbm/'

In [3]:
# Which model cycles and lead times to collect.
init_hour = 0
init_freq = '24H'

forecast_hours = np.arange(36, 36+1, 3)

inits = pd.date_range(
    start=datetime(2020, 5, 18, init_hour, 0),
    end=datetime(2020, 11, 15, init_hour, 0),
    freq=init_freq)

flist = []
for init in inits:

    # Before the upgrade date, look for the cycle stamped one hour later and
    # shorten the lead time by one hour to match.
    pre_upgrade = init < upgrade_date
    if pre_upgrade:
        init = init + timedelta(hours=1)
    fhrs = forecast_hours-1 if pre_upgrade else forecast_hours

    day_folder = init.strftime('%Y%m%d')

    for fhr in fhrs:

        pattern = nbm_dir + '%s/*t%02dz*f%03d*WR.grib2'%(
            day_folder, init.hour, fhr)
        matches = glob(pattern)

        # Keep the first hit only; missing cycles are skipped silently.
        if matches:
            flist.append(matches[0])

flist = np.array(sorted(flist))
print(len(flist))

159


In [4]:
def unpack_fhr(nbm_file):
    """Collect the 24-hour probability grids from a single NBM GRIB2 file.

    The initialization date/hour and the forecast hour are parsed from the
    path, which must look like ``.../YYYYMMDD/<a>.tHHz.<b>.fFFF.<...>``: the
    parent directory is the date, the second dot-separated field of the file
    name carries the cycle hour and the fourth carries the forecast hour.

    Every message whose ``stepRange`` spans 24 hours, whose string
    representation contains ``'Probability of event'`` and whose
    ``upperLimit / 25.4`` rounds to 0.01, 0.25 or 0.50 is turned into a
    ``DataArray`` named ``'probx'`` on dims ``('y', 'x')`` with 2-D ``lat`` /
    ``lon`` coordinates and scalar ``init``, ``valid``, ``fhr``, ``interval``
    and ``threshold`` coordinates.

    Parameters
    ----------
    nbm_file : str
        Path to the GRIB2 file.

    Returns
    -------
    xarray.DataArray or None
        The selected grids concatenated along ``threshold``, or ``None`` when
        the file contains no matching message.
    """
    init_date, file_name = nbm_file.split('/')[-2:]
    name_parts = file_name.split('.')

    init_time = datetime.strptime(init_date + name_parts[1][1:3], '%Y%m%d%H')
    valid_fhr = int(name_parts[3][1:])

    # Check if nbm3.2: cycles stamped at 01/07/13/19 are shifted back onto the
    # 00/06/12/18 cycle, with the lead time lengthened by the same hour.
    if init_time.hour in [1, 7, 13, 19]:
        init_time -= timedelta(hours=1)
        valid_fhr += 1

    valid_time = init_time + timedelta(hours=valid_fhr)
    print(init_time, valid_fhr, valid_time)

    data = []
    with pygrib.open(nbm_file) as grb:

        # Iterate from the start of the file. (Fetching message 1 first and
        # then calling read() would resume *after* that message and skip it.)
        for msg in grb:

            # Only accumulation-style messages have a "start-end" step range.
            step_range = str(msg['stepRange']).split('-')
            if len(step_range) != 2:
                continue

            interval = int(step_range[1]) - int(step_range[0])
            if interval != 24:
                continue

            if 'Probability of event' not in str(msg):
                continue

            # presumably converts mm to inches -- confirm the units of upperLimit
            threshold = round(msg['upperLimit']/25.4, 2)
            if threshold not in [0.01, 0.25, 0.50]:
                continue

            # Decode once; data() returns (values, lats, lons).
            values, lats, lons = msg.data()

            idata = xr.DataArray(values, name='probx',
                                 dims=('y', 'x'),
                                 coords={'lat': (('y', 'x'), lats),
                                         'lon': (('y', 'x'), lons)})

            idata['init'] = init_time
            idata['valid'] = valid_time
            idata['fhr'] = valid_fhr
            idata['interval'] = interval
            idata['threshold'] = threshold

            data.append(idata)

    gc.collect()

    # Nothing matched: signal that with None rather than failing in concat.
    if not data:
        return None

    return xr.concat(data, dim='threshold')
                        
# unpack_fhr(flist[1])

In [None]:
# Unpack the files in parallel, one file per task. There is no point forking
# more workers than there are files, so cap the pool size accordingly.
max_workers = 86
n_workers = max(1, min(max_workers, len(flist)))

with mp.get_context('fork').Pool(n_workers) as p:
    returns = p.map(unpack_fhr, flist, chunksize=1)
    p.close()
    p.join()

# Files that produced nothing come back as None; drop them before stacking.
returns = [item for item in returns if item is not None]

if len(returns) == 0:
    raise ValueError(
        'unpack_fhr returned no data for any of the %d input files'%len(flist))

returns = xr.concat(returns, dim='valid')

2020-06-26 00:00:00 36 2020-06-27 12:00:00
2020-06-30 00:00:00 36 2020-07-01 12:00:00
2020-07-06 00:00:00 36 2020-07-07 12:00:00
2020-06-03 00:00:00 36 2020-06-04 12:00:00
2020-06-04 00:00:00 36 2020-06-05 12:00:00
2020-07-05 00:00:00 36 2020-07-06 12:00:00
2020-06-10 00:00:00 36 2020-06-11 12:00:00
2020-06-11 00:00:00 36 2020-06-12 12:00:00
2020-06-24 00:00:00 36 2020-06-25 12:00:00
2020-06-21 00:00:00 2020-06-25 00:00:0036  36 2020-06-22 12:00:002020-06-26 12:00:00

2020-06-19 00:00:00 36 2020-06-20 12:00:00
2020-06-17 00:00:00 36 2020-06-18 12:00:00
2020-06-29 00:00:00 36 2020-06-30 12:00:00
2020-06-22 00:00:00 36 2020-06-23 12:00:00
2020-05-23 00:00:00 36 2020-05-24 12:00:00
2020-06-12 00:00:00 36 2020-06-20 00:00:002020-06-13 12:00:00
 362020-05-29 00:00:00 2020-06-21 12:00:00 
36 2020-05-30 12:00:00
2020-06-05 00:00:00 36 2020-06-06 12:00:00
2020-06-28 00:00:00 36 2020-05-26 00:00:002020-06-29 12:00:00 
36 2020-05-27 12:00:00
2020-05-28 00:00:00 36 2020-05-29 12:00:00
2020-05-19 

In [None]:
# Show the result of concatenating the per-file outputs along 'valid'.
returns