In [21]:
import os
from datetime import datetime, timedelta
import re              # Used to search for a string in a line
import numpy as np
import urllib.request  # Used to download the file
import requests        # Used to check if a URL exists
import warnings

import pandas as pd    # Just used for the date_range function

In [22]:
def download_HRRR_subset(url, searchString, SAVEDIR='./', dryrun=False):
    """
    Download a subset of GRIB fields from a HRRR file.
    
    This assumes there is an index (.idx) file available for the file.
    
    Parameters
    ----------
    url : string
        The URL for the HRRR file you are trying to download. There must be an 
        index file for the GRIB2 file. For example, if 
        ``url='https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200624/hrrr.t01z.wrfsfcf17.grib2'``,
        then ``https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200624/hrrr.t01z.wrfsfcf17.grib2.idx``
        must also exist on the server.
    searchString : str
        The string you are looking for in each line of the index file. 
        Take a look at the 
        .idx file at https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200624/hrrr.t01z.wrfsfcf17.grib2.idx
        to get familiar with what is in each line.
        Also look at this webpage: http://hrrr.chpc.utah.edu/HRRR_archive/hrrr_sfc_table_f00-f01.html
        for additional details.**You should focus on the variable and level 
        field for your searches**.
        
        You may use regular expression syntax to customize your search. 
        Check out this regulare expression cheatsheet:
        https://link.medium.com/7rxduD2e06
        
        Here are a few examples that can help you get started
        
        ================ ===============================================
        ``searchString`` Messages that will be downloaded
        ================ ===============================================
        ':TMP:2 m'       Temperature at 2 m.
        ':TMP:'          Temperature fields at all levels.
        ':500 mb:'       All variables on the 500 mb level.
        ':APCP:'         All accumulated precipitation fields.
        ':UGRD:10 m:'    U wind component at 10 meters.
        ':(U|V)GRD:'     U and V wind component at all levels.
        ':.GRD:'         (Same as above)
        ':(TMP|DPT):'    Temperature and Dew Point for all levels .
        ':(TMP|DPT|RH):' TMP, DPT, and Relative Humidity for all levels.
        ':REFC:'         Composite Reflectivity
        ':surface:'      All variables at the surface.
        ================ ===============================================    
        
    SAVEDIR : string
        Directory path to save the file, default is the current directory.
    dryrun : bool
        If True, do not actually download, but print out what the function will
        attempt to do.
    
    Returns
    -------
    The path and name of the new file.
    """
    # Ping Pando first. This *might* prevent a "bad handshake" error.
    if 'pando' in url:
        try:
            requests.head('https://pando-rgw01.chpc.utah.edu/')
        except:
            print('bad handshake...am I able to on?')
            pass
    
    # Make SAVEDIR if path doesn't exist
    if not os.path.exists(SAVEDIR):
        os.makedirs(SAVEDIR)
        print(f'Created directory: {SAVEDIR}')

    
    # Make a request for the .idx file for the above URL
    idx = url + '.idx'
    r = requests.get(idx)

    # Check that the file exists. If there isn't an index, you will get a 404 error.
    if not r.ok: 
        print('❌ SORRY! Status Code:', r.status_code, r.reason)
        print(f'❌ It does not look like the index file exists: {idx}')

    # Read the text lines of the request
    lines = r.text.split('\n')
    
    # Search expression
    expr = re.compile(searchString)

    # Store the byte ranges in a dictionary
    #     {byte-range-as-string: line}
    byte_ranges = {}
    for n, line in enumerate(lines, start=1):
        # n is the line number (starting from 1) so that when we call for 
        # `lines[n]` it will give us the next line. (Clear as mud??)

        # Use the compiled regular expression to search the line
        if expr.search(line):   
            # aka, if the line contains the string we are looking for...

            # Get the beginning byte in the line we found
            parts = line.split(':')
            rangestart = int(parts[1])

            # Get the beginning byte in the next line...
            if n+1 < len(lines):
                # ...if there is a next line
                parts = lines[n].split(':')
                rangeend = int(parts[1])
            else:
                # ...if there isn't a next line, then go to the end of the file.
                rangeend = ''

            # Store the byte-range string in our dictionary, 
            # and keep the line information too so we can refer back to it.
            byte_ranges[f'{rangestart}-{rangeend}'] = line
    
    # What should we name the file we save this data to?
    # Let's name it something like `subset_20200624_hrrr.t01z.wrfsfcf17.grib2`
    runDate = list(byte_ranges.items())[0][1].split(':')[2][2:-2]
    outFile = '_'.join(['subset', runDate, url.split('/')[-1]])
    outFile = os.path.join(SAVEDIR, outFile)
    
    for i, (byteRange, line) in enumerate(byte_ranges.items()):
        
        if i == 0:
            # If we are working on the first item, overwrite the existing file.
            curl = f'curl -s --range {byteRange} {url} > {outFile}'
        else:
            # If we are working on not the first item, append the existing file.
            curl = f'curl -s --range {byteRange} {url} >> {outFile}'
            
        num, byte, date, var, level, forecast, _ = line.split(':')
        
        if dryrun:
            print(f'  🌵 Dry Run: Found GRIB line [{num}]: variable={var}, level={level}, forecast={forecast}')
            print(f'  🌵 Dry Run: `{curl}`')
        else:
            print(f'  Downloading GRIB line [{num}]: variable={var}, level={level}, forecast={forecast}')    
            os.system(curl)
    
    if dryrun:
        print(f'🌵 Dry Run: Success! Searched for [{searchString}] and found [{len(byte_ranges)}] GRIB fields.')
        print(f'🌵 Dry Run: Would save as {outFile}')
    else:
        print(f'✅ Success! Searched for [{searchString}] and got [{len(byte_ranges)}] GRIB fields.')
        print(f'    Saved as {outFile}')
    
        return outFile

In [23]:
# Set the start and end date for the HRRR files we want to download
sDATE = datetime(2020,3,27,18)
eDATE = datetime(2020,3,27,18)

# Create a list of datetimes we want to download with Pandas `date_range` function.
# The HRRR model is run every hour, so make a list of every hour
DATES = pd.date_range(sDATE, eDATE, freq='1H')
DATES

DatetimeIndex(['2020-03-27 18:00:00'], dtype='datetime64[ns]', freq='H')

In [24]:
fxx = range(0,37,1)
list(fxx)

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36]

In [26]:
URL_list = [f'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/{DATE:%Y%m%d}/hrrr.t{DATE:%H}z.wrfsfcf{f:02d}.grib2' for DATE in DATES for f in fxx]
URL_list

['https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf00.grib2',
 'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf01.grib2',
 'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf02.grib2',
 'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf03.grib2',
 'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf04.grib2',
 'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf05.grib2',
 'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf06.grib2',
 'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf07.grib2',
 'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf08.grib2',
 'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf09.grib2',
 'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf10.grib2',
 'https://pando-rgw01.chpc.utah.edu/hrrr/sfc/20200327/hrrr.t18z.wrfsfcf11.grib2',
 'https://pando-

In [27]:
for i in range(0,len(URL_list)-1):
    head = requests.head(URL_list[i])
    check_exists = head.ok
    check_content = int(head.raw.info()['Content-Length']) > 1000000
    if check_exists and check_content:
        download_HRRR_subset(URL_list[i],":TMP:2 m",SAVEDIR='./putInThisDir/')
    else:
        print()
        print(f'❌ WARNING: Status code {head.status_code}: {head.reason}. Content-Length: {int(head.raw.info()["Content-Length"]):,} bytes')
        print(f'❌ Could not download {head.url}')

  Downloading GRIB line [66]: variable=TMP, level=2 m above ground, forecast=anl
✅ Success! Searched for [:TMP:2 m] and got [1] GRIB fields.
    Saved as ./putInThisDir/subset_20200327_hrrr.t18z.wrfsfcf00.grib2
  Downloading GRIB line [66]: variable=TMP, level=2 m above ground, forecast=1 hour fcst
✅ Success! Searched for [:TMP:2 m] and got [1] GRIB fields.
    Saved as ./putInThisDir/subset_20200327_hrrr.t18z.wrfsfcf01.grib2
  Downloading GRIB line [66]: variable=TMP, level=2 m above ground, forecast=2 hour fcst
✅ Success! Searched for [:TMP:2 m] and got [1] GRIB fields.
    Saved as ./putInThisDir/subset_20200327_hrrr.t18z.wrfsfcf02.grib2


ValueError: not enough values to unpack (expected 7, got 6)