The NOAA HRRR is a real-time 3km resolution, hourly updated, cloud-resolving, convection-allowing atmospheric model, initialized by 3km grids with 3km radar assimilation.

This notebook provides an example of accessing HRRR data, including (1) finding the data file corresponding to a date and time, (2) retrieving a portion of that file from blob storage which includes the surface temperature variable, (3) opening the file using the xarray library, and (4) rendering an image of the forecast.

This dataset is documented at http://aka.ms/ai4edata-hrrr.

In [1]:
import climatedata_functions as climf
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
metadata = pd.read_csv("../data/metadata.csv")

In [3]:
ds = climf.get_ds()#only words with recent dates

### Make metadafile with gridpoints

In [4]:
#tranform coordinates for longitude
metadata['longitude_trans'] = metadata['longitude']+360 
#add columns for gridpoints
metadata['x_grid'] = ''
metadata['y_grid'] = ''

#make new metadatafile and save as cvs 
#metadata_new = climf.save_grids(metadata.head(), ds) #(takes approx 6 minutes)
#metadata_new.to_csv('../data/metadata_grids.csv')


In [5]:
#load new metadata file that includes the gridpoints
metadata = pd.read_csv("../data/metadata_grids.csv")

In [6]:
metadata.head(10)

Unnamed: 0.1,Unnamed: 0,uid,latitude,longitude,date,split,longitude_trans,x_grid,y_grid
0,0,aabm,39.080319,-86.430867,2018-05-14,train,273.569133,570,1217
1,1,aabn,36.5597,-121.51,2016-08-31,test,238.49,550,192
2,2,aacd,35.875083,-78.878434,2020-11-19,train,281.121566,488,1455
3,3,aaee,35.487,-79.062133,2016-08-24,train,280.937867,473,1453
4,4,aaff,38.049471,-99.827001,2019-07-23,train,260.172999,513,831
5,5,aafl,39.474744,-86.898353,2021-08-23,train,273.101647,583,1202
6,6,aafp,35.647742,-79.271782,2017-11-15,train,280.728218,478,1445
7,7,aagm,35.906885,-79.132962,2020-06-10,train,280.867038,488,1447
8,8,aahy,35.726522,-79.125458,2014-08-12,train,280.874542,481,1449
9,9,aaia,35.98,-78.791686,2018-06-27,train,281.208314,493,1457


### Get temperatures for gridpoints

Make function that gets the mean/median temperature for one date for all the places sampled at that date (to save time) and the previos x days

In [7]:
metadata['temp'] = ''

In [9]:
from datetime import timedelta


start_date = climf.get_start_date(metadata.date[0])
date_index_list = metadata.index[metadata.date == start_date]
#print(date_index_list)
temp_list = []
done_list = []
days = 7
z = False
for idx,row in enumerate(metadata.index): #takes first element in metadata list
    #print(row, idx)
    start_date = metadata.date[idx]
    date_index_list = metadata.index[metadata.date == start_date]#list with all the indexes of dates with the same date
    #print(date_index_list)
    if start_date not in done_list:#only do if this date wasnt used before
        temp_list = [[0] * days for i in range(len(date_index_list))]#make list of lists to store the values inside
        #print(len(temp_list), len(temp_list[0]))
        done_list.append(start_date)#list of dates already samples
        start_date = climf.get_start_date(start_date)#formate to time object
        print(start_date)
        for x in range(days):
            count = 0
            day_date = start_date - timedelta(days=x)
            ds = climf.get_ds_aws(day_date)#getting the temperature array for the specified date
            for index in date_index_list:
                x_grid = metadata.x_grid[index]
                y_grid = metadata.y_grid[index]
                if z == True:
                    temp_list[count][x] = 'na'
                else:
                    temp_list[count][x] = ds[x_grid][y_grid]
                count += 1
                if x == days-1 and index == date_index_list[len(date_index_list)-1]:#if condition is met put the values in the metadata file
                    for i in range(len(temp_list)):                    
                        metadata.temp.loc[date_index_list[i]] = temp_list[i] #not index but 
        #if idx == 0:
        #    break  
    else:
        continue 

2018-05-14


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


2016-08-31


TypeError: 'NoneType' object is not subscriptable

In [None]:
metadata.head()

In [None]:
from datetime import date, timedelta, datetime
import io
import pandas as pd
import xarray as xr
import requests
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import cmocean
import tempfile
import numpy as np


In [None]:
start_date = climf.get_start_date('2016-08-27')
#ds = climf.get_ds_aws(start_date)


#blob_container = "https://noaahrrr.blob.core.windows.net/hrrr"
sector = "conus"
yesterday = day_date
cycle = 1 
forecast_hour = 1   # offset from cycle time
product = "wrfsfcf" # 2D surface levels
# Put it all together
file_path = f"hrrr.t{cycle:02}z.{product}{forecast_hour:02}.grib2"
url = f"https://noaa-hrrr-bdp-pds.s3.amazonaws.com/hrrr.{yesterday:%Y%m%d}/{sector}/{file_path}"

r = requests.get(f"{url}.idx")
idx = r.text.splitlines()

print(url)
print(r)
print(idx)

sfc_temp_idx = [l for l in idx if ":TMP:surface" in l][0].split(":")
# Pluck the byte offset from this line, plus the beginning offset of the next line
line_num = int(sfc_temp_idx[0])
range_start = sfc_temp_idx[1]
# The line number values are 1-indexed, so we don't need to increment it to get the next list index,
# but check we're not already reading the last line
next_line = idx[line_num].split(':') if line_num < len(idx) else None
# Pluck the start of the next byte offset, or nothing if we were on the last line
range_end = next_line[1] if next_line else None

file = tempfile.NamedTemporaryFile(prefix="tmp_", delete=False)
headers = {"Range": f"bytes={range_start}-{range_end}"}
resp = requests.get(url, headers=headers, stream=True)
with file as f:
    f.write(resp.content)
ds = xr.open_dataset(file.name, engine='cfgrib', 
                    backend_kwargs={'indexpath':''})
return ds.t.values 

In [None]:

https://noaa-hrrr-bdp-pds.s3.amazonaws.com/hrrr.20160827/conus/hrrr.t01z.wrfsfcf01.grib2

https://noaa-hrrr-bdp-pds.s3.amazonaws.com/hrrr.20160827/conus/hrrr.t12z.wrfsfcf01.grib2




In [None]:
for i in len(temp_list): 
    print(i)

date_index_list
date_index_list[2]


In [None]:
start_date = metadata.date[0]
date_index_list = metadata.index[metadata.date == start_date]
date_index_list.index == date_index_list

In [None]:
metadata.y_grid

In [None]:
print(x,y)
#ds[x][y].values

In [None]:
ds

In [None]:
from datetime import timedelta
start_date = climf.get_start_date(metadata.date[0])
temp_list = []
days = 2

for row in range(len(metadata[0].uid)):
    start_date = metadata.date[row]
    date_index_list = metadata.index[metadata.date == date]
    done_list.append(start_date)
    start_date = climf.get_start_date(start_date)
    temp_array = [[0] * len(start_date) for i in range(days)]
    print(temp_array)
    for x in range(days):
        day_date = start_date - timedelta(days=x)
        print(day_date)
        ds = climf.get_temp_aws(day_date)##input changed
        for idx in date_index_list:
            

            temp_list.append(temperature)
    temp_list


metadata.x_grid[0] ,metadata.y_grid[0]