In [1]:
import io
from datetime import date, timedelta

import xarray as xr
import requests
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import cmocean

# Not used directly, but used via xarray
import cfgrib

from herbie import Herbie, FastHerbie
import pandas as pd
from pathlib import Path

import warnings; warnings.simplefilter("ignore")

In [2]:
# Directory holding the notebook's input and output CSV files, resolved
# against the current working directory.
DATA_DIR = Path.cwd().joinpath('data')

## Get temperature data from HRRR via Herbie

In [3]:
# Load the per-sample metadata table. DATA_DIR is already defined in the
# previous cell, so it is not redefined here.
# Later cells read the `uid`, `latitude`, `longitude` and `date` columns.
metadata = pd.read_csv(DATA_DIR / 'metadata.csv')

# Parse the date column into datetimes. Bracket indexing is used instead of
# attribute assignment so a missing column fails loudly with a KeyError.
metadata['date'] = pd.to_datetime(metadata['date'])

# Independent copy, so working on `temperature` never mutates `metadata`.
temperature = metadata.copy()

In [63]:
def get_temps(date, points, uids):
    """Sample the HRRR surface temperature field at the grid cells nearest to `points`.

    Requests the HRRR ``sfc`` product at forecast hour 0 for `date` through
    Herbie (sources tried in the order aws, azure, aws-old), opens the
    ``TMP:surface`` field as an xarray dataset, and picks the nearest grid
    point for every entry of `points`.

    Parameters
    ----------
    date
        Forwarded unchanged to ``Herbie(date=...)``. The calling loop in this
        notebook supplies 'YYYY-MM-DD' strings.
    points
        Forwarded unchanged to ``ds.herbie.nearest_points(points=...)``. The
        calling loop supplies a list of (longitude, latitude) tuples.
    uids
        Labels for `points`, forwarded as ``names=``; they end up in the
        ``uid`` column of the result.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``uid`` (renamed from ``point``) and ``temperature``
        (renamed from ``t``). Values are returned as read from the GRIB
        field -- presumably Kelvin, since a later cell subtracts 273.15;
        TODO confirm.

    Notes
    -----
    No exception is handled here: anything raised by Herbie or xarray
    propagates to the caller.
    """
    hrrr_run = Herbie(
        date=date,
        model='hrrr',
        product='sfc',
        fxx=0,
        priority=['aws', 'azure', 'aws-old'],
    )
    surface_field = hrrr_run.xarray("TMP:surface")
    sampled = surface_field.herbie.nearest_points(points=points, names=uids)

    # Flatten the xarray result into a tidy two-column table. The trailing
    # copy() hands the caller a frame that is independent of the selection.
    return (
        sampled.to_dataframe()
        .reset_index()
        .rename(columns={'point': 'uid', 't': 'temperature'})
        [['uid', 'temperature']]
        .copy()
    )

In [65]:
# One 'YYYY-MM-DD' string per distinct date in the temperature table; these
# strings are what the download loop iterates over.
distinct_days = pd.to_datetime(temperature.date.unique())
unique_dates = distinct_days.strftime('%Y-%m-%d')

In [68]:
# Accumulators for the download loop: the combined per-point temperatures,
# plus the dates that succeeded and the dates that raised.
temp_df = pd.DataFrame()
found_dates, errored_dates = [], []

In [69]:
# Pull the HRRR surface temperature for every sampling date, one Herbie
# request per date. A date whose download or point lookup fails is recorded in
# `errored_dates`, with the reason kept in `error_reasons`, and the loop
# continues with the next date.
#
# NOTE: the loop variable `date` shadows `datetime.date` imported in the first
# cell. The name is kept so the notebook's global namespace is unchanged.
daily_frames = []    # per-date results, concatenated once after the loop
error_reasons = {}   # date string -> repr of the exception that was raised

for date in unique_dates:
    try:
        loc = metadata[metadata.date == date]
        points = list(zip(loc.longitude, loc.latitude))
        uids = loc.uid.values

        daily_frames.append(get_temps(date, points, uids))
        found_dates.append(date)

    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit can still stop a long-running download.
    except Exception as err:
        errored_dates.append(date)
        error_reasons[date] = repr(err)

# Concatenating once avoids re-copying the growing frame on every iteration.
# `temp_df` stays first in the list so rows collected by an earlier run of
# this cell are preserved, exactly as with the per-iteration concat.
if daily_frames:
    temp_df = pd.concat([temp_df, *daily_frames], axis=0)
    

✅ Found ┊ model=hrrr ┊ [3mproduct=sfc[0m ┊ [38;2;41;130;13m2018-May-14 00:00 UTC[92m F00[0m ┊ [38;2;255;153;0m[3mGRIB2 @ aws[0m ┊ [38;2;255;153;0m[3mIDX @ aws[0m
💔 Did not find ┊ model=hrrr ┊ [3mproduct=sfc[0m ┊ [38;2;41;130;13m2016-Aug-31 00:00 UTC[92m F00[0m
✅ Found ┊ model=hrrr ┊ [3mproduct=sfc[0m ┊ [38;2;41;130;13m2020-Nov-19 00:00 UTC[92m F00[0m ┊ [38;2;255;153;0m[3mGRIB2 @ aws[0m ┊ [38;2;255;153;0m[3mIDX @ aws[0m
✅ Found ┊ model=hrrr ┊ [3mproduct=sfc[0m ┊ [38;2;41;130;13m2016-Aug-24 00:00 UTC[92m F00[0m ┊ [38;2;255;153;0m[3mGRIB2 @ aws[0m ┊ [38;2;255;153;0m[3mIDX @ aws[0m
✅ Found ┊ model=hrrr ┊ [3mproduct=sfc[0m ┊ [38;2;41;130;13m2019-Jul-23 00:00 UTC[92m F00[0m ┊ [38;2;255;153;0m[3mGRIB2 @ aws[0m ┊ [38;2;255;153;0m[3mIDX @ aws[0m
✅ Found ┊ model=hrrr ┊ [3mproduct=sfc[0m ┊ [38;2;41;130;13m2021-Aug-23 00:00 UTC[92m F00[0m ┊ [38;2;255;153;0m[3mGRIB2 @ aws[0m ┊ [38;2;255;153;0m[3mIDX @ aws[0m
✅ Found ┊ model=hrrr ┊ [3mprod

In [70]:
# Report how many dates ended up in errored_dates
failure_summary = f"Could not pull temperature data for {len(errored_dates)} dates"
print(failure_summary)

Could not pull temperature data for 342 dates


In [78]:
# Show the failed dates in chronological order to see which part of the
# record they fall in.
errored_index = pd.to_datetime(errored_dates)
errored_index.sort_values()

DatetimeIndex(['2013-01-04', '2013-01-08', '2013-01-14', '2013-01-15',
               '2013-01-22', '2013-01-25', '2013-01-26', '2013-01-29',
               '2013-01-30', '2013-02-06',
               ...
               '2017-02-14', '2017-03-29', '2017-04-24', '2017-06-28',
               '2017-07-16', '2017-08-01', '2017-08-06', '2017-08-26',
               '2018-01-17', '2018-08-22'],
              dtype='datetime64[ns]', length=342, freq=None)

In [74]:
# Build a converted copy with 273.15 subtracted from every temperature
# (presumably Kelvin -> Celsius, given the name `cel`); `temp_df` itself is
# left untouched.
cel = temp_df.assign(temperature=temp_df.temperature - 273.15)

In [75]:
# Display the converted frame as the cell output (pandas truncates long
# frames, so only the first and last rows are rendered).
cel

Unnamed: 0,uid,temperature
0,aabm,14.212616
1,dinb,25.837616
2,fcah,27.462616
3,gnog,28.337616
4,gzpf,28.587616
...,...,...
0,zafy,30.426300
0,zmft,21.230682
0,zmjq,6.035980
0,znjg,19.498993


In [76]:
# Write the converted temperatures next to the input data; the index is
# omitted from the file.
temperature_csv = DATA_DIR / 'temperature.csv'
cel.to_csv(temperature_csv, index=False)