In [116]:
import io
import json
import time
from urllib.request import urlopen

import numpy as np
import pandas as pd
import plotly.express as px
import requests

# Number of attempts to download data
MAX_ATTEMPTS = 6
# Minimum count of non-missing one-minute values a 6-hour window must hold for
# its sum to be kept (passed as ``min_count`` to the resample sum): 5 * 60 = 300.
MIN_VALID_MINS = 5 * 60
# Factor applied to the raw ``precip`` column (25.4 mm per inch).
INCH_2_MM = 25.4
# One-minute precip values at or above this (after the INCH_2_MM scaling) are
# replaced with NaN.
MAX_1_MIN = 38
# 6-hour precip totals at or above this are replaced with NaN.
MAX_6_HOUR = 840

In [66]:
def download_data(uri):
    """Fetch the data from the IEM

    The IEM download service has some protections in place to keep the number
    of inbound requests in check.  This function implements an exponential
    backoff to keep individual downloads from erroring.

    A request is retried when it raises (network error, timeout, HTTP error)
    or when the response body starts with ``"ERROR"``.  The pause after failed
    attempt ``k`` (0-based) is ``5 * 2 ** k`` seconds; no pause follows the
    final attempt.

    Args:
      uri (string): URL to fetch

    Returns:
      string data, or an empty string once ``MAX_ATTEMPTS`` tries have failed
    """
    base_delay = 5  # seconds; doubled after every failed attempt
    for attempt in range(MAX_ATTEMPTS):
        try:
            # The context manager closes the HTTP response even if read() fails.
            with urlopen(uri, timeout=300) as response:
                data = response.read().decode("utf-8")
            if not data.startswith("ERROR"):
                return data
            print(f"download_data({uri}) returned an error response")
        except Exception as exp:
            # Best effort: report the failure and fall through to the retry.
            print(f"download_data({uri}) failed with {exp}")
        # Back off before the next try; sleeping after the last one is pointless.
        if attempt < MAX_ATTEMPTS - 1:
            time.sleep(base_delay * 2**attempt)

    print("Exhausted attempts to download, returning empty data")
    return ""

In [67]:
# Maps each network name (e.g. "CO_ASOS") to the list of station ids found in
# that network's GeoJSON metadata.
station_dict = {}

states = (
    "AK AL AR AZ CA CO CT DE FL GA HI IA ID IL IN KS KY LA MA MD ME MI MN "
    "MO MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT "
    "WA WI WV WY"
)
networks = [f"{state}_ASOS" for state in states.split()]

for network in networks:
    # Get metadata
    uri = f"https://mesonet.agron.iastate.edu/geojson/network/{network}.geojson"
    # The context manager closes the HTTP response; the timeout (same value as
    # download_data uses) keeps a stalled request from hanging the notebook.
    with urlopen(uri, timeout=300) as response:
        jdict = json.load(response)
    station_dict[network] = [
        site["properties"]["sid"] for site in jdict["features"]
    ]

In [68]:
# Station and date window for the one-minute request.
state = "CO"
network = f"{state}_ASOS"
start_year, start_month, start_day = 2022, 1, 1
end_year, end_month, end_day = 2024, 7, 1

station = "DEN"

# The URL is assembled from adjacent literals so each group of query
# parameters sits on its own line; the resulting string is unchanged.
uri = (
    "https://mesonet.agron.iastate.edu/cgi-bin/request/asos1min.py"
    f"?station={station}&tz=UTC"
    f"&year1={start_year}&month1={start_month}&day1={start_day}&hour1=0&minute1=0"
    f"&year2={end_year}&month2={end_month}&day2={end_day}&hour2=23&minute2=59"
    "&vars=ptype&vars=precip&sample=1min&what=view&delim=comma&gis=yes"
)
data = download_data(uri)
# Example of a fully expanded request URL:
# https://mesonet.agron.iastate.edu/cgi-bin/request/asos1min.py?station=12N&tz=UTC&year1=2024&month1=1&day1=1&hour1=0&minute1=0&year2=2024&month2=1&day2=2&hour2=1&minute2=6&vars=ptype&vars=precip&sample=1min&what=view&delim=comma&gis=yes

In [117]:
# Fail early with a clear message when the download produced nothing, rather
# than letting the CSV parser complain about an empty buffer.
if not data:
    raise ValueError("No data was downloaded; cannot build the precipitation series")

df = pd.read_csv(io.StringIO(data))
# "M" entries are treated as missing so the column can be made numeric, then
# the values are scaled by INCH_2_MM.
df["precip"] = df["precip"].replace("M", np.nan).astype(float) * INCH_2_MM
df["valid(UTC)"] = pd.to_datetime(df["valid(UTC)"])
df = df.set_index("valid(UTC)")

# Discard one-minute values at or above the per-minute ceiling.
df.loc[df["precip"] >= MAX_1_MIN, "precip"] = np.nan

precip_df = df["precip"]
# 6-hour totals; a window with fewer than MIN_VALID_MINS non-missing values
# yields NaN instead of a partial sum.
df_resampled = precip_df.resample("6h").sum(min_count=MIN_VALID_MINS)
# Discard 6-hour totals at or above the per-window ceiling.
df_resampled.loc[df_resampled >= MAX_6_HOUR] = np.nan

# Carry the station metadata from the first row along with the series.
first_row = df.iloc[0]
df_resampled.attrs = {
    "station": first_row["station"],
    "lat": first_row["lat"],
    "lon": first_row["lon"],
}

In [118]:
# Number of 6-hour windows that ended up without a valid total.
nan_count = df_resampled.isnull().sum()
nan_count

725

In [85]:
# Plot the 6-hour precipitation totals as a time series.
station_label = df_resampled.attrs.get("station", "unknown station")

fig = px.line(df_resampled, x=df_resampled.index, y=df_resampled.values)
# Give the figure a title and axis labels so it can be read on its own.
fig.update_layout(
    title=f"6-hour precipitation totals ({station_label})",
    xaxis_title="Valid time (UTC)",
    yaxis_title="Precipitation (mm)",
)
fig.show()

In [96]:
# Attach the station id and coordinates from the first data row.
# NOTE(review): the same three attrs are already assigned where df_resampled
# is built; this cell looks redundant — confirm before removing.
first_row = df.iloc[0]
df_resampled.attrs = {key: first_row[key] for key in ("station", "lat", "lon")}

In [98]:
# Show the metadata currently attached to the resampled series.
df_resampled.attrs

{'station': 'DEN', 'lat': 39.8328, 'lon': -104.6575}

In [103]:
# Convert the series to an xarray object and carry the station metadata over.
da = df_resampled.to_xarray().assign_attrs(df_resampled.attrs)

In [120]:
# Write the converted data to a NetCDF file.
output_path = "../sample_data/precip.nc"
da.to_netcdf(output_path)

In [112]:
# Scratch check: add twelve values and scale the sum by INCH_2_MM.
# NOTE(review): presumably twelve monthly precipitation totals in inches,
# giving an annual total in mm — confirm where these numbers come from.
(0.4 + 0.4 + 0.9 + 1.7 + 2.2 + 1.9 + 2.1 + 1.6 + 1.4 + 1 + 0.6 + 0.4) * INCH_2_MM

370.84