In [1]:
import pandas as pd
import zeep
from functools import reduce
import pins
import dotenv
import requests
import os
import requests

# Load environment variables from the local .env file (SYNOPTIC_TOKEN is read
# from the environment by the weather request further down).
dotenv.load_dotenv(".env")

# Timestamp of this run, expressed in the America/Denver time zone.
current_time = pd.Timestamp.now(tz="America/Denver")

# Pins board that stores the site configuration and the scraped data.
# NOTE(review): board_connect() presumably picks up its server URL / API key
# from the environment loaded above - confirm.
board = pins.board_connect()

# Configuration!

In [2]:
# Pin config
# Names of the pins that hold the accumulated SNOTEL and weather-station data;
# both are read and re-written by the cells below.
SNOTEL_PIN_NAME = "nick.pelikan/snotel_data"
WX_PIN_NAME = "nick.pelikan/wx_data"

In [2]:
# Per-area site configuration.
#   "snotel_sites": station triplet -> display name; the triplets are sent to
#                   the SNOTEL service as `stationTriplets`.
#   "wx_stations":  station id -> display name; the ids are sent to the
#                   Synoptic API as `stid`.
# A station may appear under more than one area (e.g. "C99", "REY").
# NOTE(review): the area keys presumably stand for Big Cottonwood Canyon,
# Little Cottonwood Canyon and Park City - confirm.
site_config = {
    "BCC": {
        "snotel_sites": {
            "366:UT:SNTL": "Brighton, UT",
            "628:UT:SNTL": "Mill D, UT",
        },
        "wx_stations": {
            "C99": "Canyons - 9990",
            "REY": "Reynolds Peak",
            "UTCDF": "Cardiff Trailhead",
            "PC056": "Brighton",
        },
    },
    "LCC": {
        "snotel_sites": {
            "766:UT:SNTL": "Snowbird, UT",
            "1308:UT:SNTL": "Atwater Plot, UT",
        },
        "wx_stations": {
            "IFF": "Cardiff Peak",
            "PC064": "Albion Basin",
            "AMB": "Alta - Baldy",
            "HP": "Hidden Peak",
        },
    },
    "PC": {
        "snotel_sites": {
            "814:UT:SNTL": "Thaynes Canyon, UT",
        },
        "wx_stations": {
            "C99": "Canyons - 9990",
            "CDYBK": "Canyons - Daybreak",
            "REY": "Reynolds Peak",
        },
    },
}


In [4]:
# Quick look at the configured area keys.
site_config.keys()

dict_keys(['BCC', 'LCC', 'PC'])

In [None]:

# Publish the site configuration itself as a JSON pin.
board.pin_write(site_config, name="nick.pelikan/snow_sites", type="json")

# Flatten the per-area configuration into one lookup per source. A station
# listed under several areas keeps its first position, and the label from the
# last area that lists it wins.
station_ids = {
    stid: label
    for area in site_config.values()
    for stid, label in area["wx_stations"].items()
}
snotel_sites = {
    triplet: label
    for area in site_config.values()
    for triplet, label in area["snotel_sites"].items()
}

# SNOTEL element codes to request, mapped to human-readable labels.
snotel_sensors = dict(
    TOBS="Air Temperature (F)",
    SNWD="Snow Depth (in)",
    WTEQ="Snow Water Eq (in)",
)

# SNOTEL Data Scraping
SNOTEL data is served through an old (and ...fun...) SOAP API, which this notebook talks to with `zeep`.

In [None]:
def filter_valdict(d):
    """Return a new dict holding only the ``dateTime`` and ``value`` entries of ``d``.

    Entries keep the order in which they appear in ``d``; any other key is
    dropped, and a key that is absent from ``d`` is simply not included.
    """
    kept = {}
    for key, val in d.items():
        if key == "dateTime" or key == "value":
            kept[key] = val
    return kept

def process_site(i, sensor_code):
    """Convert one station's hourly-data response into a tidy DataFrame.

    Parameters
    ----------
    i : zeep object
        One element of the ``getHourlyData`` response. After serialization it
        must expose ``stationTriplet`` and ``values``, where ``values`` is a
        list of records carrying ``dateTime`` and ``value``.
    sensor_code : str
        Name given to the resulting value column.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(siteCode, dateTime)`` with a single float column named
        ``sensor_code``. When the station reported no values the frame is
        empty but still has that index and column, so it concatenates and
        merges cleanly with the other stations/sensors.
    """
    site = zeep.helpers.serialize_object(i)

    # A station can come back with an empty (or None) list of values for the
    # requested window. Building the frame with explicit columns keeps the
    # "dateTime"/"value" columns present in that case instead of failing with
    # a KeyError further down.
    records = [filter_valdict(x) for x in (site["values"] or [])]
    df = pd.DataFrame(records, columns=["dateTime", "value"])

    df["siteCode"] = site["stationTriplet"]
    df["dateTime"] = pd.to_datetime(df["dateTime"])
    df = df.set_index(["siteCode", "dateTime"])
    df["value"] = df["value"].astype(float)
    return df.rename(columns={"value": sensor_code})


def get_single_sensor_data(snotel_client, site_codes, sensor_code, start_date, end_date):
    """Fetch hourly readings of one sensor for a set of stations.

    Calls ``getHourlyData`` on ``snotel_client`` for ``site_codes`` and
    ``sensor_code`` (ordinal 1) between ``start_date`` and ``end_date``, turns
    each element of the response into a DataFrame with ``process_site`` and
    returns them stacked into a single frame.
    """
    hourly = snotel_client.service.getHourlyData(
        stationTriplets=site_codes,
        elementCd=sensor_code,
        ordinal=1,
        beginDate=start_date,
        endDate=end_date,
    )
    frames = [process_site(station, sensor_code=sensor_code) for station in hourly]
    return pd.concat(frames)


def get_snotel_data(snotel_client, site_codes, sensor_codes, start_date, end_date, how="inner"):
    """Fetch several sensors for a set of stations and join them column-wise.

    Parameters
    ----------
    snotel_client
        Client whose ``service.getHourlyData`` is used for the requests.
    site_codes : list of str
        Station triplets to query.
    sensor_codes : iterable of str
        Element codes to query; each becomes one column of the result.
    start_date, end_date : str
        Bounds of the requested window, passed through to the service.
    how : str, default "inner"
        Join type used when combining the per-sensor frames on their
        ``(siteCode, dateTime)`` index. The default keeps only the
        station/hour combinations present for *every* sensor; pass ``"outer"``
        to keep hours where some sensors are missing (filled with NaN).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(siteCode, dateTime)`` with one column per sensor code.
    """
    frames = [
        get_single_sensor_data(
            snotel_client,
            sensor_code=code,
            site_codes=site_codes,
            start_date=start_date,
            end_date=end_date,
        )
        for code in sensor_codes
    ]

    return reduce(
        lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how=how),
        frames,
    )


In [None]:

# SOAP client built from the AWDB web-service WSDL.
client = zeep.Client("https://wcc.sc.egov.usda.gov/awdbWebService/services?WSDL")
current_time = pd.Timestamp.now(tz="America/Denver")

# Resume from the run time recorded on the SNOTEL pin when that pin exists;
# otherwise start 60 days before now.
try:
    snotel_old_meta = board.pin_meta(SNOTEL_PIN_NAME)
except pins.errors.PinsError:
    lastmax = current_time - pd.Timedelta(days=60)
else:
    lastmax = pd.to_datetime(snotel_old_meta.user["last_run_time"])

print(f"Getting data from {lastmax} to current")

In [None]:

# Request every configured sensor for every configured SNOTEL site, with the
# window bounds truncated to the whole hour, then flatten the
# (siteCode, dateTime) index into columns and attach the display name.
df = (
    get_snotel_data(
        client,
        site_codes=list(snotel_sites.keys()),
        sensor_codes=list(snotel_sensors.keys()),
        start_date=lastmax.strftime("%Y-%m-%d %H:00:00"),
        end_date=current_time.strftime("%Y-%m-%d %H:00:00"),
    )
    .reset_index()
    .assign(siteName=lambda frame: frame["siteCode"].replace(snotel_sites))
    .reset_index(drop=True)
)
print(f"Retrieved {df.shape[0]} new rows.")


In [None]:
# Append the newly fetched rows to the history stored in the SNOTEL pin.
try:
    snotel_old = board.pin_read(SNOTEL_PIN_NAME)
except pins.errors.PinsError:
    # No existing pin: the fetched rows are the whole data set.
    out = df
else:
    # The request window is truncated to the hour, so consecutive runs can
    # fetch the same (site, hour) again. Keep only the most recently fetched
    # copy of each reading and give the result a clean 0..n-1 index instead of
    # the repeated labels a plain concat would leave behind.
    out = (
        pd.concat((snotel_old, df), ignore_index=True)
        .drop_duplicates(subset=["siteCode", "dateTime"], keep="last")
        .reset_index(drop=True)
    )

board.pin_write(
    out,
    name=SNOTEL_PIN_NAME,
    type="parquet",
    # Explicit %H:%M:%S rather than the locale-dependent %X, so the stored
    # run time has the same format on every machine.
    metadata={"last_run_time": current_time.strftime("%Y-%m-%d %H:%M:%S")},
)

print("Successfully wrote snotel data")

# Weather Data Scraping

In [7]:
# Show the current value of `lastmax` (set by the SNOTEL cells above).
lastmax

Timestamp('2024-02-26 09:56:54')

In [8]:
# Size the weather request window in minutes: the time elapsed since the run
# recorded on the weather pin, or 43200 minutes (30 days) when the pin does
# not exist yet.
try:
    lastmax = pd.to_datetime(
        board.pin_meta(WX_PIN_NAME).user["last_run_time"]
    ).tz_localize(tz="America/Denver")
except pins.errors.PinsError:
    wx_req_run = 30 * 24 * 60
else:
    wx_req_run = int((current_time - lastmax).total_seconds() / 60)

def get_station_series(i):
    """Turn one station entry of the timeseries response into a DataFrame.

    Parameters
    ----------
    i : dict
        Station entry with ``OBSERVATIONS`` (column name -> list of values,
        including ``date_time``), ``NAME`` and ``STID``.

    Returns
    -------
    pandas.DataFrame
        The observation columns, with ``date_time`` converted to the
        America/Denver time zone, plus ``station`` (from ``NAME``) and
        ``station_id`` (from ``STID``) on every row.
    """
    df = pd.DataFrame(i["OBSERVATIONS"])

    # Parse with utc=True so the column is always timezone-aware before the
    # conversion: an empty observation list (or timestamps without an offset)
    # would otherwise parse as tz-naive and make tz_convert raise.
    df["date_time"] = pd.to_datetime(df["date_time"], utc=True).dt.tz_convert(tz="America/Denver")
    df["station"] = i["NAME"]
    df["station_id"] = i["STID"]

    return df

# Request the recent observations for every configured weather station.
resp = requests.get(
    "https://api.synopticdata.com/v2/stations/timeseries",
    params={
        "token": os.getenv("SYNOPTIC_TOKEN"),  # access token
        # Sent as one comma-separated value; a list would be encoded by
        # requests as a repeated `stid=` key per station.
        "stid": ",".join(station_ids),  # mesonet station ids
        "recent": wx_req_run,  # time in minutes
    },
    timeout=60,  # do not hang the notebook on an unresponsive server
)
# Fail on HTTP errors here rather than on a confusing JSON/KeyError below.
resp.raise_for_status()

j = resp.json()
stations = j.get("STATION") or []
if not stations:
    # Surface the API's own summary instead of a bare KeyError / "No objects
    # to concatenate" when the response carries no station data.
    raise RuntimeError(f"Synoptic API returned no station data: {j.get('SUMMARY')}")

# get_station_series already returns date_time in America/Denver, so no
# further conversion is needed after stacking the per-station frames.
wx_df = pd.concat((get_station_series(x) for x in stations))

print(f"Retrieved {wx_df.shape[0]} rows of new weather data!")

Retrieved 128 rows of new weather data!


In [9]:
# Append the newly fetched observations to the history stored in the weather pin.
try:
    wx_old = board.pin_read(WX_PIN_NAME)
except pins.errors.PinsError:
    # No existing pin: the fetched rows are the whole data set.
    wx_out = wx_df
else:
    # Guard against a re-fetched observation being stored twice: keep the most
    # recently fetched row per (station, timestamp) and replace the repeated
    # per-station index labels with a clean 0..n-1 index.
    wx_out = (
        pd.concat((wx_old, wx_df), ignore_index=True)
        .drop_duplicates(subset=["station_id", "date_time"], keep="last")
        .reset_index(drop=True)
    )

board.pin_write(
    wx_out,
    name=WX_PIN_NAME,
    type="parquet",
    # Explicit %H:%M:%S rather than the locale-dependent %X, so the stored
    # run time has the same format on every machine.
    metadata={"last_run_time": current_time.strftime("%Y-%m-%d %H:%M:%S")},
)

print("Successfully wrote WX data")

Writing pin:
Name: 'nick.pelikan/wx_data'
Version: 20240226T145414Z-64b39


Successfully wrote WX data


In [10]:
# Read the site-config pin back to check what is stored on the board.
board.pin_read("nick.pelikan/snow_sites")

{'BCC': {'snotel_sites': {'366:UT:SNTL': 'Brighton, UT',
   '628:UT:SNTL': 'Mill D, UT'},
  'wx_stations': {'C99': 'Canyons - 9990',
   'REY': 'Reynolds Peak',
   'UTCDF': 'Cardiff Trailhead',
   'PC056': 'Brighton'}},
 'LCC': {'snotel_sites': {'766:UT:SNTL': 'Snowbird, UT',
   '1308:UT:SNTL': 'Atwater Plot, UT'},
  'wx_stations': {'IFF': 'Cardiff Peak',
   'PC064': 'Albion Basin',
   'AMB': 'Alta - Baldy',
   'HP': 'Hidden Peak'}},
 'PC': {'snotel_sites': {'814:UT:SNTL': 'Thaynes Canyon, UT'},
  'wx_stations': {'C99': 'Canyons - 9990',
   'CDYBK': 'Canyons - Daybreak',
   'REY': 'Reynolds Peak'}}}

In [4]:
# Fetch the metadata of the site-config pin and display its pin_hash.
m = board.pin_meta("nick.pelikan/snow_sites")
m.pin_hash

'6af462c17551cbe0'

NameError: name 'site_config' is not defined