In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

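Weather data comes from CIMIS (California Irrigation Management Information System): the web portal at [https://cimis.water.ca.gov/Default.aspx](https://cimis.water.ca.gov/Default.aspx) and its REST API at [https://et.water.ca.gov/Rest/Index](https://et.water.ca.gov/Rest/Index).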

In [2]:
import sys

# Make the project-local loader package importable from this notebook.
# The membership check keeps the cell idempotent: re-running it no longer
# stacks duplicate entries onto sys.path.
LOADER_DIR = "../loaders/weather_station"
if LOADER_DIR not in sys.path:
    sys.path.append(LOADER_DIR)

In [3]:
import weather_reports

In [4]:
# Load the source events and convert their window columns to timestamps.
# The former `parse_dates=True` argument was dropped: it only asks read_csv to
# try parsing the index, and no index column is requested here, so it never
# touched "start"/"end". The explicit conversion below is what does the work.
source = pd.read_csv("../data/source.csv").astype(
    {"start": "datetime64[ns]", "end": "datetime64[ns]"}
)

In [5]:
# Load the weather-station catalogue and list the columns it provides.
# NOTE(review): parse_dates=True only targets the index, so ConnectDate /
# DisconnectDate presumably remain plain strings -- confirm if dates are needed.
stations_csv = "../data/weather/weather_stations.csv"
ws = pd.read_csv(stations_csv, parse_dates=True)
ws.columns

Index(['StationNbr', 'Name', 'City', 'RegionalOffice', 'County', 'ConnectDate',
       'DisconnectDate', 'IsActive', 'IsEtoStation', 'Elevation',
       'GroundCover', 'HmsLatitude', 'HmsLongitude', 'ZipCodes', 'SitingDesc'],
      dtype='object')

In [6]:
# Spot-check a single catalogue entry: the row whose StationNbr is 6.
ws.loc[ws["StationNbr"] == 6]

Unnamed: 0,StationNbr,Name,City,RegionalOffice,County,ConnectDate,DisconnectDate,IsActive,IsEtoStation,Elevation,GroundCover,HmsLatitude,HmsLongitude,ZipCodes,SitingDesc
5,6,Davis,Davis,North Central Region Office,Yolo,7/17/1982,12/31/2050,True,True,60,Grass,38º32'8N / 38.535694,-121º46'35W / -121.776360,"['95618', '95617', '95616']",


In [7]:
# Pair each catalogue station name with a station name used in `source`.
#
# A catalogue name matches when it occurs, case-insensitively, as a substring
# of the source name. Only the first hit (in `sce_ws` order) is kept per
# catalogue name.
#
# Missing names are dropped up front: `.lower()` on a NaN would raise
# AttributeError and abort the whole matching pass.
#
# NOTE(review): substring matching can pair unrelated sites -- e.g. 'Davis'
# (catalogue County 'Yolo') is paired with 'SCE Mount Davis'. Verify the
# resulting mapping by eye before relying on it.
sce_ws = source["Weather Station"].dropna().unique()
api_ws = ws.Name.dropna()
matches = {}
for api_name in api_ws:
    if api_name in matches:
        # A repeated catalogue name keeps the pairing found the first time.
        continue
    needle = api_name.lower()
    # Stop at the first source name containing the catalogue name.
    first_hit = next(
        (sce_name for sce_name in sce_ws if needle in sce_name.lower()),
        None,
    )
    if first_hit is not None:
        matches[api_name] = first_hit

In [8]:
# Show the catalogue-name -> source-station-name mapping for a manual check;
# it was built by substring matching, so each pair should be verified by eye.
matches

{'Davis': 'SCE Mount Davis',
 'Soledad': 'SCE Soledad Canyon Rd',
 'Santa Paula': 'SCE Santa Paula',
 'Escondido': 'SCE Escondido Canyon',
 'Ramona': 'SCE Ramona Expy',
 'Moorpark': 'SCE Moorpark',
 'Moreno Valley': 'SCE Moreno Valley Fwy',
 'Williams': 'SCE Williams Canyon Rd',
 'Highland': 'HIGHLAND SPRINGS RAW',
 'Chino': 'SCE Chino Hills 3'}

In [9]:
# Keep only the catalogue stations that were paired with a source station,
# relabel them with the source-side station name, and join them onto `source`.
is_matched = ws["Name"].isin(list(matches))
ws_matches = ws.loc[is_matched].copy()
ws_matches["Name"] = ws_matches["Name"].map(matches)
source_ws = pd.merge(
    source,
    ws_matches,
    left_on="Weather Station",
    right_on="Name",
)

In [11]:
# One row per distinct (station number, start, end) combination to query.
ws_query = source_ws[["StationNbr", "start", "end"]].drop_duplicates()

In [14]:
# Widen every query window by 30 days on each side of the event, so the report
# also covers the conditions leading up to and following it.
one_day = pd.Timedelta(days=1)  # not used for the padding; kept as a notebook-level name
one_month = pd.Timedelta(days=30)

# Rebuild the unpadded windows from `source_ws` and derive the padded frame with
# .assign(): re-running this cell can no longer widen the windows a second
# time, and nothing is written through a slice (no SettingWithCopyWarning).
ws_query = (
    source_ws[["StationNbr", "start", "end"]]
    .drop_duplicates()
    .assign(
        start=lambda windows: windows["start"] - one_month,
        end=lambda windows: windows["end"] + one_month,
    )
)

In [15]:
# Render every field as text and hand the rows over as a plain 2-D array of
# [station number, start, end] strings.
query = ws_query.astype("str").to_numpy()
query

array([['6', '2020-10-30', '2020-12-29'],
       ['6', '2020-11-16', '2021-01-23'],
       ['6', '2020-11-04', '2021-01-13'],
       ['6', '2020-12-13', '2021-02-20'],
       ['98', '2020-10-30', '2020-12-29'],
       ['238', '2020-10-30', '2020-12-29'],
       ['58', '2020-10-30', '2020-12-29'],
       ['198', '2020-10-30', '2020-12-29'],
       ['58', '2020-12-13', '2021-02-20'],
       ['198', '2020-12-13', '2021-02-20'],
       ['28', '2020-10-30', '2020-12-29'],
       ['217', '2020-10-30', '2020-12-29'],
       ['251', '2020-11-04', '2021-01-13'],
       ['250', '2020-11-04', '2021-01-13'],
       ['74', '2020-11-04', '2021-01-13'],
       ['74', '2020-12-13', '2021-02-20'],
       ['255', '2020-12-13', '2021-02-20']], dtype=object)

In [16]:
# Pass the [station, start, end] string rows to the project-local
# weather_reports loader and keep what it returns as `wr`.
# NOTE(review): presumably this calls the remote weather API once per row, so it
# may be slow and need network access -- confirm in loaders/weather_station.
wr = weather_reports.weather_station_query(query)

Querying 6 2020-10-30 2020-12-29
Querying 6 2020-11-16 2021-01-23
Querying 6 2020-11-04 2021-01-13
Querying 6 2020-12-13 2021-02-20
Querying 98 2020-10-30 2020-12-29
Querying 238 2020-10-30 2020-12-29
Querying 58 2020-10-30 2020-12-29
Querying 198 2020-10-30 2020-12-29
Querying 58 2020-12-13 2021-02-20
Querying 198 2020-12-13 2021-02-20
Querying 28 2020-10-30 2020-12-29
Querying 217 2020-10-30 2020-12-29
Querying 251 2020-11-04 2021-01-13
Querying 250 2020-11-04 2021-01-13
Querying 74 2020-11-04 2021-01-13
Querying 74 2020-12-13 2021-02-20
Querying 255 2020-12-13 2021-02-20


In [17]:
# Preview the first rows of the returned report to sanity-check its columns.
wr.head()

Unnamed: 0,Date,Julian,Hour,Station,Standard,ZipCodes,Scope,HlyAirTmp (F),HlyDewPnt (F),HlyEto (in),...,HlyAsceEto (in),HlyAsceEtr (in),HlyPrecip (in),HlyRelHum (%),HlyResWind (MPH),HlySoilTmp (F),HlySolRad (Ly/day),HlyVapPres (mBars),HlyWindDir (°),HlyWindSpd (MPH)
0,2020-10-30,304,100,6,english,"95618, 95617, 95616",hourly,45.4,35.7,0,...,0,,0,69,2.0,57.6,0,7.1,217,2.2
1,2020-10-30,304,200,6,english,"95618, 95617, 95616",hourly,45.1,35.9,0,...,0,,0,70,1.5,57.2,0,7.2,251,1.7
2,2020-10-30,304,300,6,english,"95618, 95617, 95616",hourly,43.9,34.7,0,...,0,,0,70,1.8,56.9,0,6.8,274,2.1
3,2020-10-30,304,400,6,english,"95618, 95617, 95616",hourly,43.6,34.1,0,...,0,,0,69,1.3,56.6,0,6.6,269,1.4
4,2020-10-30,304,500,6,english,"95618, 95617, 95616",hourly,45.0,35.4,0,...,0,,0,69,1.6,56.4,0,7.0,267,1.6


In [18]:
import time

# Persist the report under a name stamped with the current epoch time, so each
# run writes a new file rather than overwriting an earlier export.
export_stamp = time.time()
report_path = "../data/weather/weather_report_{}.csv".format(export_stamp)
wr.to_csv(report_path, index=False)