In [65]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

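Weather-station data comes from the California Irrigation Management Information System (CIMIS) website, [https://cimis.water.ca.gov/Default.aspx](https://cimis.water.ca.gov/Default.aspx), and from its REST API, [https://et.water.ca.gov/Rest/Index](https://et.water.ca.gov/Rest/Index).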

In [66]:
import sys

# Add the loader directory to the import path so modules stored there can be
# imported by later cells. The membership check keeps the cell idempotent:
# re-running it no longer appends the same entry again.
LOADER_DIR = "../loaders/weather_station"
if LOADER_DIR not in sys.path:
    sys.path.append(LOADER_DIR)

In [67]:
import weather_reports

In [68]:
# Load the source table and convert its "start"/"end" columns to datetimes.
# The previous `parse_dates=True` only asks pandas to parse the *index*; with
# the default integer index it did nothing, so it is dropped and the explicit
# conversion below remains the single place where the dates are typed.
DATE_COLUMNS = ["start", "end"]
source = pd.read_csv("../data/source.csv")
source[DATE_COLUMNS] = source[DATE_COLUMNS].astype("datetime64[ns]")

In [74]:
# Load the weather-station metadata table.
# `parse_dates=True` only targets the index and had no effect with the default
# integer index, so it is removed; no column is date-parsed here, as before.
ws = pd.read_csv("../data/weather/weather_stations.csv")
ws.columns

Index(['StationNbr', 'Name', 'City', 'RegionalOffice', 'County', 'ConnectDate',
       'DisconnectDate', 'IsActive', 'IsEtoStation', 'Elevation',
       'GroundCover', 'HmsLatitude', 'HmsLongitude', 'ZipCodes', 'SitingDesc'],
      dtype='object')

In [76]:
# Show the metadata row(s) whose StationNbr equals 6.
ws.loc[ws["StationNbr"].eq(6)]

Unnamed: 0,StationNbr,Name,City,RegionalOffice,County,ConnectDate,DisconnectDate,IsActive,IsEtoStation,Elevation,GroundCover,HmsLatitude,HmsLongitude,ZipCodes,SitingDesc
5,6,Davis,Davis,North Central Region Office,Yolo,7/17/1982,12/31/2050,True,True,60,Grass,38º32'8N / 38.535694,-121º46'35W / -121.776360,"['95618', '95617', '95616']",


In [71]:
# Build `matches`: station name from `ws` -> first "Weather Station" label in
# `source` that contains that name (case-insensitive substring test).
#
# NOTE(review): substring matching is a heuristic. It pairs e.g. "Davis" with
# "SCE Mount Davis" and keeps only the first label per name, so the resulting
# pairs should be checked by hand before being relied on.
sce_ws = source["Weather Station"].unique()
api_ws = ws.Name

# Lower-case each source label once instead of once per (name, label) pair.
# Non-string entries (e.g. NaN from missing values) are skipped rather than
# crashing on `.lower()`.
sce_labels = [(label, label.lower()) for label in sce_ws if isinstance(label, str)]

matches = {}
for n in api_ws:
    # Skip missing or blank names (an empty string is a substring of everything)
    # and names that already have a match (first occurrence wins).
    if not isinstance(n, str) or not n.strip() or n in matches:
        continue
    needle = n.lower()
    # First label containing the name wins, same as the original nested loop.
    first_hit = next((label for label, lowered in sce_labels if needle in lowered), None)
    if first_hit is not None:
        matches[n] = first_hit

In [78]:
# Display the mapping: station name from `ws` -> matched "Weather Station" label from `source`.
matches

{'Davis': 'SCE Mount Davis',
 'Soledad': 'SCE Soledad Canyon Rd',
 'Santa Paula': 'SCE Santa Paula',
 'Escondido': 'SCE Escondido Canyon',
 'Ramona': 'SCE Ramona Expy',
 'Moorpark': 'SCE Moorpark',
 'Moreno Valley': 'SCE Moreno Valley Fwy',
 'Williams': 'SCE Williams Canyon Rd',
 'Highland': 'HIGHLAND SPRINGS RAW',
 'Chino': 'SCE Chino Hills 3'}

In [72]:
# Keep only the stations that received a match, relabel their Name with the
# matched "Weather Station" label, and join that metadata onto the source rows.
has_match = ws["Name"].isin(matches.keys())
ws_matches = ws.loc[has_match].assign(Name=lambda frame: frame["Name"].map(matches))
source_ws = pd.merge(source, ws_matches, left_on="Weather Station", right_on="Name")

In [77]:
# List the columns of the merged frame (source rows joined with the matched station metadata).
source_ws.columns

Index(['Circuit', 'Weather Station', 'Wind Sustained', 'Gust Sustained',
       'Thresholds (Sustained/ Gust)', 'FPI Value',
       'Reasons for De‐Energization', 'month', 'days', 'year',
       'Wind Threshold', 'Gust Threshold', 'Wind Trigger Percentage',
       'Gust Trigger Percentage', 'Combined Trigger Percentage', 'start day',
       'end day', 'start', 'end', 'StationNbr', 'Name', 'City',
       'RegionalOffice', 'County', 'ConnectDate', 'DisconnectDate', 'IsActive',
       'IsEtoStation', 'Elevation', 'GroundCover', 'HmsLatitude',
       'HmsLongitude', 'ZipCodes', 'SitingDesc'],
      dtype='object')

In [75]:
# Spot-check the join: source-side identifiers next to the station fields that were merged in.
preview_cols = ["Circuit", "Weather Station", "StationNbr", "Name", "City"]
source_ws.loc[:, preview_cols]

Unnamed: 0,Circuit,Weather Station,StationNbr,Name,City
0,DUKE,SCE Mount Davis,6,SCE Mount Davis,Davis
1,DUKE,SCE Mount Davis,6,SCE Mount Davis,Davis
2,DUKE,SCE Mount Davis,6,SCE Mount Davis,Davis
3,DUKE,SCE Mount Davis,6,SCE Mount Davis,Davis
4,BONNEVILLE,SCE Ramona Expy,98,SCE Ramona Expy,Ramona
5,SOGGY,SCE Ramona Expy,98,SCE Ramona Expy,Ramona
6,OLIVER,SCE Ramona Expy,98,SCE Ramona Expy,Ramona
7,GINGER,SCE Moreno Valley Fwy,238,SCE Moreno Valley Fwy,Moreno Valley
8,FLAKE,SCE Moreno Valley Fwy,238,SCE Moreno Valley Fwy,Moreno Valley
9,MIDDLE ROAD,SCE Santa Paula,58,SCE Santa Paula,Santa Paula


In [21]:
# One row per distinct (StationNbr, start, end) combination; the first
# occurrence is kept, exactly as with `~duplicated()`.
# `.copy()` makes ws_query an independent frame, so assigning to its columns
# later does not raise SettingWithCopyWarning on pandas versions without
# copy-on-write.
ws_query = source_ws[["StationNbr", "start", "end"]].drop_duplicates().copy()

In [22]:
# Look at data the day before/after the event.
# The padded bounds are always derived from the untouched source_ws columns
# (assign aligns them to ws_query's rows by index), so re-running this cell
# yields the same window instead of widening it by another day each time.
# assign() also returns a new frame, avoiding attribute-style assignment on a
# frame derived from source_ws.
oneday = pd.Timedelta(days=1)
ws_query = ws_query.assign(
    start=source_ws["start"] - oneday,
    end=source_ws["end"] + oneday,
)

In [23]:
# Stringify every cell and expose the rows as a 2-D array of
# [StationNbr, start, end] strings for the query step.
ws_query_as_text = ws_query.astype("str")
query = ws_query_as_text.to_numpy()
query

array([['6', '2020-11-28', '2020-11-30'],
       ['6', '2020-12-15', '2020-12-25'],
       ['6', '2020-12-03', '2020-12-15'],
       ['6', '2021-01-11', '2021-01-22'],
       ['98', '2020-11-28', '2020-11-30'],
       ['238', '2020-11-28', '2020-11-30'],
       ['58', '2020-11-28', '2020-11-30'],
       ['198', '2020-11-28', '2020-11-30'],
       ['58', '2021-01-11', '2021-01-22'],
       ['198', '2021-01-11', '2021-01-22'],
       ['28', '2020-11-28', '2020-11-30'],
       ['217', '2020-11-28', '2020-11-30'],
       ['251', '2020-12-03', '2020-12-15'],
       ['250', '2020-12-03', '2020-12-15'],
       ['74', '2020-12-03', '2020-12-15'],
       ['74', '2021-01-11', '2021-01-22'],
       ['255', '2021-01-11', '2021-01-22']], dtype=object)

In [27]:
# Run the project-local weather_reports query over the rows of `query` and save
# the result as a CSV under ../data/weather/.
# The calls are left commented out, presumably so the queries are not re-issued
# on every run of the notebook — TODO confirm.
# NOTE(review): hash() of a tuple of str is salted per Python process unless
# PYTHONHASHSEED is fixed, so the generated file name changes between kernel
# sessions; it also depends only on query[0], not on the full query set.
# A hashlib digest over all rows would give a stable, content-based name.
#wr = weather_reports.weather_station_query(query)
#wr.head()
#wr.to_csv("../data/weather/weather_report_{}.csv".format(hash(tuple(query[0]))), index=False)

Querying 6 2020-11-28 2020-11-30
Querying 6 2020-12-15 2020-12-25
Querying 6 2020-12-03 2020-12-15
Querying 6 2021-01-11 2021-01-22
Querying 98 2020-11-28 2020-11-30
Querying 238 2020-11-28 2020-11-30
Querying 58 2020-11-28 2020-11-30
Querying 198 2020-11-28 2020-11-30
Querying 58 2021-01-11 2021-01-22
Querying 198 2021-01-11 2021-01-22
Querying 28 2020-11-28 2020-11-30
Querying 217 2020-11-28 2020-11-30
Querying 251 2020-12-03 2020-12-15
Querying 250 2020-12-03 2020-12-15
Querying 74 2020-12-03 2020-12-15
Querying 74 2021-01-11 2021-01-22
Querying 255 2021-01-11 2021-01-22


Unnamed: 0,Date,Julian,Hour,Station,Standard,ZipCodes,Scope,HlyAirTmp (F),HlyDewPnt (F),HlyEto (in),...,HlyAsceEto (in),HlyAsceEtr (in),HlyPrecip (in),HlyRelHum (%),HlyResWind (MPH),HlySoilTmp (F),HlySolRad (Ly/day),HlyVapPres (mBars),HlyWindDir (°),HlyWindSpd (MPH)
0,2020-11-28,333,100,6,english,"95618, 95617, 95616",hourly,35.4,28.1,0,...,0,,0,74,1.0,49.6,0,5.2,235,1.0
1,2020-11-28,333,200,6,english,"95618, 95617, 95616",hourly,34.9,28.5,0,...,0,,0,77,1.0,49.3,0,5.3,234,1.0
2,2020-11-28,333,300,6,english,"95618, 95617, 95616",hourly,35.5,27.7,0,...,0,,0,73,1.7,49.1,0,5.1,235,1.7
3,2020-11-28,333,400,6,english,"95618, 95617, 95616",hourly,36.5,27.7,0,...,0,,0,70,4.0,48.8,0,5.1,237,4.0
4,2020-11-28,333,500,6,english,"95618, 95617, 95616",hourly,36.5,27.9,0,...,0,,0,71,3.7,48.6,0,5.2,238,3.7
