In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Data source: the California Irrigation Management Information System (CIMIS) — web portal at [https://cimis.water.ca.gov/Default.aspx](https://cimis.water.ca.gov/Default.aspx) and REST API at [https://et.water.ca.gov/Rest/Index](https://et.water.ca.gov/Rest/Index).

In [2]:
# Put the local loader directory on the import path so `weather_reports` can be imported.
# The path is relative, so it resolves against the kernel's working directory.
import sys
sys.path.append("../loaders/weather_station")

In [3]:
import weather_reports

In [4]:
# Load the source table and turn its `start` / `end` columns into timestamps.
# `parse_dates=True` was dropped: as a bare boolean it only asks pandas to try
# parsing the *index*, and no index column is read here, so it never affected
# `start` / `end`. The explicit dtype conversion below is what does the work.
source = pd.read_csv("../data/source.csv").astype(
    {"start": "datetime64[ns]", "end": "datetime64[ns]"}
)

### Import weather station index

In [5]:
# Load the weather-station index table.
# `parse_dates=True` was removed: as a bare boolean it only targets the index, and
# no index column is read, so it parsed nothing. ConnectDate / DisconnectDate stay
# as plain strings at this point.
ws = pd.read_csv("../data/weather/weather_stations.csv")
ws.columns

Index(['StationNbr', 'Name', 'City', 'RegionalOffice', 'County', 'ConnectDate',
       'DisconnectDate', 'IsActive', 'IsEtoStation', 'Elevation',
       'GroundCover', 'HmsLatitude', 'HmsLongitude', 'ZipCodes', 'SitingDesc'],
      dtype='object')

In [6]:
# Stations whose county name contains "Yolo".
ws.loc[ws["County"].str.contains("Yolo")]

Unnamed: 0,StationNbr,Name,City,RegionalOffice,County,ConnectDate,DisconnectDate,IsActive,IsEtoStation,Elevation,GroundCover,HmsLatitude,HmsLongitude,ZipCodes,SitingDesc
5,6,Davis,Davis,North Central Region Office,Yolo,7/17/1982,12/31/2050,True,True,60,Grass,38º32'8N / 38.535694,-121º46'35W / -121.776360,"['95618', '95617', '95616']",
26,27,Zamora,Woodland,North Central Region Office,Yolo,12/5/1982,1/20/2006,False,True,50,Grass,38º48'32N / 38.808758,-121º54'27W / -121.907540,['95698'],
154,155,Bryte (experimental),West Sacramento,North Central Region Office,Yolo,12/10/1998,12/31/2050,True,False,40,Bare,38º35'57N / 38.599158,-121º32'25W / -121.540410,"['95799', '95798', '95691', '95605']",
195,196,Esparto,Esparto,North Central Region Office,Yolo,4/15/2005,7/15/2017,False,True,174,Grass,38º41'30N / 38.691786,-122º0'50W / -122.013810,"['95607', '95627', '95653', '95695']",
225,226,Woodland,Woodland,North Central Region Office,Yolo,5/12/2011,12/31/2050,True,True,82,Grass,38º40'22N / 38.672722,-121º48'42W / -121.811720,"['95695', '95776', '95627']",


### Match Weather Station API against SCE Weather Station names

In [11]:
# Map each API station name to the first SCE station name that contains it
# (case-insensitive substring match).
# NOTE(review): substring matching is loose -- e.g. "Davis" also matches
# "SCE Mount Davis" -- so the resulting pairs should be checked by eye.
# Missing names are dropped up front because `.lower()` would fail on NaN.
sce_ws = source["Weather Station"].dropna().unique()
api_ws = ws.Name
# Lower-case every SCE name once instead of on each inner-loop iteration.
sce_lowered = [(w, w.lower()) for w in sce_ws]
matches = {}
for n in api_ws.dropna().unique():
    needle = n.lower()
    # First hit wins (what the original `n not in matches` guard enforced),
    # and the scan stops as soon as that hit is found.
    hit = next((w for w, lowered in sce_lowered if needle in lowered), None)
    if hit is not None:
        matches[n] = hit

In [12]:
# Display the API-name -> SCE-name pairs so the matches can be checked visually.
matches

{'Davis': 'SCE Mount Davis',
 'Soledad': 'SCE Soledad Canyon Rd',
 'Santa Paula': 'SCE Santa Paula',
 'Escondido': 'SCE Escondido Canyon',
 'Ramona': 'SCE Ramona Expy',
 'Moorpark': 'SCE Moorpark',
 'Moreno Valley': 'SCE Moreno Valley Fwy',
 'Williams': 'SCE Williams Canyon Rd',
 'Highland': 'HIGHLAND SPRINGS RAW',
 'Chino': 'SCE Chino Hills 3'}

In [13]:
# Keep only the API stations that found an SCE counterpart, relabel them with the
# SCE name, then join them onto the source rows by that name.
ws_matches = ws.loc[ws["Name"].isin(list(matches))].copy()
ws_matches["Name"] = ws_matches["Name"].map(matches)
source_ws = pd.merge(
    source,
    ws_matches,
    left_on="Weather Station",
    right_on="Name",
)

### Prepare query for Weather Station API using matched Weather Stations

In [14]:
# One row per distinct (station, start, end) combination.
ws_query = source_ws.loc[:, ["StationNbr", "start", "end"]].drop_duplicates()

In [15]:
# Widen each query window by 30 days on both sides of the event.
one_day = pd.Timedelta(days=1)  # not used in this cell; kept in case other cells rely on it
one_month = pd.Timedelta(days=30)
# Rebuild the padded frame from `source_ws` instead of shifting `ws_query` in place.
# The in-place version added another 30 days every time the cell was re-run, and it
# assigned into a frame that was itself a filtered selection.
ws_query = (
    source_ws[["StationNbr", "start", "end"]]
    .drop_duplicates()
    .assign(
        start=lambda q: q["start"] - one_month,
        end=lambda q: q["end"] + one_month,
    )
)

In [16]:
# Render every value as text and expose the rows as a plain NumPy array.
query = ws_query.astype("str").to_numpy()
query

array([['6', '2020-10-30', '2020-12-29'],
       ['6', '2020-11-16', '2021-01-23'],
       ['6', '2020-11-04', '2021-01-13'],
       ['6', '2020-12-13', '2021-02-20'],
       ['98', '2020-10-30', '2020-12-29'],
       ['238', '2020-10-30', '2020-12-29'],
       ['58', '2020-10-30', '2020-12-29'],
       ['198', '2020-10-30', '2020-12-29'],
       ['58', '2020-12-13', '2021-02-20'],
       ['198', '2020-12-13', '2021-02-20'],
       ['28', '2020-10-30', '2020-12-29'],
       ['217', '2020-10-30', '2020-12-29'],
       ['251', '2020-11-04', '2021-01-13'],
       ['250', '2020-11-04', '2021-01-13'],
       ['74', '2020-11-04', '2021-01-13'],
       ['74', '2020-12-13', '2021-02-20'],
       ['255', '2020-12-13', '2021-02-20']], dtype=object)

In [23]:
# List the stations that appear in `query`.
# `query` was built with `.astype("str")`, so its first column holds strings such as
# '6', while `ws.StationNbr` is numeric. Cast the ids back to the column's dtype so
# the membership test compares like with like; compared as-is, '6' != 6 and no rows
# would be selected.
station_ids = query[:, 0].astype(ws.StationNbr.dtype)
ws.loc[ws.StationNbr.isin(station_ids), ["StationNbr", "Name", "City"]]

Unnamed: 0,StationNbr,Name,City
5,6,Davis,Davis
27,28,Soledad,Soledad
57,58,Santa Paula,Santa Paula
73,74,Escondido,Escondido
97,98,Ramona,Ramona
197,198,Santa Paula,Santa Paula
216,217,Moorpark,Moorpark
237,238,Moreno Valley,Moreno Valley
249,250,Williams,Williams
250,251,Highland,Highland


In [58]:
# One [station, start, end] request per active station, all sharing the same window.
start, end = "2020-12-01", "2021-02-01"
active_station_numbers = ws.loc[ws["IsActive"] == True, "StationNbr"]
full_query = [[station, start, end] for station in active_station_numbers]

[[2, '2020-12-01', '2021-02-01'],
 [5, '2020-12-01', '2021-02-01'],
 [6, '2020-12-01', '2021-02-01'],
 [7, '2020-12-01', '2021-02-01'],
 [12, '2020-12-01', '2021-02-01'],
 [13, '2020-12-01', '2021-02-01'],
 [15, '2020-12-01', '2021-02-01'],
 [35, '2020-12-01', '2021-02-01'],
 [39, '2020-12-01', '2021-02-01'],
 [41, '2020-12-01', '2021-02-01'],
 [43, '2020-12-01', '2021-02-01'],
 [44, '2020-12-01', '2021-02-01'],
 [47, '2020-12-01', '2021-02-01'],
 [52, '2020-12-01', '2021-02-01'],
 [54, '2020-12-01', '2021-02-01'],
 [56, '2020-12-01', '2021-02-01'],
 [57, '2020-12-01', '2021-02-01'],
 [62, '2020-12-01', '2021-02-01'],
 [64, '2020-12-01', '2021-02-01'],
 [68, '2020-12-01', '2021-02-01'],
 [70, '2020-12-01', '2021-02-01'],
 [71, '2020-12-01', '2021-02-01'],
 [75, '2020-12-01', '2021-02-01'],
 [77, '2020-12-01', '2021-02-01'],
 [78, '2020-12-01', '2021-02-01'],
 [80, '2020-12-01', '2021-02-01'],
 [83, '2020-12-01', '2021-02-01'],
 [84, '2020-12-01', '2021-02-01'],
 [87, '2020-12-01', '202

In [61]:
# Re-execute the already-imported `weather_reports` module so that edits to its
# source file take effect without restarting the kernel.
from importlib import reload
reload(weather_reports)

<module 'weather_reports' from '../loaders/weather_station/weather_reports.py'>

In [62]:
# Run the weather-station query for every entry in `full_query` using the local
# `weather_reports` module.
# NOTE(review): the call is commented out, so running this cell does not define `wr`.
# The recorded output of an earlier run ends in a FileNotFoundError for a path under
# './data/weather/'.
#wr = weather_reports.weather_station_query(full_query)

Querying 2 2020-12-01 2021-02-01
Querying 5 2020-12-01 2021-02-01
Querying 6 2020-12-01 2021-02-01
Querying 7 2020-12-01 2021-02-01
Querying 12 2020-12-01 2021-02-01
Querying 13 2020-12-01 2021-02-01
Querying 15 2020-12-01 2021-02-01
Querying 35 2020-12-01 2021-02-01
Querying 39 2020-12-01 2021-02-01
Querying 41 2020-12-01 2021-02-01
Querying 43 2020-12-01 2021-02-01
Querying 44 2020-12-01 2021-02-01
Querying 47 2020-12-01 2021-02-01
Querying 52 2020-12-01 2021-02-01
Querying 54 2020-12-01 2021-02-01
Querying 56 2020-12-01 2021-02-01
Querying 57 2020-12-01 2021-02-01
Querying 62 2020-12-01 2021-02-01
Querying 64 2020-12-01 2021-02-01
Querying 68 2020-12-01 2021-02-01
Querying 70 2020-12-01 2021-02-01
Querying 71 2020-12-01 2021-02-01
Querying 75 2020-12-01 2021-02-01
Querying 77 2020-12-01 2021-02-01
Querying 78 2020-12-01 2021-02-01
Querying 80 2020-12-01 2021-02-01
Querying 83 2020-12-01 2021-02-01
Querying 84 2020-12-01 2021-02-01
Querying 87 2020-12-01 2021-02-01
Querying 88 2020-1

FileNotFoundError: [Errno 2] No such file or directory: './data/weather/weather_report_1620406529.8128471.csv'

In [60]:
# Preview the first rows of the weather report.
# NOTE(review): `wr` must already exist in the kernel; the recorded output of this
# cell is a NameError, i.e. it was run before `wr` had been assigned.
wr.head()

NameError: name 'wr' is not defined

In [18]:
import time

# Stamp the file name with the current epoch time so each export gets its own file.
report_path = "../data/weather/weather_report_{}.csv".format(time.time())
wr.to_csv(report_path, index=False)

### Inspect results, output to CSV

In [7]:
# Reload a previously exported weather report from disk.
weather_report_csv = "../data/weather/weather_report_1620408982.771627.csv"
wr = pd.read_csv(weather_report_csv)
wr.columns

Index(['Date', 'Julian', 'Hour', 'Station', 'Standard', 'ZipCodes', 'Scope',
       'HlyAirTmp (F)', 'HlyDewPnt (F)', 'HlyEto (in)', 'HlyNetRad (Ly/day)',
       'HlyAsceEto (in)', 'HlyAsceEtr (in)', 'HlyPrecip (in)', 'HlyRelHum (%)',
       'HlyResWind (MPH)', 'HlySoilTmp (F)', 'HlySolRad (Ly/day)',
       'HlyVapPres (mBars)', 'HlyWindDir (°)', 'HlyWindSpd (MPH)'],
      dtype='object')

In [9]:
# Highest "HlyWindSpd (MPH)" reading per station, keeping only stations whose peak
# is above 31.
peak_wind = wr.groupby("Station")[["HlyWindSpd (MPH)"]].max()
wind_max = peak_wind.where(peak_wind > 31).dropna()

In [10]:
# Station metadata for every station that passed the wind-speed filter.
ws.loc[ws["StationNbr"].isin(wind_max.index)]

Unnamed: 0,StationNbr,Name,City,RegionalOffice,County,ConnectDate,DisconnectDate,IsActive,IsEtoStation,Elevation,GroundCover,HmsLatitude,HmsLongitude,ZipCodes,SitingDesc
120,121,Dixon,Dixon,North Central Region Office,Solano,9/20/1994,12/31/2050,True,True,37,Grass,38º24'56N / 38.415564,-121º47'13W / -121.786910,['95620'],
139,140,Twitchell Island,Rio Vista,North Central Region Office,Sacramento,10/8/1997,12/31/2050,True,True,0,Grass,38º7'18N / 38.121739,-121º40'28W / -121.674455,"['94571', '95641']",
211,212,Hastings Tract East,Dixon,North Central Region Office,Solano,10/20/2009,12/31/2050,True,True,7,Grass,38º16'41N / 38.278056,-121º44'28W / -121.741110,"['94571', '95620']",
247,248,Holt,Stockton,North Central Region Office,San Joaquin,7/1/2016,12/31/2050,True,True,-3,Grass,37º55'55N / 37.932072,-121º23'48W / -121.396661,"['95206', '95234', '95203', '95219']",
260,261,Gazelle,Gazelle,Northern Region Office,Siskiyou,9/18/2019,12/31/2050,True,True,2743,Grass,41º32'2N / 41.533989,-122º31'56W / -122.532279,"['96034', '96038', '96064', '96094']",


In [14]:
# Convert both connection-date columns from text to timestamps, then show the dtypes.
for date_col in ("ConnectDate", "DisconnectDate"):
    ws[date_col] = pd.to_datetime(ws[date_col])
ws.dtypes

StationNbr                 int64
Name                      object
City                      object
RegionalOffice            object
County                    object
ConnectDate       datetime64[ns]
DisconnectDate    datetime64[ns]
IsActive                    bool
IsEtoStation                bool
Elevation                  int64
GroundCover               object
HmsLatitude               object
HmsLongitude              object
ZipCodes                  object
SitingDesc               float64
dtype: object

In [15]:
# Stations connected in 2019 or later.
ws.loc[ws["ConnectDate"].dt.year > 2018]

Unnamed: 0,StationNbr,Name,City,RegionalOffice,County,ConnectDate,DisconnectDate,IsActive,IsEtoStation,Elevation,GroundCover,HmsLatitude,HmsLongitude,ZipCodes,SitingDesc
258,259,Ferndale Plain,Ferndale,Northern Region Office,Humboldt,2019-08-05,2050-12-30,True,True,21,Grass,40º36'16N / 40.604467,-124º14'35W / -124.243186,"['95536', '95540', '95551']",
259,260,Montague,Montague,Northern Region Office,Siskiyou,2019-09-20,2050-12-31,True,True,2663,Grass,41º47'54N / 41.798331,-122º27'48W / -122.463425,"['96038', '96044', '96064', '96097']",
260,261,Gazelle,Gazelle,Northern Region Office,Siskiyou,2019-09-18,2050-12-31,True,True,2743,Grass,41º32'2N / 41.533989,-122º31'56W / -122.532279,"['96034', '96038', '96064', '96094']",
261,262,Linden,Linden,North Central Region Office,San Joaquin,2020-02-26,2050-12-31,True,True,111,Grass,38º3'56N / 38.065692,-121º4'18W / -121.071747,"['95240', '95236', '95227', '95237', '95252', ...",
262,263,Smith River,Crescent City,Northern Region Office,Del Norte,2020-10-07,2050-12-31,True,True,22,Grass,41º53'41N / 41.894592,-124º9'54W / -124.165043,"['95532', '95567']",
263,264,Sierra Valley Center,Portola,Northern Region Office,Plumas,2020-10-28,2050-12-31,True,True,4898,Grass,39º46'39N / 39.777452,-120º16'25W / -120.273609,"['96106', '96118', '96122', '96129', '96135']",


In [20]:
# Same wind-filtered stations, ordered alphabetically by county.
windy_stations = ws.loc[ws["StationNbr"].isin(wind_max.index)]
windy_stations.sort_values(by="County")

Unnamed: 0,StationNbr,Name,City,RegionalOffice,County,ConnectDate,DisconnectDate,IsActive,IsEtoStation,Elevation,GroundCover,HmsLatitude,HmsLongitude,ZipCodes,SitingDesc
139,140,Twitchell Island,Rio Vista,North Central Region Office,Sacramento,1997-10-08,2050-12-31,True,True,0,Grass,38º7'18N / 38.121739,-121º40'28W / -121.674455,"['94571', '95641']",
247,248,Holt,Stockton,North Central Region Office,San Joaquin,2016-07-01,2050-12-31,True,True,-3,Grass,37º55'55N / 37.932072,-121º23'48W / -121.396661,"['95206', '95234', '95203', '95219']",
260,261,Gazelle,Gazelle,Northern Region Office,Siskiyou,2019-09-18,2050-12-31,True,True,2743,Grass,41º32'2N / 41.533989,-122º31'56W / -122.532279,"['96034', '96038', '96064', '96094']",
120,121,Dixon,Dixon,North Central Region Office,Solano,1994-09-20,2050-12-31,True,True,37,Grass,38º24'56N / 38.415564,-121º47'13W / -121.786910,['95620'],
211,212,Hastings Tract East,Dixon,North Central Region Office,Solano,2009-10-20,2050-12-31,True,True,7,Grass,38º16'41N / 38.278056,-121º44'28W / -121.741110,"['94571', '95620']",


In [25]:
# Number of stations per county, listed in alphabetical county order.
county_counts = ws["County"].value_counts()
county_counts.sort_index()

Alameda             5
Alpine              1
Amador              2
Butte               2
Colusa              2
Contra Costa        6
Del Norte           1
El Dorado           2
Fresno             12
Glenn               2
Humboldt            1
Imperial           14
Inyo                3
Kern               14
Kings               4
Lassen              1
Los Angeles        16
Madera              2
Marin               3
Mendocino           2
Merced              3
Modoc               1
Monterey           18
Napa                3
Orange              3
Placer              1
Plumas              1
Riverside          26
Sacramento          3
San Benito          2
San Bernardino     11
San Diego          11
San Joaquin         8
San Luis Obispo     4
San Mateo           2
Santa Barbara      12
Santa Clara         3
Santa Cruz          7
Shasta              2
Siskiyou            7
Solano              6
Sonoma              6
Stanislaus          5
Sutter              2
Tehama              3
Tulare    