In [18]:
from RAWS_report import Report
import pandas as pd
import numpy as np

### Report() class description
- This python class can be used to scrape data from the Western Regional Climate Center weather database. 
- The database is comprised of data from Remote Automatic Weather Stations ("RAWS" stations). These stations are spread throughout the US. 
- The database is continuously updated with the latest data collected by operating RAWS stations
- The Report() class can scrape data reports from specified stations in a given time range via Report.GetReport()
    - The most recently pulled report is stored in Report.df_rep
- RAWS station weather metadata can be looked up at https://wrcc.dri.edu/Monitoring/Stations/raws_inventory.php
    - This gives the user the station ID which is needed as an input for Report.GetReport()
- To make it easier for the user, there is a method for obtaining all California SiteIDs and metadata: Report.GetIDs()
    - This method builds a dataframe with station names, SiteIDs, lat-lon, elevation, and years of data availability
    - After completion the SiteIDs dataframe is stored in Report.SiteIDs
    - GetIDs can take a few minutes to run, so it is wise to save the SiteIDs dataframe for future reference
    - Version 2 (in development) will allow extraction of SiteIDs for any specified state
- Although the station metadata can currently only be extracted for CA, it is important to note that Report.GetReport() can be used for any RAWS station
- The class also has two data cleaning and normalization methods:
    - Report.QuarterlyReslice reformats the dataframe entries by quarter (the reports are initially by month)
    - Report.HistavNorm normalizes QuarterlyReslice dataframe entries using the average for that quarter over all years in the dataframe

### Class usage example

#### Run this code block if you want Report to get all CA SiteIDs and metadata (takes time, alternative below)
- The value of the SiteIDs dataframe is its station metadata. The user may want to filter on latitude-longitude, elevation, etc.
- This takes roughly 5 minutes. The next code block loads in previously obtained metadata instead

In [2]:
# Create a Report instance without passing in previously saved SiteIDs.
rep = Report()

# Scrape the California SiteIDs and station metadata; the result is stored in rep.SiteIDs.
# This takes a few minutes to run, so save the table afterwards, e.g.
#     rep.SiteIDs.to_csv('RAWS_SiteIDs_CA.csv')
# and in future sessions load it back instead of scraping again:
#     df_SiteIDs = pd.read_csv('RAWS_SiteIDs_CA.csv')
#     rep = Report(SiteIDs = df_SiteIDs)
rep.GetIDs()

#### Run this code block to load in previously obtained CA SiteIDs

In [19]:
# Load the previously saved California SiteIDs table from CSV (the path is relative to the
# working directory) instead of re-scraping it with GetIDs().

df_SiteIDs = pd.read_csv('RAWS_SiteIDs_CA.csv')
# Hand the saved table to Report; the following cells read it back as rep.SiteIDs.
rep = Report(SiteIDs = df_SiteIDs)

#### Preview of SiteIDs

In [20]:
# Preview ten rows of the SiteIDs dataframe.
# sample() picks rows at random, so the rows shown differ from run to run.
rep.SiteIDs.sample(10)

# Elev, Lat and Lon are -9999 if no elevation and coordinate data could be found in the
# wrcc.dri.edu metadata list.

# Y_start and Y_end indicate the data availability range. That does not mean data is available
# for every month in that year range; per-month availability is shown on the station webpage
# (on the wrcc.dri.edu site). Additionally, starting a report before the first month of
# availability will sometimes return a blank report.

Unnamed: 0,Name,Symbol,Y_start,Y_end,Elev,Lat,Lon
302,Los Vaqueros,CLVQ,2004,2022,-9999,-9999.0,-9999.0
527,Dos Palmas,CDPA,2010,2022,-9999,-9999.0,-9999.0
45,Cold Creek,CCCK,2003,2004,6350,41.781667,-120.318333
395,Delonagha,CDEL,1997,2002,3120,35.57,-118.616667
689,Talega Ridge,CTLR,2014,2022,1215,33.4775,-117.486389
542,Fallbrook,CFLB,2018,2022,-9999,-9999.0,-9999.0
309,Metcalf Gap,CMET,1990,2022,3077,37.409444,-119.768056
514,Camp 9,CCP9,1995,2022,4000,34.361667,-118.421667
426,Milk Ranch,CMLK,1997,1999,6225,36.486667,-118.78
257,County Line,CCOU,1994,2022,2085,39.018889,-122.411944


#### Get station reports selected via station metadata stored in SiteIDs

In [21]:
# Select every station whose listed elevation is at least 8000 feet.
# Stations carrying the -9999 missing-elevation placeholder never pass this test.
I_el = rep.SiteIDs['Elev'].ge(8000)

rep.SiteIDs.loc[I_el]

Unnamed: 0,Name,Symbol,Y_start,Y_end,Elev,Lat,Lon
108,Hat Mountain,CHTM,1995,1999,8000,40.501667,-121.4225
370,Gaylor Meadow,CTUO,1988,2011,9270,37.868333,-119.318333
376,White Wolf,CWWO,1988,2022,8025,37.851111,-119.65
383,Bear Peak,CBEA,1991,2022,8228,35.884167,-118.051667
388,Blackrock,CBRK,1999,2022,8200,36.093056,-118.260278
440,Rattlesnake,CRTL,1992,2022,8600,36.406944,-118.421667
445,Sugarloaf,CSUG,1992,2022,8120,36.726667,-118.675
446,Mount Tom,CTOM,1999,2022,9018,37.376111,-119.178333
472,Bear Peak,CBEA,1991,2022,8228,35.884167,-118.051667
484,Blackrock,CBRK,1999,2022,8200,36.093056,-118.260278


#### GetReport example

In [22]:
# Pull one report per station and keep each resulting dataframe.

# Demo: two stations picked from the high-elevation table above.
Stn_syms = ['CBEA','CHTM']
Nstn = len(Stn_syms)

# Time window handed to GetReport (two-digit month and year strings;
# presumably smon/syea mark the start and emon/eyea the end of the window).
time_r = {
    'smon':'01',
    'syea':'15',
    'emon':'01',
    'eyea':'17',
}

dfs = [None]*Nstn
for ii, stn_sym in enumerate(Stn_syms):
    rep.GetReport(stn=stn_sym,TimeInput=time_r)
    # The most recently pulled report is exposed as rep.df_rep; keep it per station.
    dfs[ii] = rep.df_rep
    print('Done with',ii+1,'out of',Nstn)

Done with 1 out of 2
Done with 2 out of 2


In [23]:
# Report for CBEA, the first station in Stn_syms (first rows only)

dfs[0].head()

Unnamed: 0,Station,Elev,Lat,Lon,Month,Year,"Solar Radiation, ly, Total","Mean Wind Speed, mph, Ave.","Mean Wind Direction, Deg, Vector Ave.","Maximum Wind Gust, mph, Max.","Average Air Temperature, DegF, Ave.","Average Air Temperature, DegF, Ave. Daily Max.","Average Air Temperature, DegF, Max.","Average Air Temperature, DegF, Ave. Daily Min.","Average Air Temperature, DegF, Min.","Average Relative Humidity, %, Ave.","Average Relative Humidity, %, Max.","Average Relative Humidity, %, Min.","Precipitation, in, Total"
0,CBEA,8228,35.884167,-118.051667,1,2015,8503.0,10.81,52.27,59.0,39.1,45.71,55.0,34.29,21.0,44.59,100.0,2.0,0.35
1,CBEA,8228,35.884167,-118.051667,2,2015,11142.0,14.39,304.4,57.0,42.88,51.11,71.0,37.0,20.0,37.51,100.0,6.0,0.12
2,CBEA,8228,35.884167,-118.051667,3,2015,15322.0,12.11,22.98,55.0,43.58,51.58,63.0,37.13,21.0,39.26,99.0,5.0,0.04
3,CBEA,8228,35.884167,-118.051667,4,2015,19725.0,14.75,301.3,64.0,42.1,51.07,63.0,34.2,20.0,37.51,100.0,6.0,0.0
4,CBEA,8228,35.884167,-118.051667,5,2015,18877.0,12.44,271.1,45.0,45.85,54.35,74.0,37.97,22.0,53.29,100.0,9.0,0.27


In [24]:
# Report for CHTM, the second station in Stn_syms. It is blank because the station only
# operated between 1995 and 1999, which lies outside the requested report window.

dfs[1].head()

Unnamed: 0,Station,Elev,Lat,Lon,Month,Year,"Solar Radiation, ly, Total","Mean Wind Speed, mph, Ave.","Mean Wind Direction, Deg, Vector Ave.","Maximum Wind Gust, mph, Max.","Average Air Temperature, DegF, Ave.","Average Air Temperature, DegF, Ave. Daily Max.","Average Air Temperature, DegF, Max.","Average Air Temperature, DegF, Ave. Daily Min.","Average Air Temperature, DegF, Min.","Average Relative Humidity, %, Ave.","Average Relative Humidity, %, Max.","Average Relative Humidity, %, Min.","Precipitation, in, Total"


#### Example of QuarterlyReslice method

In [25]:
# Reports can also be resliced into quarterly slices, where each value is averaged over
# the 3 months in the quarter.

# Start by pulling a monthly report to reslice.
stn = 'CMSJ'
rep.GetReport(
    stn,
    TimeInput = {
        'smon':'01',
        'syea':'19',
        'emon':'12',
        'eyea':'21',
    },
)
print('Done with getting report')

Done with getting report


In [26]:
# Reslice the report pulled above into quarters, using the same years as that report.
df_q = rep.QuarterlyReslice(YearRange=(2019,2021))

# Setting inplace=True will replace rep.df_rep with the quarter-sliced dataframe.
# A dataframe can also be passed in explicitly using the 'df' keyword.

In [27]:
# Show the first eight quarterly rows.
df_q.head(8)

Unnamed: 0,Station,Elev,Lat,Lon,Q,Year,"Solar Radiation, ly, Total","Mean Wind Speed, mph, Ave.","Mean Wind Direction, Deg, Vector Ave.","Maximum Wind Gust, mph, Max.","Average Air Temperature, DegF, Ave.","Average Air Temperature, DegF, Ave. Daily Max.","Average Air Temperature, DegF, Max.","Average Air Temperature, DegF, Ave. Daily Min.","Average Air Temperature, DegF, Min.","Average Relative Humidity, %, Ave.","Average Relative Humidity, %, Max.","Average Relative Humidity, %, Min.","Precipitation, in, Total"
0,CMSJ,8616,33.815278,-116.641944,1,2019,9716.666667,6.079667,164.9,48.666667,33.62,44.146667,59.0,26.35,10.333333,48.223333,100.0,4.0,6.036667
1,CMSJ,8616,33.815278,-116.641944,2,2019,16865.666667,4.502,132.25,33.333333,49.596667,59.92,69.666667,41.476667,29.666667,38.003333,88.666667,3.0,0.393333
2,CMSJ,8616,33.815278,-116.641944,3,2019,16634.666667,4.229667,149.213333,28.0,62.56,73.316667,81.666667,55.1,49.0,29.39,88.333333,2.333333,0.92
3,CMSJ,8616,33.815278,-116.641944,4,2019,9760.666667,4.934333,240.7,41.0,42.15,51.783333,63.333333,34.87,17.666667,35.926667,96.666667,0.666667,1.413333
4,CMSJ,8616,33.815278,-116.641944,1,2020,10854.666667,4.961667,228.1,39.666667,36.836667,46.86,57.333333,29.23,15.0,37.893333,100.0,3.666667,2.27
5,CMSJ,8616,33.815278,-116.641944,2,2020,18160.333333,4.274667,116.626667,33.333333,52.816667,63.17,76.0,45.123333,29.666667,32.033333,83.666667,3.333333,0.706667
6,CMSJ,8616,33.815278,-116.641944,3,2020,17039.333333,4.585333,172.5,30.0,65.323333,76.37,86.0,57.676667,47.333333,24.543333,58.666667,2.666667,0.0
7,CMSJ,8616,33.815278,-116.641944,4,2020,10245.666667,4.302333,273.6,37.666667,45.316667,54.696667,65.0,38.36,19.0,24.753333,100.0,2.666667,0.1


#### Example of HistavNorm method

In [28]:
# The quarterly-sliced dataframe can additionally be normalized to the total average for
# that quarter over all years. The data is then unitless and represented as a fractional
# change from the average (in the output below, zero precipitation shows up as -1.0).
    
df_q_norm = rep.HistavNorm(df_q)
df_q_norm.head(8)

# HistavNorm also has the 'inplace' and 'df' keyword options (like QuarterlyReslice).

Unnamed: 0,Station,Elev,Lat,Lon,Q,Year,"Solar Radiation, Total","Mean Wind Speed, Ave.","Mean Wind Direction, Vector Ave.","Maximum Wind Gust, Max.","Average Air Temperature, Ave.","Average Air Temperature, Ave. Daily Max.","Average Air Temperature, Max.","Average Air Temperature, Ave. Daily Min.","Average Air Temperature, Min.","Average Relative Humidity, Ave.","Average Relative Humidity, Max.","Average Relative Humidity, Min.","Precipitation, Total"
0,CMSJ,8616,33.815278,-116.641944,1,2019,-0.083823,0.153783,-0.245194,0.146597,-0.059493,-0.040591,0.001887,-0.063906,-0.154545,0.174842,0.003344,0.125,0.94801
1,CMSJ,8616,33.815278,-116.641944,2,2019,-0.053259,0.025876,0.320622,-0.022801,-0.054942,-0.048133,-0.065574,-0.068824,-0.025547,0.214293,0.045872,0.08,0.044248
2,CMSJ,8616,33.815278,-116.641944,3,2019,0.008753,-0.009935,-0.09807,0.037037,-0.022415,-0.017832,-0.030343,-0.026158,0.023202,-0.040135,0.086066,-0.3,0.574144
3,CMSJ,8616,33.815278,-116.641944,4,2019,-0.028789,0.088803,-0.03725,0.066474,-0.034045,-0.026873,-0.013841,-0.044017,-0.047904,0.142463,0.022327,-0.625,0.347458
4,CMSJ,8616,33.815278,-116.641944,1,2020,0.023478,-0.058388,0.044095,-0.065445,0.030492,0.018376,-0.026415,0.038407,0.227273,-0.076823,0.003344,0.03125,-0.267479
5,CMSJ,8616,33.815278,-116.641944,2,2020,0.019416,-0.025927,0.16461,-0.022801,0.006415,0.003495,0.019374,0.013046,-0.025547,0.023538,-0.013106,0.2,0.876106
6,CMSJ,8616,33.815278,-116.641944,3,2020,0.033292,0.073318,0.042688,0.111111,0.020766,0.023071,0.021108,0.019383,-0.011601,-0.198425,-0.278689,-0.2,-1.0
7,CMSJ,8616,33.815278,-116.641944,4,2020,0.019469,-0.050653,0.094343,-0.020231,0.038526,0.027875,0.012111,0.051663,0.023952,-0.212847,0.057579,0.5,-0.904661
