In [1]:
import os
import math
import pandas as pd
import numpy as np
import urllib
import matplotlib.pyplot as plt
%matplotlib inline

from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2

import datetime

import hydrofunctions as hf

import fbprophet 

# For inputting City information to get distance to sensors.
from  geopy.geocoders import Nominatim

# Date time conversion registration.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()


In [80]:
# Load the USGS gage-site listing from the tab-separated 'Gage_sites.txt' export.
# header=39 picks the column-name row; skiprows=[40] discards the line right
# after it (presumably the RDB column-format line -- TODO confirm).
# site_no is read as str (presumably to keep leading zeros in the ids).
file = 'Gage_sites.txt'
sites = pd.read_csv(
    file,
    sep='\t',
    header=39,
    skiprows=[40],
    dtype={"site_no": "str"},
)

# Show one of the rows that is removed just below.
print(sites.iloc[330])

# Hand-picked row labels excluded from the analysis (reason not recorded here).
sites = sites.drop(
    index=[65, 71, 77, 117, 118, 138, 203, 225, 320, 330, 331, 338]
)

# Plain-list views of the ids and names, plus a slim location table.
site_no = sites["site_no"].tolist()
site_nm = sites["station_nm"].tolist()
site_loc = sites.filter(
    items=['site_no', 'station_nm', 'dec_lat_va', 'dec_long_va']
)
sites.shape

site_no                                               385553107243301
station_nm            NORTH FORK GUNNISON BLW RAVEN GULCH NR SOMERSET
lat_va                                                         385553
long_va                                                   1.07243e+06
coord_acy_cd                                                        U
coord_datum_cd                                                  NAD83
dec_lat_va                                                    38.9313
dec_long_va                                                  -107.409
coord_acy_cd.1                                                      U
dec_coord_datum_cd                                              NAD83
state_cd                                                            8
map_nm                                                   SOMERSET, CO
alt_va                                                           6158
alt_acy_va                                                        NaN
alt_datum_cd        

(327, 20)

In [81]:
# Connection settings for the local PostgreSQL database that stores gage data.
dbname = 'usgs_stream_db'
username = 'cadeadams'

# Use the 'postgresql://' scheme: the legacy 'postgres://' alias was removed in
# SQLAlchemy 1.4 (create_engine raises NoSuchModuleError for it), while
# 'postgresql://' is accepted by every SQLAlchemy version.
engine = create_engine('postgresql://%s@localhost/%s' % (username, dbname))

# Create the database on first run so the later to_sql() calls have a target.
if not database_exists(engine.url):
    create_database(engine.url)

In [82]:
# TODO: site '394220106431500' has water temp data that still needs extracting
#       (locate it with site_no.index('394220106431500')).

# Write the site lookup table to the database, overwriting any existing copy.
site_loc.to_sql(name='site_locations', con=engine, if_exists='replace')

In [83]:
# Fetch daily-value ('dv') records for every site from 2000-01-01 through today
# and write each site's frame to its own table named 'n<site_no>'.
start = '2000-01-01'
end = datetime.datetime.today().strftime('%Y-%m-%d')  # today's date, yyyy-mm-dd

for site in site_no:
    herring = hf.NWIS(site, 'dv', start, end)
    herring.get_data()
    my_dict = herring.json()
    df = hf.extract_nwis_df(my_dict)

    # Columns arrive labelled 'USGS:<site>:00060:00003[_qualifiers]'; shorten
    # them to 'y' / 'flags'. index=str also turns the index labels into strings.
    value_col = "USGS:" + site + ":00060:00003"
    df.rename(
        index=str,
        columns={value_col: "y", value_col + "_qualifiers": "flags"},
        inplace=True,
    )

    # Duplicate the index into a regular 'ds' column.
    df['ds'] = df.index

    # Replace any table left over from a previous run.
    df.to_sql("n" + str(site), engine, if_exists='replace')

In [79]:
# Look up the position of site '404417108524900' in the site_no list
# (list.index raises ValueError when the value is not present).
site_no.index('404417108524900')

327

In [7]:
# Open a direct psycopg2 connection to the same database; start from None so
# `con` never points at a handle from an earlier run if connect() fails.
con = None
con = psycopg2.connect(database=dbname, user=username)

# Read back the table for the first site (tables are named 'n<site_no>').
sql_query = "\nSELECT * FROM n{};\n".format(site_no[0])
site_data_from_sql = pd.read_sql_query(sql_query, con)
site_data_from_sql.head()

DatabaseError: Execution failed on sql '
DROP TABLE 06614800;
': syntax error at or near "06614800"
LINE 2: DROP TABLE 06614800;
                   ^


In [16]:
# Plot the discharge frame on a log-scaled y axis and save it to disk.
# df.plot() hands back the Axes it drew on, so label that object directly.
ax = df.plot()
ax.set_title('Daily Mean Discharge for Roaring Fork River')
ax.set_xlabel('Date')
ax.set_ylabel('Discharge (cfs)')
ax.set_yscale('log')

# Save, then close the figure so it is not rendered inline.
ax.figure.savefig('RRFork1.png')
plt.close(ax.figure)

In [8]:
# Shorten the column labels for site 09081000 to 'y' (value) and 'flags'
# (qualifiers); index=str also converts the index labels to strings.
discharge_col = "USGS:09081000:00060:00003"
df.rename(
    index=str,
    columns={discharge_col: "y", discharge_col + "_qualifiers": "flags"},
    inplace=True,
)

# Duplicate the index into a regular 'ds' column.
df['ds'] = df.index

df.head()

Unnamed: 0_level_0,y,flags,ds
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-23 00:00:00,252.0,A,2000-01-23 00:00:00
2000-01-24 00:00:00,252.0,A,2000-01-24 00:00:00
2000-01-25 00:00:00,264.0,A,2000-01-25 00:00:00
2000-01-26 00:00:00,271.0,A,2000-01-26 00:00:00
2000-01-27 00:00:00,262.0,A,2000-01-27 00:00:00


In [9]:
# Build the Prophet model (note: despite the name, df_prophet is the model
# object, not a DataFrame) and fit it to the frame holding 'ds' and 'y'.
df_prophet = fbprophet.Prophet(
    changepoint_prior_scale=0.05,
    daily_seasonality=True,
    interval_width=0.25,
)
df_prophet.fit(df)

<fbprophet.forecaster.Prophet at 0x115c026a0>

In [10]:
# Extend the timeline by 365 daily steps and predict over the whole range;
# df_forecast ends up holding the prediction frame.
df_forecast = df_prophet.predict(
    df_prophet.make_future_dataframe(periods=365, freq='D')
)

In [11]:
# Draw the forecast; Prophet's plot() returns the Matplotlib figure it created,
# so style and save that figure explicitly instead of the pyplot "current" one.
fig = df_prophet.plot(df_forecast, xlabel='Date', ylabel='Discharge (cfs)')
ax = fig.gca()
ax.set_title('Discharge at ROARING FORK RIVER NEAR EMMA, CO')
ax.set_ylim(55, 6000)
ax.set_yscale('log')

# Save, then close the figure so it is not rendered inline.
fig.savefig('RRFork1_Pred.png')
plt.close(fig)

In [12]:
# Plot the forecast components; plot_components() returns the figure it built,
# which is saved and then closed so it is not rendered inline.
fig = df_prophet.plot_components(df_forecast)
fig.savefig('RRFork1_Components.png')
plt.close(fig)

In [15]:
# Interactive inspection of the NWIS object left in `herring` by the download
# loop; prints the class documentation and is not part of the analysis itself.
help(herring)

Help on NWIS in module hydrofunctions.station object:

class NWIS(Station)
 |  A class for working with data from the USGS NWIS service.
 |  
 |  description
 |  
 |  Args:
 |      site (str or list of strings):
 |          a valid site is '01585200' or ['01585200', '01646502']. Site
 |          should be None if stateCd or countyCd are not None.
 |  
 |      service (str):
 |          can either be 'iv' or 'dv' for instantaneous or daily data.
 |          'dv'(default): daily values. Mean value for an entire day.
 |          'iv': instantaneous value measured at this time. Also known
 |          as 'Real-time data'. Can be measured as often as every
 |          five minutes by the USGS. 15 minutes is more typical.
 |  
 |      start_date (str):
 |         should take on the form yyyy-mm-dd
 |  
 |      end_date (str):
 |          should take on the form yyyy-mm-dd
 |  
 |      stateCd (str):
 |          a valid two-letter state postal abbreviation. Default is None.
 |  
 |      county