In [1]:
# Import packages
import os
import urllib
import requests
import math
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import earthpy as et
import hydrofunctions as hf

# Date time conversion registration
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

# Get the data & set working director
# data = et.data.get_data('colorado-flood')
# os.chdir(os.path.join(et.io.HOME, 'earth-analytics'))

# Prettier plotting with seaborn
sns.set(font_scale=1.5, style="whitegrid")

In [2]:
# Create map of stations
hf.draw_map()

In [6]:
# # Request data for all stations in Colorado
# PR = hf.NWIS(stateCd='CO').get_data()

# # List the names for the first 5 sites in Colorado, USA
# PR.head()

In [5]:
# Define the site number and start and end dates that you are interested in
site = "06730500"
start = '1946-05-10'
end = '2018-08-29'

# Request data for that site and time period
longmont_resp = hf.get_nwis(site, 'dv', start, end)

# Convert the response to a json in order to use the extract_nwis_df function
longmont_resp = longmont_resp.json()

# Get metadata about the data
hf.get_nwis(site, 'dv').json()

Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&sites=06730500&startDT=1946-05-10&endDT=2018-08-29
Requested data from https://waterservices.usgs.gov/nwis/dv/?format=json%2C1.1&sites=06730500


{'name': 'ns1:timeSeriesResponseType',
 'declaredType': 'org.cuahsi.waterml.TimeSeriesResponseType',
 'scope': 'javax.xml.bind.JAXBElement$GlobalScope',
 'value': {'queryInfo': {'queryURL': 'http://waterservices.usgs.gov/nwis/dv/format=json%2C1.1&sites=06730500',
   'criteria': {'locationParam': '[ALL:06730500]',
    'variableParam': 'ALL',
    'parameter': []},
   'note': [{'value': '[ALL:06730500]', 'title': 'filter:sites'},
    {'value': '[mode=LATEST, modifiedSince=null]',
     'title': 'filter:timeRange'},
    {'value': 'methodIds=[ALL]', 'title': 'filter:methodId'},
    {'value': '2021-09-02T16:07:57.502Z', 'title': 'requestDT'},
    {'value': 'f026c5e0-0c07-11ec-88d1-2cea7f5e5ede', 'title': 'requestId'},
    {'value': 'Provisional data are subject to revision. Go to http://waterdata.usgs.gov/nwis/help/?provisional for more information.',
     'title': 'disclaimer'},
    {'value': 'sdas01', 'title': 'server'}]},
  'timeSeries': [{'sourceInfo': {'siteName': 'BOULDER CREEK AT MOUTH

In [14]:
# Get the data in a pandas dataframe format
longmont_discharge = hf.extract_nwis_df(longmont_resp)
l_discharge = pd.DataFrame(longmont_discharge[0])
l_discharge.head()

Unnamed: 0_level_0,USGS:06730500:00060:00003,USGS:06730500:00060:00003_qualifiers
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1
1946-05-10 00:00:00+00:00,16.0,A
1946-05-11 00:00:00+00:00,19.0,A
1946-05-12 00:00:00+00:00,9.0,A
1946-05-13 00:00:00+00:00,3.0,A
1946-05-14 00:00:00+00:00,7.8,A


In [15]:

l_discharge.columns = ['discharge','flag']
l_discharge.head()

Unnamed: 0_level_0,discharge,flag
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1
1946-05-10 00:00:00+00:00,16.0,A
1946-05-11 00:00:00+00:00,19.0,A
1946-05-12 00:00:00+00:00,9.0,A
1946-05-13 00:00:00+00:00,3.0,A
1946-05-14 00:00:00+00:00,7.8,A


In [16]:
l_discharge.tail()

Unnamed: 0_level_0,discharge,flag
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-08-25 00:00:00+00:00,9.86,A
2018-08-26 00:00:00+00:00,7.02,A
2018-08-27 00:00:00+00:00,4.05,A
2018-08-28 00:00:00+00:00,2.67,A
2018-08-29 00:00:00+00:00,3.36,A


In [19]:
# Plot using matplotlib
fig, ax = plt.subplots(figsize=(11, 6))

ax.scatter(x=l_discharge.index.values,
           y=l_discharge["discharge"],
           marker="o",
           s=4,
           color="purple")

ax.set(xlabel="Date", ylabel="Discharge Value (CFS)",
       title="Stream Discharge - Station {} \n {} to {}".format(site, start, end))

plt.show()

  plt.show()


In [20]:
# add a year column to your longmont discharge data
l_discharge["year"] = l_discharge.index.year

# Calculate annual max by resampling
l_discharge_annual_max = l_discharge.resample('AS').max()
l_discharge_annual_max.head()

Unnamed: 0_level_0,discharge,flag,year
datetimeUTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1946-01-01 00:00:00+00:00,99.0,A,1946
1947-01-01 00:00:00+00:00,1930.0,A,1947
1948-01-01 00:00:00+00:00,339.0,A,1948
1949-01-01 00:00:00+00:00,2010.0,hf.missing,1949
1950-01-01 00:00:00+00:00,63.594991,hf.missing,1950


In [22]:
# download usgs annual max data from figshare
url = "https://nwis.waterdata.usgs.gov/nwis/peak?site_no=06730500&agency_cd=USGS&format=rdb"
download_path = "/home/awstclair/github_forks/earth-analytics/data/colorado-flood/downloads/annual-peak-flow.txt"



urllib.request.urlretrieve(url, download_path)

('/home/awstclair/github_forks/earth-analytics/data/colorado-flood/downloads/annual-peak-flow.txt',
 <http.client.HTTPMessage at 0x7db83b46f970>)

In [23]:
# A function that counts the number of lines with a comment 
def count_the(file_url):
    r = requests.get(file_url, stream=True)
    file = r.text
    count = 0
    for line in file:
        if line.startswith('#'):
            count += 1
    return count

# Lines to skip
line_to_skip = count_the(url)+1

In [24]:
# Open the data using pandas
usgs_annual_max = pd.read_csv(download_path,
                              skiprows=[line_to_skip],
                              comment="#",
                              sep='\t',
                              usecols=["peak_dt","peak_va"],
                              parse_dates=["peak_dt"],
                              index_col="peak_dt")

usgs_annual_max.head()

Unnamed: 0_level_0,peak_va
peak_dt,Unnamed: 1_level_1
1927-07-29,407.0
1928-06-04,694.0
1929-07-23,530.0
1930-08-18,353.0
1931-05-29,369.0


In [25]:
# Add a year column to the data for easier plotting
usgs_annual_max["year"] = usgs_annual_max.index.year

# Are there any years that have two entries?
usgs_annual_max[usgs_annual_max.duplicated(subset="year") == True]

Unnamed: 0_level_0,peak_va,year
peak_dt,Unnamed: 1_level_1,Unnamed: 2_level_1
1947-10-15,721.0,1947
1993-10-18,497.0,1993


In [26]:
# Remove duplicate years - keep the max discharge value
usgs_annual_max = usgs_annual_max.sort_values(
    'peak_va', ascending=False).drop_duplicates('year').sort_index()

# If this returns no results you have successfully removed duplicates!
usgs_annual_max[usgs_annual_max.duplicated(subset="year") == True]

Unnamed: 0_level_0,peak_va,year
peak_dt,Unnamed: 1_level_1,Unnamed: 2_level_1


In [27]:
# Plot calculated vs USGS annual max flow values
fig, ax = plt.subplots(figsize=(11, 9))

ax.plot(usgs_annual_max["year"],
        usgs_annual_max["peak_va"],
        color="purple",
        linestyle=':',
        marker='o',
        label="Instantaneous Value")

ax.plot(l_discharge_annual_max["year"],
        l_discharge_annual_max["discharge"],
        color="lightgrey",
        linestyle=':',
        marker='o', label="Mean Daily Value")
ax.legend()
ax.set_title(
    "Annual Maxima - Downloaded Instantaneous vs. Derived Daily Peak Flows")

plt.show()

NameError: name 'longmont_discharge_annual_max' is not defined