In [1]:
# Set up environment

import subprocess
import warnings
from io import BytesIO

import folium
import hvplot.pandas
import pandas as pd
import requests

warnings.filterwarnings('ignore')

In [2]:
# Site map for Boulder Creek, CO

sg_lat = 40.05165184
sg_lon = -105.1788754

# Initialize map and tweak settings
m = folium.Map(
    # Location to display
    location=(sg_lat, sg_lon),
    # Turns off annoying zooming while trying to scroll to the next cell
    scrollWheelZoom=False)

# Put a marker at the stream gauge location
folium.Marker([sg_lat, sg_lon], popup="Stream Gauge on the Boulder Creek"
               "in Boulder, CO").add_to(m)

# Display the map
m

In [3]:
# Read url

nwis_url = (
    "https://waterdata.usgs.gov/nwis/dv"
    "?cb_00060=on"
    "&format=rdb"
    "&site_no=06730200"
    "&legacy="
    "&referred_module=sw"
    "&period="
    "&begin_date=1985-10-01"
    "&end_date=2022-09-30")

# Send an HTTP GET request to the URL
nwis_response = requests.get(nwis_url)
nwis_response.raise_for_status()

nwis_response

<Response [200]>

In [4]:
# Print the top of the data
for i, line in enumerate(nwis_response.content.splitlines()[:10]):
    print(i, line)

1 b'# Some of the data that you have obtained from this U.S. Geological Survey database'
2 b"# may not have received Director's approval. Any such data values are qualified"
3 b'# as provisional and are subject to revision. Provisional data are released on the'
4 b'# condition that neither the USGS nor the United States Government may be held liable'
5 b'# for any damages resulting from its use.'
6 b'#'
7 b'# Additional info: https://help.waterdata.usgs.gov/policies/provisional-data-statement'
8 b'#'
9 b'# File-format description:  https://help.waterdata.usgs.gov/faq/about-tab-delimited-output'


In [5]:
# Take a look at the data. What got downloaded?
for i, line in enumerate(nwis_response.content.splitlines()[:35]):
    # Skip commented lines
    if not line.startswith(b'#'):
        print(i, line)

27 b'agency_cd\tsite_no\tdatetime\t17663_00060_00003\t17663_00060_00003_cd'
28 b'5s\t15s\t20d\t14n\t10s'
29 b'USGS\t06730200\t1986-10-01\t30.0\tA'
30 b'USGS\t06730200\t1986-10-02\t30.0\tA'
31 b'USGS\t06730200\t1986-10-03\t30.0\tA'
32 b'USGS\t06730200\t1986-10-04\t30.0\tA'
33 b'USGS\t06730200\t1986-10-05\t30.0\tA'
34 b'USGS\t06730200\t1986-10-06\t30.0\tA'


In [6]:
# Import data with pandas

co_q_df = pd.read_csv(
    BytesIO(nwis_response.content),
    comment="#",
    delimiter="\t",
    skiprows=[27, 28],
    names=["agency_cd", "site_no", "datetime", "streamflow_cfs", "code"],
    index_col="datetime",
    parse_dates=True,
)
co_q_df

Unnamed: 0_level_0,agency_cd,site_no,streamflow_cfs,code
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1986-10-01,USGS,6730200,30.0,A
1986-10-02,USGS,6730200,30.0,A
1986-10-03,USGS,6730200,30.0,A
1986-10-04,USGS,6730200,30.0,A
1986-10-05,USGS,6730200,30.0,A
...,...,...,...,...
2022-09-26,USGS,6730200,25.1,A
2022-09-27,USGS,6730200,23.1,A
2022-09-28,USGS,6730200,24.5,A
2022-09-29,USGS,6730200,26.1,A


In [7]:
# replace dataframe

co_q_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 13149 entries, 1986-10-01 to 2022-09-30
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   agency_cd       13149 non-null  object 
 1   site_no         13149 non-null  int64  
 2   streamflow_cfs  13149 non-null  float64
 3   code            13149 non-null  object 
dtypes: float64(1), int64(1), object(2)
memory usage: 513.6+ KB


In [8]:
#plot data for specific time period

co_flood_df = co_q_df['2018-10':'2020-09']
co_flood_df

Unnamed: 0_level_0,agency_cd,site_no,streamflow_cfs,code
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-10-01,USGS,6730200,34.8,A
2018-10-02,USGS,6730200,33.0,A
2018-10-03,USGS,6730200,28.9,A
2018-10-04,USGS,6730200,28.9,A
2018-10-05,USGS,6730200,29.3,A
...,...,...,...,...
2020-09-26,USGS,6730200,17.0,A
2020-09-27,USGS,6730200,18.6,A
2020-09-28,USGS,6730200,21.2,A
2020-09-29,USGS,6730200,22.7,A


In [9]:
# plot from 2018-2020

co_flood_df.hvplot(
    y='streamflow_cfs',
    title='Streamflow in Boulder Creek, Boulder, CO',
    xlabel='Date', ylabel='Streamflow')

In [10]:
#plot total streamflow over time

co_q_df.streamflow_cfs.hvplot()

In [11]:
# resample

co_ann_max_q_df = co_q_df[['streamflow_cfs']].resample('AS').max()
co_ann_max_q_df

Unnamed: 0_level_0,streamflow_cfs
datetime,Unnamed: 1_level_1
1986-01-01,77.0
1987-01-01,887.0
1988-01-01,564.0
1989-01-01,331.0
1990-01-01,402.0
1991-01-01,659.0
1992-01-01,280.0
1993-01-01,806.0
1994-01-01,354.0
1995-01-01,1420.0


In [12]:
# plot resampled data

co_ann_max_q_df.hvplot(
    title='Streamflow in Boulder Creek, Boulder, CO from 1986-2022',
    xlabel='Date', ylabel='Streamflow'
)

In [14]:
%%capture
%%bash
jupyter nbconvert bouldercreek-timeseries.ipynb --to html --no-input