# USGS Stream Flow Bulk Downloader
AUTH: Nathan T. Stevens  
ORG: Pacific Northwest Seismic Network  
LICENSE: GNU GPLv3  
PURPOSE: This notebook details how to get USGS surface water gage data from the WaterWatch dataservice. 

In [69]:
import pandas as pd
from pathlib import Path
import requests, os
from collections import defaultdict

In [70]:
# Map location of data directory created by 
# USGS_Stream_Gauge_Metadata_Downloader.ipynb
PWD = Path().cwd()
DATADIR = PWD/'USGS_Stream_Gauge'
SITE_CSV = DATADIR/'usgs_gauge_site_metadata.csv'
batchsize = 10
# Specify time-range for query
t0 = pd.Timestamp('2025-11-01 00:00:00', tz='US/Pacific')
t1 = pd.Timestamp('2025-12-31 23:59:59', tz='US/Pacific')
# Data types
params = ['stage','discharge']

In [71]:
# Load site metadata
df_site = pd.read_csv(SITE_CSV, index_col='id')
display(df_site)

Unnamed: 0_level_0,name,lat,lng,class,url,huc_cd,Stage (ft),Discharge (cfs),Class,Length of record (years),Date,% normal(median) (%),% normal(mean) (%),Status,Stage (adj) (ft)
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
10352500,"USGS 10352500 MCDERMITT CK NR MCDERMITT, NV",41.96655720,-117.83181200,4,https://waterdata.usgs.gov/monitoring-location...,16040201,2.23,3.77,10-24,74.0,2025-12-15 12:00:00-08:00,49.93,28.54,,4547.23
10387110,USGS 10387110 CHEWAUCAN RIVER AT MOUTH NEAR VA...,42.52208056,-120.24945000,0,https://waterdata.usgs.gov/monitoring-location...,171200060506,8.52,,Not-ranked,,2025-12-15 12:00:00-08:00,,,,
10387150,"USGS 10387150 LAKE ABERT NEAR VALLEY FALLS, OR",42.60350000,-120.18730560,0,https://waterdata.usgs.gov/monitoring-location...,17120006,4253.30,,Not-ranked,,2025-12-15 12:45:00-08:00,,,,4253.30
10396000,USGS 10396000 DONNER UND BLITZEN RIVER NR FREN...,42.79083330,-118.86750000,5,https://waterdata.usgs.gov/monitoring-location...,17120003,1.99,50.50,25-75,94.0,2025-12-15 12:00:00-08:00,120.24,90.19,,4262.32
11491450,"USGS 11491450 IRVING CREEK NEAR LENZ, OR",42.95166667,-121.45905560,0,https://waterdata.usgs.gov/monitoring-location...,18010201,19.71,0.92,Not-ranked,,2025-12-15 12:30:00-08:00,,,,4636.71
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14244180,USGS 14244180 COWLITZ RIVER NEAR 1ST AVE NW AT...,46.14722008,-122.91605530,0,https://waterdata.usgs.gov/monitoring-location...,,10.79,,Not-ranked,,2025-09-08 12:15:00-07:00,,,,10.79
14246900,"USGS 14246900 COLUMBIA RIVER AT PORT WESTWARD,...",46.18122136,-123.18345390,5,https://waterdata.usgs.gov/monitoring-location...,17080003,8.24,211000.00,25-75,34.0,2025-12-15 12:40:00-08:00,92.14,82.78,,7.24
14246900,"USGS 14246900 COLUMBIA RIVER AT PORT WESTWARD,...",46.18122136,-123.18345390,5,https://waterdata.usgs.gov/monitoring-location...,17080003,8.24,211000.00,25-75,34.0,2025-12-15 12:40:00-08:00,92.14,82.78,,7.24
1203951610,"USGS 1203951610 QUINAULT RIVER NEAR TAHOLAH, WA",47.35777778,-124.18444440,0,https://waterdata.usgs.gov/monitoring-location...,17100102,9.97,15300.00,Not-ranked,6.0,2025-12-15 12:15:00-08:00,400.00,293.95,,81.97


In [72]:
# Define global parameters
_BASE_URL = 'https://nwis.waterservices.usgs.gov/nwis/iv/?'
# Data product mapping parameters
_PARCD_MAP = {
    'stage': '00065',
    'discharge': '00060',
    'temperature': '00010'
}
# Query key terms
_Q_KEYS = ['sites','agencyCd','startDT','endDT','parameterCd','format']
_UNITS = {'stage': 'ft', 'discharge': 'cfs', 'temperature': 'C'}
# Time shifts
_TZ_shift = {'PDT': '-7:00','PST': '-8:00'}

In [73]:
# # The USGS instantaneous gauge data query is limited to a maximum of 100 stations per query
# # this block splits out the requested stations into <= 100 batches
# batchsize = int(batchsize)
# if batchsize > 100:
#     batchsize = 100
# elif batchsize < 0:
#     batchsize = 1
# else:
#     pass
# if len(df_site) > batchsize:
#     batches = {_e: df_site.iloc[_e*batchsize: int((_e + 1)*batchsize)] for _e in range((len(df_site)//batchsize) + 1)}
# else:
#     batches = {0: df_site}

# print(f'Split into {len(batches)} batches')
# for bn, _dfs in batches.items():
#     print(f'Batch {bn}: {len(_dfs)} sites')

In [74]:
for _e, (idx, row) in enumerate(df_site.iterrows()):
    print(f'Processing site {idx} ({_e + 1}/{len(df_site)})')
    site_str = str(idx)
    param_str = ','.join([_PARCD_MAP[_p] for _p in params])
    _url = _BASE_URL + \
        f'sites={site_str}&agencyCd=USGS&' + \
        f'parameterCd={param_str}&' + \
        f'startDT={t0.isoformat()}&endDT={t1.isoformat()}&' + \
        'format=rdb'
    request = requests.get(_url)
    if request.status_code == 400:
        print('status_code: 400 - bad query - skipping')
        continue

    lines = request.text.split('\n')
    hdr = []
    body = defaultdict(list)
    for line in lines:
        if line == '':
            continue
        elif '#' == line[0]:
            hdr.append(line)
        elif 'agency_cd' in line:
            cols = []
            for col in line.split('\t'):
                if any(_k in col for _k in _Q_KEYS):
                    parts = col.split('_')
                    parname = _PARCD_MAP[parts[1]]
                    if parts[-1] == 'cd':
                        cols.append('_'.join([parname, 'cd']))
                    else:
                        cols.append(parname)
                else:
                    cols.append(col)
        elif '5s' == line[:2]:
            fstr = line.split('\t')
            fstr_mapping = dict(zip(cols, fstr))
        else:
            parts = line.split('\t')
            try:
                _tmp = dict(zip(cols, parts))
            except:
                breakpoint()
            # Parse datetime with timezone
            datetime = pd.Timestamp(_tmp['datetime']+_TZ_shift[_tmp['tz_cd']])
            _tmp['datetime'] = datetime
            # _tmp.pop('tz_cd')
            # Parse Numeric Columns as Float
            for _k, _v in _tmp.items():
                if fstr_mapping[_k][-1] == 'n':
                    try:
                        _v = float(_v)
                    except:
                        _v = float('nan')
                body[_k].append(_v)
    data = pd.DataFrame(body)
    _rnmapper = {}
    for _col in data.columns:
        if '00060' in _col:
            print(f'{_col} to discharge')
            prefix = 'discharge'
            unit = 'cfs'
        elif '00065' in _col:
            prefix = 'gage_height'
            unit = 'ft'
        elif '00010' in _col:
            prefix = 'temperature'
            unit = 'C'
        else:
            continue
        
        if '_cd' in _col:
            suffix = 'qual'
        else:
            suffix = unit
        
        _rnmapper.update({_col: '_'.join([prefix, suffix])})
    data = data.rename(columns=_rnmapper)
    data.index = data.datetime
    data.drop(columns=['datetime','tz_cd'], inplace=True)
    if _e == 0:
        display(hdr)
        display(data)
    savedir = DATADIR/str(idx)
    os.makedirs(savedir, exist_ok=True)
    data.to_csv(savedir/f'{idx}_data.csv', header=True, index=True)
    with open(str(savedir/f'{idx}_header.txt'), 'w') as _f:
        for _h in hdr:
            _f.write(f'{_h}\n')
    # if _e > 1:
    #     break
    # data.to_csv(DATADIR/f'{row.org}_{idx}_data.csv')



Processing site 10352500 (1/743)
104148_00060 to discharge
104148_00060_cd to discharge


 '# Some of the data that you have obtained from this U.S. Geological Survey database may not ',
 "# have received Director's approval.  Any such data values are qualified as provisional and ",
 '# are subject to revision.  Provisional data are released on the condition that neither the ',
 '# USGS nor the United States Government may be held liable for any damages resulting from its use.',
 '#  Go to http://help.waterdata.usgs.gov/policies/provisional-data-statement for more information.',
 '#',
 '# File-format description:  http://help.waterdata.usgs.gov/faq/about-tab-delimited-output',
 '# Automated-retrieval info: http://help.waterdata.usgs.gov/faq/automated-retrievals',
 '#',
 '# Contact:   gs-w_support_nwisweb@usgs.gov',
 '# retrieved: 2025-12-15 16:25:49 -05:00\t(nadww02)',
 '#',
 '# Data for the following 1 site(s) are contained in this file',
 '#    USGS 10352500 MCDERMITT CK NR MCDERMITT, NV',
 '# -------------------------------------------------------------------------------

Unnamed: 0_level_0,agency_cd,site_no,discharge_cfs,discharge_qual,stage_ft,stage_qual
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-10-31 23:00:00-07:00,USGS,10352500,4.66,P,2.28,P
2025-10-31 23:15:00-07:00,USGS,10352500,4.66,P,2.28,P
2025-10-31 23:30:00-07:00,USGS,10352500,4.66,P,2.28,P
2025-10-31 23:45:00-07:00,USGS,10352500,4.66,P,2.28,P
2025-11-01 00:00:00-07:00,USGS,10352500,4.66,P,2.28,P
...,...,...,...,...,...,...
2025-12-15 12:00:00-08:00,USGS,10352500,3.77,P,2.23,P
2025-12-15 12:15:00-08:00,USGS,10352500,3.77,P,2.23,P
2025-12-15 12:30:00-08:00,USGS,10352500,3.62,P,2.22,P
2025-12-15 12:45:00-08:00,USGS,10352500,3.77,P,2.23,P


Processing site 10387110 (2/743)
Processing site 10387150 (3/743)


KeyError: 'tz_cd'