In [17]:
import os
import sys
import requests
import pandas as pd

In [2]:
# Maps a stream-gauge station identifier (key) to a five-character site
# code (value). Insertion order matters: other cells iterate over the keys.
codemap = {
    "08353000": "BNDN5",
    "06468250": "ARWN8",
    "11523200": "TCCC1",
    "07301500": "CARO2",
    # Two identifiers resolve to the same site code: USGS only has data
    # through 1998 for 06733000, so a second, non-numeric identifier is
    # listed as well (presumably from another agency -- TODO confirm).
    "06733000": "ESSC2",
    "BTABESCO": "ESSC2",
    "11427000": "NFDC1",
    "09209400": "LABW4",
    "06847900": "CLNK1",
    "09107000": "TRAC2",
    "06279940": "NFSW4",
}

In [15]:
# Every station identifier in codemap, in the mapping's insertion order.
stations = [*codemap]

In [42]:
# Column labels applied to the downloaded statistics table, in the order
# the columns appear in the file (the labels are assigned positionally).
new_column_names = [
    "agency_cd",
    "site_no",
    "parameter_cd",
    "ts_id",
    "loc_web_ds",
    "month_nu",
    "day_nu",
    "begin_yr",
    "end_yr",
    "count_nu",
    "mean_va",
]

In [55]:
# Query built with the help of the USGS Statistics Service test tool:
# https://waterservices.usgs.gov/rest/Statistics-Service-Test-Tool.html
#
# Requests the daily mean discharge (parameterCd=00060) for every day of the
# year for a list of sites. Statistics are only generated where there is no
# missing data.
#
# NOTE: currently this only fetches daily means for the areas of interest.
#
# The site list is derived from `stations` rather than typed out a second
# time, so a station added to `codemap` is requested automatically. Keys that
# are not purely numeric (e.g. "BTABESCO") are skipped because they are not
# USGS site numbers and would not be valid in the `sites` parameter.
url = (
    "https://waterservices.usgs.gov/nwis/stat/"
    "?format=rdb,1.0"
    "&sites=" + ",".join(site for site in stations if site.isdigit()) +
    "&statReportType=daily"
    "&statTypeCd=mean"
    "&parameterCd=00060"
)

In [56]:
# Download and parse the tab-separated statistics table.
#   * comment="#"  drops the "#"-prefixed preamble lines.
#   * header=1     treats the second remaining line as the header row, so the
#                  line before it is discarded as well; in USGS RDB output
#                  these should be the column-name and field-format lines
#                  (NOTE(review): confirm against the RDB format description).
#   * names=...    replaces that header row with our own labels.
#   * dtype=object keeps every field as text (site numbers keep their leading
#                  zeros); numeric columns are cast in a later cell.
daily_means = pd.read_csv(
    url,
    sep="\t",
    comment="#",
    header=1,
    names=new_column_names,
    dtype=object,
)

In [57]:
# Preview the freshly parsed table (every column was read with dtype=object).
daily_means

Unnamed: 0,agency_cd,site_no,parameter_cd,ts_id,loc_web_ds,month_nu,day_nu,begin_yr,end_yr,count_nu,mean_va
0,USGS,06279940,00060,161216,,1,1,1990,2019,30,153
1,USGS,06279940,00060,161216,,1,2,1990,2019,30,152
2,USGS,06279940,00060,161216,,1,3,1990,2019,30,154
3,USGS,06279940,00060,161216,,1,4,1990,2019,30,152
4,USGS,06279940,00060,161216,,1,5,1990,2019,30,148
...,...,...,...,...,...,...,...,...,...,...,...
3655,USGS,11523200,00060,11886,,12,27,1958,2020,63,269
3656,USGS,11523200,00060,11886,,12,28,1958,2020,63,335
3657,USGS,11523200,00060,11886,,12,29,1958,2020,63,331
3658,USGS,11523200,00060,11886,,12,30,1958,2020,63,347


In [58]:
# Cast the calendar/count columns to int32 and the mean value to float64;
# columns not listed here keep their current dtype.
daily_means = daily_means.astype(
    {
        **dict.fromkeys(
            ["month_nu", "day_nu", "begin_yr", "end_yr", "count_nu"],
            "int32",
        ),
        "mean_va": "float64",
    }
)

In [59]:
# Preview the table again to check the result of the numeric cast.
daily_means

Unnamed: 0,agency_cd,site_no,parameter_cd,ts_id,loc_web_ds,month_nu,day_nu,begin_yr,end_yr,count_nu,mean_va
0,USGS,06279940,00060,161216,,1,1,1990,2019,30,153.0
1,USGS,06279940,00060,161216,,1,2,1990,2019,30,152.0
2,USGS,06279940,00060,161216,,1,3,1990,2019,30,154.0
3,USGS,06279940,00060,161216,,1,4,1990,2019,30,152.0
4,USGS,06279940,00060,161216,,1,5,1990,2019,30,148.0
...,...,...,...,...,...,...,...,...,...,...,...
3655,USGS,11523200,00060,11886,,12,27,1958,2020,63,269.0
3656,USGS,11523200,00060,11886,,12,28,1958,2020,63,335.0
3657,USGS,11523200,00060,11886,,12,29,1958,2020,63,331.0
3658,USGS,11523200,00060,11886,,12,30,1958,2020,63,347.0


In [54]:
# Show the dtype of every column.
daily_means.dtypes

agency_cd        object
site_no          object
parameter_cd     object
ts_id            object
loc_web_ds       object
month_nu          int32
day_nu            int32
begin_yr          int32
end_yr            int32
count_nu          int32
mean_va         float64
dtype: object

In [60]:
# Persist the table next to the notebook. The row index is written too
# (to_csv default), so it appears as an extra unnamed first column.
daily_means.to_csv("daily_means.csv")

In [61]:
# Summary statistics of the mean_va column as a quick sanity check.
daily_means["mean_va"].describe()

count    3660.000000
mean      418.867571
std       811.574450
min         0.360000
25%        30.000000
50%        93.000000
75%       453.000000
max      6280.000000
Name: mean_va, dtype: float64

In [63]:
# Number of missing values in the mean_va column.
daily_means["mean_va"].isna().sum()

0