In [1]:
import pandas as pd
import numpy as np
from Data_Utils import *

# Get Stream Gauge Locations

## Gage Height / Flood Stage

In [None]:
# Directory handed to the location helper; per the original notes this is where
# the stream gauge location files (.txt and .csv) are saved.
gage_height_directory = r'stream_gauges'

# Fetch location metadata for the gauges that report gage height (flood stage).
# NOTE(review): get_stream_gauge_locations is not defined in this notebook --
# presumably it comes from `from Data_Utils import *`; confirm its behavior there.
# The value it returns is used in the next cell as the .csv path given to pd.read_csv.
gage_height_locations_path = get_stream_gauge_locations(gage_height_directory, data='gage height')

### Dataframe of Locations

In [None]:
# Load the gage height locations .csv produced by the helper above and tidy it
# in one pass. Every step returns a new frame, so nothing is mutated in place
# and re-running this cell always starts again from the file on disk.
df_gage_height_locations = (
    pd.read_csv(gage_height_locations_path)
    # the first row of the file is not a data record
    .drop(index=0)
    # latitude, longitude and altitude become 32-bit floats
    .astype({'dec_lat_va': np.float32,
             'dec_long_va': np.float32,
             'alt_va': np.float32})
    # discard the fields that are not used later on
    .drop(columns=['coord_acy_cd',
                   'alt_acy_va',
                   'alt_datum_cd',
                   'huc_cd',
                   'basin_cd'])
    # switch to shorter, more readable column names
    .rename(columns={'dec_lat_va': 'lat',
                     'dec_long_va': 'long',
                     'dec_coord_datum_cd': 'datum',
                     'alt_va': 'altitude',
                     'rt_bol': 'realtime_data'})
    # renumber the rows from 0 now that the first row has been removed
    .reset_index(drop=True)
)

# preview the first 10 rows of the cleaned frame
df_gage_height_locations.head(10)

In [None]:
# Summarize the cleaned gage height locations frame: the dtype of each column
# and its non-null count, which exposes any columns with missing values.
df_gage_height_locations.info()

## Streamflow / Discharge

In [2]:
# Directory handed to the location helper; per the original notes this is where
# the stream gauge location files (.txt and .csv) are saved. It is the same
# directory value used for the gage height locations.
streamflow_directory = r'stream_gauges'

# Fetch location metadata for the gauges that report streamflow (discharge).
# NOTE(review): get_stream_gauge_locations is not defined in this notebook --
# presumably it comes from `from Data_Utils import *`; confirm its behavior there.
# The value it returns is used in the next cell as the .csv path given to pd.read_csv.
streamflow_locations_path = get_stream_gauge_locations(streamflow_directory, data='streamflow')

In [3]:
# Load the streamflow locations .csv produced by the helper above and tidy it
# in one pass. Every step returns a new frame, so nothing is mutated in place
# and re-running this cell always starts again from the file on disk.
df_streamflow_locations = (
    pd.read_csv(streamflow_locations_path)
    # the first row of the file is not a data record
    .drop(index=0)
    # latitude, longitude and altitude become 32-bit floats
    .astype({'dec_lat_va': np.float32,
             'dec_long_va': np.float32,
             'alt_va': np.float32})
    # discard the fields that are not used later on
    .drop(columns=['coord_acy_cd',
                   'alt_acy_va',
                   'alt_datum_cd',
                   'huc_cd',
                   'basin_cd'])
    # switch to shorter, more readable column names
    .rename(columns={'dec_lat_va': 'lat',
                     'dec_long_va': 'long',
                     'dec_coord_datum_cd': 'datum',
                     'alt_va': 'altitude',
                     'rt_bol': 'realtime_data'})
    # renumber the rows from 0 now that the first row has been removed
    .reset_index(drop=True)
)

# preview the first 10 rows of the cleaned frame
df_streamflow_locations.head(10)

Unnamed: 0,site_no,station_nm,lat,long,datum,altitude,realtime_data
0,3207965,"GRAPEVINE CREEK NEAR PHYLLIS, KY",37.432606,-82.35376,NAD83,797.190002,1
1,3209410,"RUSSELL FORK AT CEDARVILLE, KY",37.312954,-82.359558,NAD83,743.400024,1
2,3209500,"LEVISA FORK AT PIKEVILLE, KY",37.464268,-82.52626,NAD83,631.26001,1
3,3209800,"LEVISA FORK AT PRESTONSBURG, KY",37.670929,-82.777107,NAD83,587.380005,1
4,3210000,"JOHNS CREEK NEAR META, KY",37.567043,-82.457924,NAD83,714.52002,1
5,3212500,"LEVISA FORK AT PAINTSVILLE, KY",37.815372,-82.79155,NAD83,566.179993,1
6,3213700,"TUG FORK AT WILLIAMSON, WV",37.673157,-82.280144,NAD83,619.909973,1
7,3215410,"BLAINE CREEK NEAR BLAINE, KY",38.029533,-82.846832,NAD83,632.460022,1
8,3216070,"OHIO RIVER AT IRONTON, OH",38.532055,-82.685944,NAD83,480.410004,1
9,3216500,"LITTLE SANDY RIVER AT GRAYSON, KY",38.330082,-82.939339,NAD83,557.030029,1


In [4]:
# Summarize the cleaned streamflow locations frame: the dtype of each column
# and its non-null count. In the saved output below, altitude is the only
# column with missing values (193 of 228 non-null).
df_streamflow_locations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 228 entries, 0 to 227
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   site_no        228 non-null    object 
 1   station_nm     228 non-null    object 
 2   lat            228 non-null    float32
 3   long           228 non-null    float32
 4   datum          228 non-null    object 
 5   altitude       193 non-null    float32
 6   realtime_data  228 non-null    object 
dtypes: float32(3), object(4)
memory usage: 9.9+ KB


# Get Stream Gauge Data

## Gage Height / Flood Stage

In [None]:
# Directory handed to the data helper; per the original notes this is where
# the stream gauge data file (.csv) is saved.
gage_height_data_directory = r'stream_gauges/gauge_height'

# Request gage height / flood stage measurements one site at a time, using the
# site_no column of the cleaned gage height locations frame.
# NOTE(review): get_stream_gage_data is not defined in this notebook --
# presumably it comes from `from Data_Utils import *`; what it downloads and
# writes per site should be confirmed there. Its return value is ignored here.
for site in df_gage_height_locations['site_no']:
    get_stream_gage_data(site, gage_height_data_directory, data='gage height')

## Streamflow / Discharge

In [5]:
# Directory handed to the data helper; per the original notes this is where
# the stream gauge data file (.csv) is saved.
streamflow_data_directory = r'stream_gauges/streamflow'

# Request streamflow / discharge measurements one site at a time, using the
# site_no column of the cleaned streamflow locations frame (228 rows in the
# saved .info() output above).
# NOTE(review): get_stream_gage_data is not defined in this notebook --
# presumably it comes from `from Data_Utils import *`; what it downloads and
# writes per site should be confirmed there. Its return value is ignored here.
for site in df_streamflow_locations['site_no']:
    get_stream_gage_data(site, streamflow_data_directory, data='streamflow')