In [None]:
import dataretrieval
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

## this is a great overview of the main pandas functionality
https://pandas.pydata.org/docs/user_guide/10min.html

## Quickly grab some data from NWIS - let's get the Colorado River at Lee's Ferry

In [None]:
# USGS site number passed to the data request below; kept as a string so
# the leading zero is preserved.
site = "09380000"

In [None]:
# Request the daily-values ('dv') record for the chosen site over the given
# date window, then preview the first few rows.
df = dataretrieval.get_record(
    sites=site,
    service='dv',
    start='1922-10-01',
    end='2023-09-19',
)
df.head()

In [None]:
# Fetch the 'pmcodes' records for the three parameter codes listed here.
pmcodes = dataretrieval.get_record(
    service='pmcodes',
    parameterCd=['00060', '00095', '80154'],
)

In [None]:
# Display the parameter-code records retrieved in the cell above.
pmcodes

## We can access data by column label, by index label, by integer position, or with boolean conditions

In [None]:
# Transpose the frame so that its columns become rows (and rows become columns).
# NOTE(review): this transposes the entire record; `df.head().T` may be a
# lighter preview — confirm intent.
df.T

In [None]:
# Select a single column by its label.
df['00095_Mean']

In [None]:
# Select by integer position: position 1 is the second row (positions start at 0).
df.iloc[1]

In [None]:
# Boolean selection: build a True/False mask from the index's year and keep
# only the rows stamped in 1922.
is_1922 = df.index.year == 1922
df.loc[is_1922]

### let's subset to only the columns we care about

In [None]:
# Keep only the 'site_no' and '00060_Mean' columns (all rows), then display
# the narrowed frame.
df = df.loc[:, ['site_no', '00060_Mean']]
df

## and rename them to better names

In [None]:
# Replace the column label '00060_Mean' with the friendlier 'Q_cfs' and
# display the result.
df = df.rename({"00060_Mean": "Q_cfs"}, axis="columns")
df

## we can see summary attributes

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the frame.
df.describe()

In [None]:
# Arithmetic mean of the Q_cfs column over the whole record.
df.Q_cfs.mean()

In [None]:
# Count how many times each distinct row (combination of column values) occurs.
df.value_counts()

## make a quick plot

In [None]:
# Line plot of the Q_cfs column against the frame's index.
df['Q_cfs'].plot()

## or a histogram

In [None]:
# Histogram of the Q_cfs values, split into 50 bins.
df['Q_cfs'].hist(bins=50)

## Export to Excel

In [None]:
# First export attempt. Excel cannot store timezone-aware datetimes, so
# pandas raises ValueError if the index still carries a timezone
# (presumably the case for the record as downloaded — the following cells
# strip it and retry). Catch and show the error instead of letting it abort
# a "Restart & Run All".
try:
    df.to_excel('mydata.xlsx')
except ValueError as err:
    print(f"to_excel failed: {err}")

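Excel does not support timezone-aware datetimes, so the index has to be made timezone-naive before exporting. See:
https://stackoverflow.com/questions/16628819/convert-pandas-timezone-aware-datetimeindex-to-naive-timestamp-but-in-certain-t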

In [None]:
# Make the index timezone-naive so it can be written to Excel.
# tz_localize(None) removes the timezone while keeping each timestamp's
# wall-clock value, exactly like replace(tzinfo=None) on every element, but
# it is vectorised and also keeps the index name.
df.index = df.index.tz_localize(None)

In [None]:
# Export again now that the index has had its timezone removed; the frame
# (index included) is written to 'mydata.xlsx' in the working directory.
df.to_excel('mydata.xlsx')

## and read it back in

In [None]:
# Reload the workbook, reading only spreadsheet columns A and C; the first
# of the columns read becomes the index.
df = pd.read_excel(
    'mydata.xlsx',
    index_col=0,
    usecols="a,C",
)

In [None]:
# Display the frame that was just read back from the workbook.
df

In [None]:
# Reload every column of the workbook, using the first column as the index.
# 'site_no' is read explicitly as text: it consists only of digits, so
# pandas' type inference would otherwise turn it into a number and drop the
# leading zero of the site identifier.
df = pd.read_excel('mydata.xlsx', index_col=0, dtype={'site_no': str})

In [None]:
# Smallest Q_cfs value within each calendar year of the index.
(
    df['Q_cfs']
    .groupby(df.index.year)
    .min()
)

In [None]:
# Bar chart of the yearly minimum Q_cfs, one bar per calendar year.
(
    df['Q_cfs']
    .groupby(df.index.year)
    .min()
    .plot.bar()
)

In [None]:
# Same yearly-minimum bar chart, drawn on a wider 14 x 4 figure.
(
    df['Q_cfs']
    .groupby(df.index.year)
    .min()
    .plot.bar(figsize=(14, 4))
)

In [None]:
# Yearly minimum Q_cfs as a bar chart on a 14 x 4 figure.
# NOTE(review): this draws the same yearly-minimum 14 x 4 chart as the
# preceding cell — likely a leftover duplicate; confirm whether it can be removed.
df.groupby(df.index.year)['Q_cfs'].min().plot.bar(figsize=(14,4))

In [None]:
# Bar chart of the yearly maximum Q_cfs on a 10 x 4 figure.
(
    df['Q_cfs']
    .groupby(df.index.year)
    .max()
    .plot.bar(figsize=(10, 4))
)

## Finally we can plot a multipage PDF with the yearly hydrograph plotted one year per page (nice!)

In [None]:
# Write a multipage PDF with one page per calendar year, each page showing
# that year's Q_cfs series. The PdfPages context manager finalises the file
# when the block exits.
with PdfPages('annual_hydrographs.pdf') as outpdf:
    for cn, cg in df.groupby(df.index.year):
        # Use an explicit figure/axes pair instead of pyplot's implicit
        # "current figure", so exactly this figure is drawn on, saved and
        # closed.
        fig, ax = plt.subplots()
        cg.Q_cfs.plot(ax=ax)
        ax.set_title(f"Mean Daily Discharge for year {cn}")
        ax.set_ylabel("Discharge (cfs)")
        outpdf.savefig(fig)
        # Close only this page's figure: frees memory inside the loop
        # without touching any other figure that may be open.
        plt.close(fig)
        print(cn)