In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
import requests
from matplotlib.backends.backend_pdf import PdfPages

# Introduction to Python for hydrologists &mdash; pandas


## Create a URL to read in a single station

In [None]:
# NWIS groundwater-levels request for a single site in RDB format.
# Python joins the adjacent string literals below into one URL string.
url = ('http://waterservices.usgs.gov/nwis/gwlevels/'
       '?format=rdb,1.0'
       '&sites=430429089230301'
       '&startDT=1880-01-01'
       '&endDT=2018-01-01'
       '&siteType=GW')

In [None]:
# Download the station record from the NWIS web service.
dv_file = requests.get(url)
# Stop here on an HTTP error rather than saving an error page as if it were data.
dv_file.raise_for_status()

# Make sure the output folder exists so open() does not fail on a fresh checkout.
os.makedirs('data', exist_ok=True)

# Decode the complete body once before writing. Iterating over the Response
# yields fixed-size byte chunks, and decoding chunk by chunk can cut a
# multi-byte character in half and raise UnicodeDecodeError.
with open(os.path.join('data',"430429089230301.dat"), 'w') as ofp:
    ofp.write(dv_file.content.decode())

In [None]:
import os
import numpy as np
NWISfilename = os.path.join('data',"430429089230301.dat")
# Load every line of the saved file; the context manager closes the handle
# as soon as the lines have been read.
with open(NWISfilename, 'r') as nwis_file:
    reconnoiter = nwis_file.readlines()
# Preview at most the first 60 lines. Slicing simply stops early when the
# file is shorter, where indexing 0..59 would raise IndexError.
for preview_line in reconnoiter[:60]:
    print (preview_line.rstrip())

In [None]:
# Count the leading comment lines: numhash ends up as the position of the
# first line that does not start with '#'. If every line is a comment (or the
# list is empty) it stays at the total number of lines.
numhash = len(reconnoiter)
for position, line in enumerate(reconnoiter):
    if not line.startswith('#'):
        numhash = position
        break

print (numhash)

# Read in a time series of groundwater levels

In [None]:
# The first non-comment line holds the column names. split() with no
# separator already ignores leading/trailing whitespace and the newline.
colnames = reconnoiter[numhash].split()

In [None]:
# Parse the copy saved on disk instead of requesting the URL a second time:
# numhash and colnames were derived from that file, so the row offset and the
# column names are guaranteed to describe the data being read.
nwis_df = pd.read_csv(NWISfilename, sep='\t',
                      # skip the '#' comment lines, the header line and the line
                      # after it (presumably the RDB field-format row -- confirm)
                      skiprows=numhash + 2,
                      names=colnames,
                      # True asks pandas to parse the index column as dates
                      parse_dates=True,
                      # fourth column becomes the index; the later cells use
                      # nwis_df.index.year, so it must parse as dates
                      index_col=3)

In [None]:
# Peek at the first five rows of the freshly parsed table.
nwis_df.head(n=5)

## get rid of columns that are all NaN

In [None]:
# Drop only the columns in which every value is NaN.
# how='all' removes a column when all of its entries are missing, whereas
# thresh=len(nwis_df) would demand a value in every row and therefore also
# discard columns that are only partly empty.
nwis_df = nwis_df.dropna(axis=1, how='all')

In [None]:
# Show the first five rows again to see which columns remain.
nwis_df.head(n=5)

In [None]:
# Line plot of the lev_va column against the table's index.
nwis_df['lev_va'].plot()

## resample

In [None]:
# Average lev_va within each 'M' (month) bin, then draw the means as points.
monthly_mean = nwis_df['lev_va'].resample('M').mean()
monthly_mean.plot(style='.')

In [None]:
# Average lev_va within each 'A' (annual) bin, then draw the means as points.
annual_mean = nwis_df['lev_va'].resample('A').mean()
annual_mean.plot(style='.')

## aggregate

In [None]:
fig = plt.figure(figsize=(12,4))

# Group lev_va by the calendar year of the index once and reuse the grouping.
levels_by_year = nwis_df['lev_va'].groupby(nwis_df.index.year)
mean_lev = levels_by_year.mean()
# The shaded band is the yearly mean plus/minus two standard deviations.
two_sigma = 2 * levels_by_year.std()
lower_CI = mean_lev - two_sigma
upper_CI = mean_lev + two_sigma

ax = mean_lev.plot(style='r.-')
plt.fill_between(lower_CI.index, lower_CI, upper_CI, color='r', alpha=0.2)

In [None]:
fig = plt.figure(figsize=(12,4))
# Number of non-missing lev_va entries in each calendar year, shown as bars.
count_by_year = nwis_df['lev_va'].groupby(nwis_df.index.year).count()
count_by_year.plot(kind='bar', rot=45)

## navigate

In [None]:
# Boolean mask selecting the rows whose index year is earlier than 1950.
before_1950 = nwis_df.index.year < 1950
nwis_df.loc[before_1950, 'lev_va'].plot()

In [None]:
# Combine two conditions with & to keep only years strictly between 1948 and 1950.
index_year = nwis_df.index.year
between_1948_and_1950 = (index_year < 1950) & (index_year > 1948)
nwis_df.loc[between_1948_and_1950, 'lev_va'].plot()

## set values

In [None]:
# Add 100 to lev_va for the rows whose index year lies strictly between 1948
# and 1950. NOTE: this modifies nwis_df in place, so every re-run of the cell
# adds another 100.
index_year = nwis_df.index.year
rows_to_shift = (index_year < 1950) & (index_year > 1948)
nwis_df.loc[rows_to_shift, 'lev_va'] = nwis_df.loc[rows_to_shift, 'lev_va'] + 100

In [None]:
# Plot lev_va again to see the effect of the modified values.
nwis_df['lev_va'].plot()

## groupby

In [None]:
# Write one page per calendar year to a single multi-page PDF.
pdf_path = os.path.join('data','allyears.pdf')
with PdfPages(pdf_path) as outpdf:
    for cname, cgroup in nwis_df.groupby(nwis_df.index.year):
        print(cname)
        # Fresh figure and axes for this year's page.
        year_fig, year_ax = plt.subplots()
        cgroup['lev_va'].plot(ax=year_ax, title=cname)
        outpdf.savefig(year_fig)
        # Release the figure before moving on to the next year.
        plt.close('all')
    

In [None]:
# The loop variable outlives the for-loop, so cgroup still refers to the last
# group produced by the groupby in the previous cell.
cgroup