In [8]:
import requests
def get_usgs_gw1(state, start_year, end_year):
    """Fetch groundwater-level records for the active wells of one state.

    Queries the USGS ``gwlevels`` water service for ``parameterCd=72019`` at
    active ``GW`` sites, from 1 January of ``start_year`` through 31 December
    of ``end_year``.

    Args:
        state: Value sent as ``stateCd`` (for example ``"NY"``).
        start_year: First year of the query window.
        end_year: Last year of the query window.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    params = {
        "format": "json",
        "siteStatus": "active",
        "startDT": f"{start_year}-01-01",
        "endDT": f"{end_year}-12-31",
        "siteType": "GW",
        "parameterCd": "72019",
        "stateCd": state,
    }
    # This is a read-only query, so issue a GET and let requests encode the
    # parameters. Without a timeout a stalled connection would hang the kernel.
    response = requests.get(
        "https://waterservices.usgs.gov/nwis/gwlevels/",
        params=params,
        timeout=120,
    )
    # Surface HTTP errors directly instead of as a confusing JSON decode failure.
    response.raise_for_status()
    return response.json()

# get daily groundwater value for stations in a state (1900-present) = this gets daily means
def get_usgs_gw2(state, start_year, end_date="2024-01-03"):
    """Fetch daily-value groundwater records for the active wells of one state.

    Queries the USGS ``dv`` water service for ``parameterCd=72019`` at active
    ``GW`` sites, from 1 January of ``start_year`` through ``end_date``.

    Args:
        state: Value sent as ``stateCd`` (for example ``"NY"``).
        start_year: First year of the query window.
        end_date: Last day of the query window as ``YYYY-MM-DD``. Defaults to
            the date that was previously hard-coded in the URL.

    Returns:
        The decoded JSON body on success, or
        ``{"error": "Error on USGS access 2"}`` if the request fails or the
        body is not valid JSON.
    """
    params = {
        "format": "json",
        "siteStatus": "active",
        "startDT": f"{start_year}-01-01",
        "endDT": end_date,
        "siteType": "GW",
        "parameterCd": "72019",
        "stateCd": state,
    }
    try:
        # Read-only query: use GET, and bound the wait so a stalled
        # connection cannot hang the kernel.
        response = requests.get(
            "https://waterservices.usgs.gov/nwis/dv/",
            params=params,
            timeout=120,
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError):
        # Only network/HTTP/decoding problems are turned into the error
        # payload; a bare except would also swallow KeyboardInterrupt and
        # genuine programming errors.
        return {"error": "Error on USGS access 2"}

In [7]:
# Date collection

def get_gw_date(data, site, index):
    """Return the first ten characters of one reading's ``dateTime`` string.

    ``site`` indexes ``data["value"]["timeSeries"]`` and ``index`` indexes
    the readings in that series' first ``values`` entry.
    """
    series = data["value"]["timeSeries"][site]
    reading = series["values"][0]["value"][index]
    timestamp = reading["dateTime"]
    return timestamp[:10]
# Value
def get_gw_value(data, site, index):
    """Return one reading's ``value`` field converted to ``float``.

    ``site`` indexes ``data["value"]["timeSeries"]`` and ``index`` indexes
    the readings in that series' first ``values`` entry.
    """
    series = data["value"]["timeSeries"][site]
    reading = series["values"][0]["value"][index]
    raw_value = reading["value"]
    return float(raw_value)
# Site name
def get_site(data, site):
    """Return the ``siteName`` stored in a time series' ``sourceInfo``.

    ``site`` indexes ``data["value"]["timeSeries"]``.
    """
    series = data["value"]["timeSeries"][site]
    source_info = series["sourceInfo"]
    return source_info["siteName"]
# Coordinates
def get_coordinates(data, site):
    """Return ``[latitude, longitude]`` for one time series.

    Both values are read from ``sourceInfo["geoLocation"]["geogLocation"]``
    of the series at position ``site`` in ``data["value"]["timeSeries"]``.
    """
    series = data["value"]["timeSeries"][site]
    location = series["sourceInfo"]["geoLocation"]["geogLocation"]
    return [location["latitude"], location["longitude"]]
# Show all data for a site
def show_site_data(data, site, print_data):
    """Collect every reading of one time series into a date -> value dict.

    Args:
        data: Parsed response containing ``data["value"]["timeSeries"]``.
        site: Position of the series to read.
        print_data: If true, print each ``date: value`` pair as it is read.

    Returns:
        A dict mapping each date string to its float value, or the integer
        ``0`` when the series holds no readings.
    """
    reading_count = len(data["value"]["timeSeries"][site]["values"][0]["value"])
    if not reading_count:
        return 0
    readings = {}
    for position in range(reading_count):
        day = get_gw_date(data, site, position)
        level = get_gw_value(data, site, position)
        if print_data:
            print(f"   {day}:  {level}")
        # A repeated date keeps the reading that appears last.
        readings[day] = level
    return readings


In [6]:
# Get monthly data at a station over a year
import numpy as np
import matplotlib.pyplot as plt
# Month abbreviations in calendar order; used as the bar-chart labels below.
MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', "Dec"]

def gw_one_year(data, state, start_date, index_of_site, do_graph):
    """Average one site's readings by calendar month for a single year.

    Args:
        data: Parsed response, as consumed by ``show_site_data``.
        state: Label printed in the header when ``do_graph`` is true.
        start_date: The year to analyse (string or int); compared with the
            first four characters of each reading's date.
        index_of_site: Position of the site in ``data["value"]["timeSeries"]``.
        do_graph: If true, print the site header and the yearly mean and draw
            a bar chart; otherwise return the monthly means.

    Returns:
        ``0`` (after printing a message) when the site has no readings in the
        requested year. Otherwise ``None`` when ``do_graph`` is true, or
        ``[month_data]`` where ``month_data`` is a 12-item list of monthly
        means, with ``0`` for months without readings.
    """
    if do_graph:
        print('data in %s' % (state))
        print(f"Site: {get_site(data, index_of_site)}   {get_coordinates(data, index_of_site)}")
    # show_site_data returns 0 rather than an empty dict for a site with no
    # readings; normalise that so the filtering below still works.
    graph = show_site_data(data, index_of_site, False) or {}
    year = str(start_date)
    new_data = {date: value for date, value in graph.items() if date[0:4] == year}
    # A dict never equals 0, so test for emptiness to reach this branch.
    if not new_data:
        print("No data for this year at this station")
        return 0
    # Accumulate a sum and a count per month, then divide exactly once per
    # month (dividing once per reading would shrink months with many readings).
    month_totals = [0.0] * 12
    month_counts = [0] * 12
    for date, value in new_data.items():
        month_index = int(date[5:7]) - 1
        month_totals[month_index] += float(value)
        month_counts[month_index] += 1
    month_data = [
        total / count if count else 0
        for total, count in zip(month_totals, month_counts)
    ]
    if do_graph:
        plt.bar(MONTHS, month_data, color = "blue", width = 0.8)
        mean = sum(new_data.values()) / len(new_data)
        print(f"Mean = {mean:.2f}")
        plt.xlabel("Date")
        plt.ylabel("Value")
        plt.title("Groundwater in %s" % (start_date))
        plt.show()
    else:
        return [month_data]



In [5]:
# Get trend over years for the same month, same station

import numpy as np
import matplotlib.pyplot as plt

# Month abbreviations in calendar order; a name's position + 1 is its month number.
MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', "Dec"]

# MODIFY THESE
# Example settings. gw_month_over_years takes parameters with these same
# names, so these globals only take effect when they are passed in a call.
state = "NY"
start_date = "1980"
end_date = "2022"
month = "Apr"  # must be one of the abbreviations in MONTHS
index_of_site = 5  # position of the site in data["value"]["timeSeries"]

def gw_month_over_years(data, state, start_date, end_date, month, index_of_site, do_graph):
    """Average one calendar month's readings per year and fit a linear trend.

    Args:
        data: Parsed response, as consumed by ``show_site_data``.
        state: Label printed in the header when ``do_graph`` is true.
        start_date: First year covered (string or int).
        end_date: Year at which coverage stops. This year itself is not
            included, so the result spans ``end_date - start_date`` years.
        month: Month abbreviation taken from ``MONTHS`` (for example ``"Apr"``).
        index_of_site: Position of the site in ``data["value"]["timeSeries"]``.
        do_graph: If true, draw the bar chart with the trend line and print
            the slope; otherwise return the computed data.

    Returns:
        ``None`` when ``do_graph`` is true. Otherwise
        ``[x, y, years, means]`` where ``x`` is an array of year offsets from
        ``start_date`` that have at least one reading, ``y`` holds the
        matching yearly means, and ``years`` / ``means`` are the key and value
        views of a dict covering every year (``0`` marks a year with no
        readings).

    Raises:
        ValueError: If ``month`` is not in ``MONTHS``.
    """
    if do_graph:
        print('data in %s' % (state))
        print(f"Site: {get_site(data, index_of_site)}   {get_coordinates(data, index_of_site)}")
    # show_site_data returns 0 rather than an empty dict for a site with no
    # readings; normalise that so the loop below still works.
    graph = show_site_data(data, index_of_site, False) or {}

    first_year = int(start_date)
    year_count = max(int(end_date) - first_year, 0)
    target_month = MONTHS.index(month) + 1

    # Counts start at zero so each mean is sum / n. Seeding them with 1 (to
    # dodge a division by zero) would turn every mean into sum / (n + 1).
    totals = [0.0] * year_count
    counts = [0] * year_count
    for date, reading in graph.items():
        offset = int(date[0:4]) - first_year
        if int(date[5:7]) == target_month and 0 <= offset < year_count:
            totals[offset] += reading
            counts[offset] += 1

    # Years without readings keep the value 0, which callers treat as "missing".
    values = {
        str(first_year + offset): (totals[offset] / counts[offset] if counts[offset] else 0)
        for offset in range(year_count)
    }

    # Only years that actually have readings take part in the fit; the count
    # is used rather than the value so a genuine mean of 0 is not discarded.
    observed = [offset for offset in range(year_count) if counts[offset]]
    x = np.array(observed)
    y = np.array([totals[offset] / counts[offset] for offset in observed])

    if do_graph:
        # A straight-line fit needs at least two points; np.polyfit raises on
        # an empty input, so skip the trend line when data is too sparse.
        can_fit = len(x) >= 2
        if can_fit:
            a, b = np.polyfit(x, y, 1)
            plt.plot(x, a*x+b)
        plt.bar(values.keys(), values.values(), color = "green", width = 0.8)
        plt.xlabel("Date")
        plt.ylabel("Value")
        plt.title("Groundwater in %s between %s and %s" % (month, start_date, end_date))
        plt.show()
        if can_fit:
            print(f"Slope of best fit: {a:.4f}")
        else:
            print("Not enough data for a line of best fit")
    else:
        return [x, y, values.keys(), values.values()]

In [84]:
# Shows 12 month average of one year compared to average over decades
# Month abbreviations in calendar order: the x-axis labels and the month
# names handed to gw_month_over_years.
MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', "Dec"]

def compare_to_mean(data, state, selected_year, start_date, end_date, index_of_site, do_graph):
    """Compare one year's monthly means with the multi-year monthly average.

    For each month, the per-year means come from ``gw_month_over_years``.
    The plot shows the average of each month across all years as a green
    line, ``selected_year`` as a red line, and every other year as blue
    points.

    Args:
        data: Parsed response, passed through to ``gw_month_over_years``.
        state: Passed through to ``gw_month_over_years``.
        selected_year: Year to highlight (string or int).
        start_date: First year covered (string or int).
        end_date: Year at which coverage stops (not itself included).
        index_of_site: Position of the site in ``data["value"]["timeSeries"]``.
        do_graph: If true, draw the comparison plot. If false, skip plotting
            and return the computed data, as the sibling analysis functions do.

    Returns:
        ``None`` when ``do_graph`` is true. Otherwise
        ``[month_avg, full_data]`` where ``month_avg`` has one average per
        month and ``full_data[m][j]`` is the mean for month ``m`` in year
        ``start_date + j``.
    """
    full_data = []
    month_avg = []
    for month_name in MONTHS:
        mon_data = list(gw_month_over_years(data, state, start_date, end_date, month_name, index_of_site, False)[3])
        # 0 is the placeholder for a year with no readings. Exclude exactly
        # those, so the average covers the same points that get plotted.
        observed = [value for value in mon_data if value != 0]
        month_avg.append(sum(observed) / len(observed) if observed else 0)
        full_data.append(mon_data)

    if not do_graph:
        return [month_avg, full_data]

    plt.plot(MONTHS, month_avg, color = "green")
    for j in range(int(end_date) - int(start_date)):
        # One Jan..Dec series for this year; missing months become None so
        # they are left out of the drawing.
        curr_data = [
            month_values[j] if month_values[j] != 0 else None
            for month_values in full_data
        ]
        if int(selected_year) == (int(start_date) + j):
            plt.plot(MONTHS, curr_data, color = "red")
        else:
            plt.scatter(MONTHS, curr_data, color = "blue")
    plt.xlabel("Date")
    plt.ylabel("Value")
    plt.title("Groundwater in %s compared to average (%s to %s)" % (selected_year, start_date, end_date))
    plt.show()

In [107]:
# Avg every day
# Days elapsed before the first of each month in a 365-day (non-leap) year;
# adding the day of the month gives the day-of-year number.
MON_ACCUM = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]

def daily_analysis(data, state, date, start_date, end_date, index_of_site):
    """Plot, per year, the mean reading in a window around one calendar day.

    Each reading is turned into a day-of-year number with ``MON_ACCUM``.
    Readings strictly less than seven days from ``date`` are averaged for
    each year, and the yearly means are scattered with their overall mean
    drawn as a horizontal line.

    Args:
        data: Parsed response, as consumed by ``show_site_data``.
        state: Unused; kept so existing calls keep working.
        date: Centre of the window as ``"MM-DD"``.
        start_date: First year covered (string or int).
        end_date: Year at which coverage stops (not itself included).
        index_of_site: Position of the site in ``data["value"]["timeSeries"]``.

    Returns:
        ``None``. The per-year means are printed and plotted.
    """
    # show_site_data returns 0 rather than an empty dict for a site with no
    # readings; normalise that so the loop below still works.
    graph = show_site_data(data, index_of_site, False) or {}

    first_year = int(start_date)
    year_count = int(end_date) - first_year
    target_day = int(date[3:]) + MON_ACCUM[int(date[0:2]) - 1]
    half_window = 7

    totals = {}
    counts = {}
    for day_string, reading in graph.items():
        year = int(day_string[0:4])
        day_of_year = int(day_string[8:]) + MON_ACCUM[int(day_string[5:7]) - 1]
        offset = day_of_year - target_day
        # Handle windows that cross New Year: a late-December reading near an
        # early-January target belongs to the following year's window, and an
        # early-January reading near a late-December target to the previous one.
        if offset > 182:
            offset -= 365
            year += 1
        elif offset < -182:
            offset += 365
            year -= 1
        if abs(offset) < half_window:
            totals[year] = totals.get(year, 0) + reading
            counts[year] = counts.get(year, 0) + 1

    # One entry per covered year; None marks a year with nothing in the window.
    year_form = {}
    for year in range(first_year, first_year + year_count):
        year_form[str(year)] = totals[year] / counts[year] if year in counts else None
    print(year_form)

    # Plot missing years as NaN so they are simply not drawn.
    plotted = [value if value is not None else float("nan") for value in year_form.values()]
    plt.scatter(list(year_form.keys()), plotted, color = "blue")

    observed = [value for value in year_form.values() if value is not None]
    # Guard against dividing by zero when no year has data in the window.
    if observed:
        mean = sum(observed) / len(observed)
        print(f"Mean = {mean:.2f}")
        plt.axhline(mean)
    plt.xlabel("Date")
    plt.ylabel("Value")
    plt.title("Groundwater for days (2 week average around %s)" % (date))
    plt.show()

In [None]:
# Example run: query settings shared by every analysis below.
state = "NY"
start_date = "2000"
end_date = "2022"
# "MM-DD" day on which daily_analysis centres its window.
date = "10-02"

# A single request supplies the data for all four analyses.
data = get_usgs_gw1(state, start_date, end_date)
# The integer argument (1) selects which entry of data["value"]["timeSeries"] to analyse.
compare_to_mean(data, state, "2020", start_date, end_date, 1, True)
gw_month_over_years(data, state, start_date, end_date, "Feb", 1, True)
gw_one_year(data, state, start_date, 1, True)
daily_analysis(data, state, date, start_date, end_date, 1)