<img src="https://www.carkeekwatershed.org/wp-content/uploads/CWCAPlogo-1.png" style="width:213px;height:72px;float:left;">

# <center> CWCAP Salmon Survey Analysis</center>
Select **Run All** to execute every cell and generate all of the figures.
### Table of contents
- [Survey Stats Table](#SurveyStatsTable)
- [Count Plot](#CountPlot)
- [Scatter Map](#ScatterMap)
- [Spawned Chart](#SpawnedChart)
- [Year By Year Count](#YearByYearCount)

In [None]:
import requests, json, matplotlib.pyplot as plt, plotly.express as px, pandas as pd, IPython.core.display as ip
from datetime import datetime
from IPython.core.debugger import set_trace
import matplotlib.dates as mdates
# Survey year used for the single-year table and figures.
mostRecentSurvey = '2020'

# All export URLs share one shape; only the year, the per-year project
# reference and the trailing form identifier differ.
_exportUrlTemplate = 'https://five.epicollect.net/api/export/entries/salmon-survey-{year}?form_ref={project}_{form}'
_projectRefs = {
    '2019': '397fba6ecc674b74836efc190840c42d',
    '2020': 'f550ab6c4dab44f49bcc33b7c1904be9',
    '2021': 'ad5ffedf0a3246a18934e6ec36ed9569',
}
_surveyFormId = '5d6f454667a28'  # form suffix used by the surveyURIs entries
_salmonFormId = '5d6f509867795'  # form suffix used by the salmonURIs entries

# Year -> export URL, one mapping per form.
surveyURIs = {
    surveyYear: _exportUrlTemplate.format(year=surveyYear, project=projectRef, form=_surveyFormId)
    for surveyYear, projectRef in _projectRefs.items()
}
salmonURIs = {
    surveyYear: _exportUrlTemplate.format(year=surveyYear, project=projectRef, form=_salmonFormId)
    for surveyYear, projectRef in _projectRefs.items()
}
        
def getEntries(uri):
    """Download every page of an entries export into a single DataFrame.

    Each JSON response is expected to carry the rows under
    ``data.entries`` and the URL of the following page (or ``None`` on the
    last page) under ``links.next``. Pages are flattened with
    ``pd.json_normalize`` and concatenated in download order.

    Args:
        uri: URL of the first page of the export.

    Returns:
        A DataFrame with the entries of all pages. Row labels are not
        renumbered, so they restart on every page exactly as they did when
        pages were appended one at a time.

    Raises:
        requests.HTTPError: if a page request returns an error status.
    """
    pages = []
    nextURL = uri
    while True:
        response = requests.get(url=nextURL, timeout=60)
        # Fail with the HTTP error itself rather than a KeyError on 'data'.
        response.raise_for_status()
        payload = response.json()  # parse the body once per page
        pages.append(pd.json_normalize(payload['data']['entries']))
        nextURL = payload['links']['next']
        if nextURL is None:
            break
    # DataFrame.append no longer exists in pandas 2.x; one concat over the
    # collected pages replaces the repeated appends.
    return pd.concat(pages)

def filterSalmonDf(salmonDf, *filters):
    """Return the rows of salmonDf that satisfy every (field, value) filter.

    Args:
        salmonDf: DataFrame to narrow down.
        *filters: any number of ``(field, value)`` pairs; a row is kept only
            when ``row[field] == value`` holds for all of them.

    Returns:
        The filtered DataFrame, or salmonDf itself when no filters are given.
    """
    remaining = salmonDf
    for criterion in filters:
        column, wanted = criterion
        matches = remaining[column] == wanted
        remaining = remaining.loc[matches]
    return remaining
    
def addStats(survey, salmonDf):
    """Summarise the salmon entries that belong to one survey row.

    Args:
        survey: one survey record; its 'ec5_uuid' selects the salmon rows and
            its 'Survey_Date' (formatted mm/dd/YYYY) becomes the date.
        salmonDf: salmon entries whose 'ec5_parent_uuid' refers to a survey.

    Returns:
        A pandas Series with the survey date and the per-category counts.
    """
    sightings = salmonDf[salmonDf['ec5_parent_uuid'] == survey['ec5_uuid']]

    def tally(*criteria):
        # Number of this survey's entries matching every (field, value) pair.
        return filterSalmonDf(sightings, *criteria).shape[0]

    deadChum = tally(('Species', 'Chum'), ('Type', 'Dead'))
    deadCoho = tally(('Species', 'Coho'), ('Type', 'Dead'))
    liveChum = tally(('Species', 'Chum'), ('Type', 'Live'))
    liveCoho = tally(('Species', 'Coho'), ('Type', 'Live'))
    remnant = tally(('Type', 'Remnant'))
    cutthroat = tally(('Species', 'Cutthroat'))
    totalDead = tally(('Type', 'Dead'))  # every dead entry, whatever the species

    surveyDate = datetime.strptime(survey['Survey_Date'], '%m/%d/%Y').date()
    stats = {
        'Survey Date': surveyDate,
        'Dead Chum': deadChum,
        'Dead Coho': deadCoho,
        'Live Chum': liveChum,
        'Live Coho': liveCoho,
        'Total Dead': totalDead,
        'Total Live Salmon': liveChum + liveCoho,
        'Total': sightings.shape[0],
        'Remnant': remnant,
        'Cutthroat': cutthroat,
    }
    return pd.Series(stats)

# Download each year's survey and salmon entries exactly once. The frames for
# mostRecentSurvey are reused below instead of being fetched a second time.
surveyDfs = {year: getEntries(uri) for year, uri in surveyURIs.items()}
salmonDfs = {year: getEntries(uri) for year, uri in salmonURIs.items()}

salmonDf = salmonDfs[mostRecentSurvey]
surveyDf = surveyDfs[mostRecentSurvey]

# One row per survey date: the counts of all surveys taken that day are summed.
statsDf = surveyDf.apply(addStats, axis=1, args=(salmonDf,)).groupby(['Survey Date']).sum().reset_index()
columnOrder = ['Survey Date', 'Total', 'Total Live Salmon', 'Total Dead', 'Live Chum', 'Live Coho', 'Dead Chum', 'Dead Coho', 'Remnant', 'Cutthroat']
statsDf = statsDf.sort_values(by=['Survey Date'])[columnOrder]

# Running totals of dead fish over the date-sorted rows.
kwargs = {'Dead to Date': statsDf['Total Dead'].cumsum(), 'Dead Chum to Date': statsDf['Dead Chum'].cumsum(), 'Dead Coho to Date': statsDf['Dead Coho'].cumsum()}
statsDf = statsDf.assign(**kwargs)

# Per-year stats; a year whose salmon export is empty gets no entry.
statsDfs = {}
for year in surveyDfs:
    if not salmonDfs[year].empty:
        statsDfs[year] = surveyDfs[year].apply(addStats, axis=1, args=(salmonDfs[year],)).groupby(['Survey Date']).sum().reset_index()
        # Move every date onto the year 1900 so all years share one
        # month-day axis. Building the datetime directly yields the same
        # value as formatting to "%m-%d" and parsing it back, without
        # parsing a day of month that has no year.
        statsDfs[year]['Survey Date'] = statsDfs[year]['Survey Date'].apply(lambda day: datetime(1900, day.month, day.day))

<a id='SurveyStatsTable'></a>

In [None]:
def displaySurveyStatsTable(statsDf):
    """Render the per-date count columns of statsDf as an HTML table."""
    shownColumns = [
        'Survey Date',
        'Total',
        'Total Live Salmon',
        'Total Dead',
        'Live Chum',
        'Live Coho',
        'Dead Chum',
        'Dead Coho',
        'Remnant',
        'Cutthroat',
    ]
    tableHtml = statsDf[shownColumns].to_html()
    display(ip.HTML(tableHtml))

def printYearlyTotals(statsDf):
    """Print the yearly salmon totals derived from statsDf.

    For every row the total is the cumulative dead count up to that date
    plus the live count on that date; the yearly total is the largest such
    sum. The Chum and Coho totals are computed the same way, each taking its
    own maximum.

    Args:
        statsDf: DataFrame with the columns 'Survey Date',
            'Total Live Salmon', 'Live Chum', 'Live Coho', 'Dead to Date',
            'Dead Chum to Date' and 'Dead Coho to Date'.
    """
    if statsDf.empty:
        # The maximum of an empty series is NaN, which int() cannot convert.
        print('No survey data available.')
        return

    salmonTotals = statsDf['Dead to Date'] + statsDf['Total Live Salmon']
    chumTotals = statsDf['Dead Chum to Date'] + statsDf['Live Chum']
    cohoTotals = statsDf['Dead Coho to Date'] + statsDf['Live Coho']

    # idxmax() returns an index label, so the row must be read with .loc;
    # .iloc is positional and only agrees when the index is 0..n-1.
    peakLabel = salmonTotals.idxmax()

    print('Yearly salmon total is:', int(salmonTotals.max()))
    print('Yearly total of Chum is:', int(chumTotals.max()))
    print('Yearly total of Coho is:', int(cohoTotals.max()))
    print('Calculated based on date:', statsDf.loc[peakLabel, 'Survey Date'])
    print('Based on total dead count:', int(statsDf.loc[peakLabel, 'Dead to Date']), '|', 'plus total live count:', int(statsDf.loc[peakLabel, 'Total Live Salmon']))
    
# Show the stats table, then print the yearly totals, for the module-level statsDf.
displaySurveyStatsTable(statsDf)
printYearlyTotals(statsDf)

<a id='CountPlot'></a>

In [None]:
def displayCountPlot(statsDf):
    """Plot the count columns of statsDf against 'Survey Date' as lines.

    The chart title is prefixed with the module-level mostRecentSurvey.

    Args:
        statsDf: DataFrame with 'Survey Date' and the plotted count columns.
    """
    plottedColumns = ['Total', 'Total Dead', 'Live Chum', 'Dead Chum', 'Live Coho', 'Dead Coho']
    # The returned axes object was bound to an unused local; it is not needed.
    statsDf.plot(
        x='Survey Date',
        y=plottedColumns,
        ylabel='Count',
        title=mostRecentSurvey + ' Fish Count',
        rot=45,
    )
# Draw the count plot for the module-level statsDf.
displayCountPlot(statsDf)

<a id='ScatterMap'></a>

In [None]:
def displayScatterMap(salmonDf):
    """Show every salmon entry as a point on a map, coloured by 'Type'.

    Args:
        salmonDf: salmon entries with 'Location.latitude',
            'Location.longitude', 'Location.accuracy', 'Type' and 'title'.
    """
    mapCenter = dict(lat=47.71157, lon=-122.3759)
    typeColors = ['blue', 'green', 'black']
    # NOTE(review): 'stamen-terrain' tiles may no longer load without a
    # provider token in recent plotly releases; confirm against the
    # installed version.
    fig = px.scatter_mapbox(
        salmonDf,
        lat='Location.latitude',
        lon='Location.longitude',
        color='Type',
        labels={'Type': 'Type'},
        color_discrete_sequence=typeColors,
        center=mapCenter,
        zoom=15,
        hover_name='title',
        hover_data=['Location.accuracy'],
        mapbox_style='stamen-terrain',
        title=mostRecentSurvey + ' Fish Heat Map',
    )
    fig.layout.coloraxis.showscale = False
    fig.show()
# Draw the scatter map for the module-level salmonDf.
displayScatterMap(salmonDf)

<a id='SpawnedChart'></a>

In [None]:
def displaySpawnedChart(salmonDf):
    """Draw a pie chart of 'Spawning_Success' shares among dead Chum.

    Prints a notice and draws nothing when salmonDf holds no dead Chum,
    because the shares would otherwise require dividing by zero.

    Args:
        salmonDf: salmon entries with 'Species', 'Type' and
            'Spawning_Success' columns.
    """
    deadChumDf = filterSalmonDf(salmonDf, ('Species', 'Chum'), ('Type', 'Dead'))
    totalDeadChum = deadChumDf.shape[0]
    if totalDeadChum == 0:
        print('No dead Chum recorded; spawn chart skipped.')
        return

    # (slice label, 'Spawning_Success' value) pairs; the label and the data
    # value differ in capitalisation for the partially spawned category.
    categories = [
        ('Spawned', 'Spawned'),
        ('Partially Spawned', 'Partially spawned'),
        ('Unspawned', 'Unspawned'),
        ('Unknown', 'Unknown'),
    ]
    spawnPercentages = [
        filterSalmonDf(deadChumDf, ('Spawning_Success', value)).shape[0] / totalDeadChum * 100
        for _, value in categories
    ]

    # Create the figure only once there is something to draw.
    fig, ax1 = plt.subplots()
    ax1.pie(spawnPercentages, labels=[label for label, _ in categories], autopct='%1.1f%%')
    plt.title(mostRecentSurvey + ' Chum Spawn Percentage')
    plt.show()
# Draw the Chum spawn pie chart for the module-level salmonDf.
displaySpawnedChart(salmonDf)

<a id='YearByYearCount'></a>

In [None]:
def displayYearByYearCount(surveyURIs, salmonURIs):
    """Plot the 'Total' count of every available year on one month-day axis.

    The per-year data comes from the module-level statsDfs. A year listed in
    surveyURIs but absent from statsDfs is skipped instead of raising a
    KeyError.

    Args:
        surveyURIs: mapping whose keys are the survey years to plot, in
            plotting order.
        salmonURIs: accepted for backward compatibility; not used.
    """
    fig, ax = plt.subplots()
    # Plot one line per year that has stats.
    for year in surveyURIs:
        if year not in statsDfs:
            continue
        ax.plot('Survey Date', 'Total', data=statsDfs[year], label=year)
    ax.set_title('Count by time of year')
    ax.set_ylabel('Count')
    ax.set_xlabel('Survey Date')
    ax.tick_params(axis='x', labelrotation=45)
    ax.legend()
    # The year stored in 'Survey Date' is a placeholder, so show month-day only.
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
    plt.show()

# Draw the year-by-year comparison using the module-level URI mappings.
displayYearByYearCount(surveyURIs, salmonURIs)