<p><img src="https://www.carkeekwatershed.org/wp-content/uploads/CWCAPlogo-1.png" width="213" height="72"></p>

# CWCAP Salmon Survey Analysis
Press **Run all** to create all figures and generate the HTML report.

In [None]:
import requests, json, matplotlib.pyplot as plt, plotly.express as px, pandas as pd, IPython.core.display as ip, sys, pytz, io, base64
from datetime import datetime
from IPython.core.debugger import set_trace
import matplotlib.dates as mdates
# Season whose data feeds the tables, maps and single-season charts below.
mostRecentSurvey = '2021'

# The report timestamp is taken explicitly in Pacific time; the system clock
# cannot be used for this when the notebook runs in Colab.
timeStampPacific = datetime.now(pytz.timezone('America/Los_Angeles')).strftime('%Y-%m-%d_%H-%M-%S')
reportFileName = f'{timeStampPacific}_salmonReport.html'

# Epicollect5 export endpoints for the survey entries, keyed by season.
surveyURIs = {
    '2019': 'https://five.epicollect.net/api/export/entries/salmon-survey-2019?form_ref=397fba6ecc674b74836efc190840c42d_5d6f454667a28',
    '2020': 'https://five.epicollect.net/api/export/entries/salmon-survey-2020?form_ref=f550ab6c4dab44f49bcc33b7c1904be9_5d6f454667a28',
    '2021': 'https://five.epicollect.net/api/export/entries/salmon-survey-2021?form_ref=ad5ffedf0a3246a18934e6ec36ed9569_5d6f454667a28',
}

# Epicollect5 export endpoints for the salmon entries, keyed by season. Each
# salmon entry refers to its survey through 'ec5_parent_uuid'.
salmonURIs = {
    '2019': 'https://five.epicollect.net/api/export/entries/salmon-survey-2019?form_ref=397fba6ecc674b74836efc190840c42d_5d6f509867795',
    '2020': 'https://five.epicollect.net/api/export/entries/salmon-survey-2020?form_ref=f550ab6c4dab44f49bcc33b7c1904be9_5d6f509867795',
    '2021': 'https://five.epicollect.net/api/export/entries/salmon-survey-2021?form_ref=ad5ffedf0a3246a18934e6ec36ed9569_5d6f509867795',
}

# True when the notebook is running inside Google Colab.
IN_COLAB = 'google.colab' in sys.modules

def addHTMLToReport(html):
    """Append an HTML fragment to the report file.

    The file named by the module-level ``reportFileName`` is opened in append
    mode, so it is created on the first call and grows with every later one.

    Args:
        html: Markup to append, as a string.
    """
    # Explicit UTF-8 keeps the output independent of the platform's default
    # encoding, which may be unable to represent non-ASCII characters.
    with open(reportFileName, 'a', encoding='utf-8') as f:
        f.write(html)
    
def addFigureToReport():
    """Embed the current matplotlib figure in the report as an inline PNG.

    The figure is rendered into an in-memory buffer, base64-encoded and
    appended to the report as an ``<img>`` tag carrying a ``data:`` URI.
    """
    pngBuffer = io.BytesIO()
    plt.savefig(pngBuffer, format='png')
    pngBase64 = base64.b64encode(pngBuffer.getvalue()).decode("utf-8")
    addHTMLToReport('<img src=\'data:image/png;base64,{}\'>'.format(pngBase64))
    
def clearPreviousReports():
    """Delete report files left in the working directory by earlier runs.

    Every file matching ``*salmonReport.html`` is removed, with a message
    printed for each one. Nothing is deleted when running in Google Colab
    (``IN_COLAB`` is true).
    """
    if IN_COLAB:
        return
    import glob
    import os
    for name in glob.glob('*salmonReport.html'):
        print("Previous report file exists. Deleting " + name)
        os.remove(name)

def getEntries(uri):
    """Download every entry behind an export URI into a single DataFrame.

    The endpoint is paginated: each response carries one page of entries
    under ``data.entries`` and the URL of the following page under
    ``links.next`` (``None`` on the last page). Pages are requested 1000
    entries at a time and followed until there is no next page.

    Args:
        uri: Export URI that already contains a query string;
            ``&per_page=1000`` is appended to it for the first request.

    Returns:
        A DataFrame with one row per entry, flattened by
        ``pandas.json_normalize``. Row labels are not renumbered across
        pages, so labels repeat when more than one page is returned.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    pages = []
    nextURL = uri + '&per_page=1000'
    while nextURL is not None:
        response = requests.get(url=nextURL)
        # Fail loudly on an HTTP error instead of tripping over a missing
        # key in an error payload further down.
        response.raise_for_status()
        payload = response.json()  # parse each page only once
        pages.append(pd.json_normalize(payload['data']['entries']))
        nextURL = payload['links']['next']
    # DataFrame.append was removed in pandas 2.0; concat works on all versions.
    return pd.concat(pages)

def filterSalmonDf(salmonDf, *filters):
    """Return the rows of ``salmonDf`` that satisfy every equality filter.

    Args:
        salmonDf: DataFrame to filter.
        *filters: ``(column, value)`` pairs. A row is kept only when
            ``row[column] == value`` holds for every pair.

    Returns:
        The filtered DataFrame, or ``salmonDf`` itself when no filters are
        given.
    """
    remaining = salmonDf
    for column, wanted in filters:
        matches = remaining[column] == wanted
        remaining = remaining[matches]
    return remaining
    
def addStats(survey, salmonDf):
    """Summarise the fish recorded on one survey.

    Args:
        survey: One survey row; must provide ``'ec5_uuid'`` and
            ``'Survey_Date'``.
        salmonDf: All salmon entries, with ``'ec5_parent_uuid'``,
            ``'Species'`` and ``'Type'`` columns. Only rows whose
            ``'ec5_parent_uuid'`` equals the survey's ``'ec5_uuid'`` are
            counted.

    Returns:
        A Series with the survey date (as a ``pd.Timestamp``) and the counts
        ``'Dead Chum'``, ``'Dead Coho'``, ``'Live Chum'``, ``'Live Coho'``,
        ``'Live Cutthroat'``, ``'Dead Cutthroat'``, ``'Redds'``,
        ``'Total Dead'``, ``'Total Live Salmon'`` and ``'Total'``.

        Dead counts include entries of type ``'Remnant'``. The cutthroat
        columns combine sea-run and resident fish. The three totals cover
        chum, coho, sea-run cutthroat and unknown species; redds and
        resident cutthroat are excluded.
    """
    todaySalmonDf = salmonDf[salmonDf['ec5_parent_uuid'] == survey['ec5_uuid']]
    speciesColumn = todaySalmonDf['Species']
    typeColumn = todaySalmonDf['Type']

    def count(species, *types):
        # Number of today's entries of `species` whose Type is one of `types`.
        return int(((speciesColumn == species) & typeColumn.isin(types)).sum())

    deadTypes = ('Dead', 'Remnant')  # remnants are counted as dead fish

    deadChum = count('Chum', *deadTypes)
    deadCoho = count('Coho', *deadTypes)
    deadUnknown = count('Unknown', *deadTypes)
    deadSeaRunCutthroat = count('Sea-run Cutthroat', *deadTypes)
    deadResidentCutthroat = count('Resident Cutthroat', *deadTypes)
    deadCutthroat = deadSeaRunCutthroat + deadResidentCutthroat

    liveChum = count('Chum', 'Live')
    liveCoho = count('Coho', 'Live')
    liveUnknown = count('Unknown', 'Live')
    liveSeaRunCutthroat = count('Sea-run Cutthroat', 'Live')
    liveResidentCutthroat = count('Resident Cutthroat', 'Live')
    liveCutthroat = liveSeaRunCutthroat + liveResidentCutthroat

    # Redds are counted regardless of species.
    redds = int((typeColumn == 'Redd').sum())

    totalDead = deadChum + deadCoho + deadSeaRunCutthroat + deadUnknown
    # Live sea-run cutthroat were previously left out here even though the
    # dead total includes them and the report caption says the live total
    # does too.
    totalLiveSalmon = liveChum + liveCoho + liveSeaRunCutthroat + liveUnknown
    total = totalDead + totalLiveSalmon  # does not include redds or resident cutthroat

    return pd.Series({
        'Survey Date': pd.Timestamp(survey['Survey_Date']),
        'Dead Chum': deadChum,
        'Dead Coho': deadCoho,
        'Live Chum': liveChum,
        'Live Coho': liveCoho,
        'Live Cutthroat': liveCutthroat,
        'Dead Cutthroat': deadCutthroat,
        'Redds': redds,
        'Total Dead': totalDead,
        'Total Live Salmon': totalLiveSalmon,
        'Total': total,
    })

# Remove old reports, then open the new one with its stylesheet followed by
# the logo/title header.
clearPreviousReports()
reportHeader = (
    '<p><img src="https://www.carkeekwatershed.org/wp-content/uploads/CWCAPlogo-1.png" width="213" height="72"></p>'
    '<p><h2>CWCAP Salmon Survey Report</h2></p>'
    '<p><h5>generated {}</h5></p>'
).format(timeStampPacific)
addHTMLToReport('''<style>body {
  font-family: Arial, Helvetica, sans-serif;
  border-collapse: collapse;
  width: 100%;
}

table {
    border-collapse:collapse;
    caption-side: bottom;
}

caption {text-align:left;}

td, th {
  border: 1px solid #ddd;
  padding: 8px;
}

tbody tr {text-align: center;}

tr:nth-child(even){background-color: #f2f2f2;}

tr:hover {background-color: #ddd;}

th {
  padding-top: 12px;
  padding-bottom: 12px;
  text-align: center;
  background-color: #15331c;
  color: white;
}</style>''')
addHTMLToReport(reportHeader)
# Download each season's survey and salmon entries once. The most recent
# season used to be fetched a second time for the single-season tables.
surveyDfs = {year: getEntries(uri) for year, uri in surveyURIs.items()}
salmonDfs = {year: getEntries(uri) for year, uri in salmonURIs.items()}

# Data for the season named by mostRecentSurvey. Indexing (rather than .get)
# raises a KeyError that names the missing year if it has no URI entry.
salmonDf = salmonDfs[mostRecentSurvey]
surveyDf = surveyDfs[mostRecentSurvey]

# One row per survey date: per-survey counts summed over surveys sharing a date.
statsDf = surveyDf.apply(addStats, axis=1, args=[salmonDf]).groupby(['Survey Date']).sum().reset_index()
columnOrder = ['Survey Date', 'Total Live Salmon', 'Live Chum', 'Live Coho', 'Dead Chum', 'Dead Coho', 'Total Dead', 'Live Cutthroat', 'Dead Cutthroat', 'Redds']
statsDf = statsDf.sort_values(by=['Survey Date'])[columnOrder]
# Running totals of dead fish up to and including each survey date.
kwargs = {'Dead to Date': statsDf['Total Dead'].cumsum(), 'Dead Chum to Date': statsDf['Dead Chum'].cumsum(), 'Dead Coho to Date': statsDf['Dead Coho'].cumsum()}
statsDf = statsDf.assign(**kwargs)

# Per-season stats for the year-by-year comparison. Seasons without any
# salmon entries are skipped and therefore have no key in statsDfs.
statsDfs = {}
for year in surveyDfs:
    if not salmonDfs[year].empty:
        statsDfs[year] = surveyDfs[year].apply(addStats, axis=1, args=[salmonDfs[year]]).groupby(['Survey Date']).sum().reset_index()
        # Move every date into one placeholder year so the seasons overlay on
        # a shared month-day axis. 2000 is a leap year, so Feb 29 is valid;
        # the previous strptime("%m-%d") round trip used 1900, which rejects
        # Feb 29, and parsing a day without a year is deprecated in Python 3.13.
        statsDfs[year]['Survey Date'] = statsDfs[year]['Survey Date'].apply(lambda x: pd.Timestamp(year=2000, month=x.month, day=x.day))

In [None]:
%%HTML
<style>
table {
    caption-side: bottom;
}
caption {text-align:left;}
</style>

### Survey Stats Table

In [None]:
def _renderWithoutIndex(styler):
    """Return a Styler's HTML with the index column hidden.

    ``Styler.hide_index()`` and ``Styler.render()`` were deprecated in
    pandas 1.4 and removed in 2.0, so the current API is used when it exists
    and the old one is the fallback on older pandas.
    """
    styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
    return styler.to_html() if hasattr(styler, 'to_html') else styler.render()

def displaySurveyStatsTable(statsDf):
    """Show the per-survey totals table and append it to the report.

    Args:
        statsDf: Per-survey-date statistics; must contain the columns listed
            in ``columns`` below, with ``'Survey Date'`` holding datetimes.
    """
    title = '<h3>Survey Totals</h3>'
    columns = ['Survey Date', 'Total Live Salmon', 'Live Chum', 'Dead Chum', 'Live Coho', 'Dead Coho', 'Live Cutthroat', 'Dead Cutthroat', 'Redds']
    styler = statsDf[columns].style.format({'Survey Date': "{:%Y-%m-%d}"}).set_caption("Note: Total live salmon includes Sea-Run Cutthroat and Unknowns - entries where surveyors were unable to determine the species. Remnants are included in dead counts.")
    allHTML = title + _renderWithoutIndex(styler)
    display(ip.HTML(allHTML))
    addHTMLToReport(allHTML)

def printYearlyTotals(statsDf):
    """Show the season totals and the redd locations; append both to the report.

    The season total for a group is the maximum, over all surveys, of
    (dead counted up to and including that survey + live counted on that
    survey). The 'Based on survey' column is the survey date where the
    all-salmon total reaches that maximum.

    The redds table is built from the module-level ``salmonDf`` and
    ``surveyDf``, not from the ``statsDf`` argument.

    Args:
        statsDf: Per-survey-date statistics including the '... to Date'
            cumulative columns.
    """
    salmonSeen = statsDf['Dead to Date'] + statsDf['Total Live Salmon']
    peakLabel = salmonSeen.idxmax()
    yearlySalmonTotal = int(salmonSeen.max())
    yearlyChumTotal = int((statsDf['Dead Chum to Date'] + statsDf['Live Chum']).max())
    yearlyCohoTotal = int((statsDf['Dead Coho to Date'] + statsDf['Live Coho']).max())
    # idxmax returns an index label, so look the row up with .loc; iloc would
    # only agree while labels happen to equal row positions.
    surveyOfCalculation = statsDf.loc[peakLabel, 'Survey Date']

    yearlyTitle = '<h3>Yearly Totals</h3>'
    yearlyTotals = pd.DataFrame({'Salmon': yearlySalmonTotal, 'Chum': yearlyChumTotal, 'Coho': yearlyCohoTotal, 'Based on survey': surveyOfCalculation}, index=[0])
    yearlyTotalsTable = _renderWithoutIndex(yearlyTotals[['Salmon', 'Chum', 'Coho', 'Based on survey']].style.format({'Based on survey': "{:%Y-%m-%d}"}).set_caption("Note: Yearly total is the survey date where (day\'s live + dead up to date) is max"))

    # Attach each salmon entry to its survey, then list the redds by location.
    mergedDf = pd.merge(salmonDf, surveyDf, how='inner', left_on='ec5_parent_uuid', right_on='ec5_uuid')
    reddsTitle = '<h3>Redds by location</h3>'
    reddsTable = _renderWithoutIndex(filterSalmonDf(mergedDf, ('Type', 'Redd'))[['Stream', 'Distance', 'Survey_Date']].sort_values(by=['Stream', 'Distance']).style)

    allHTML = yearlyTitle + yearlyTotalsTable + reddsTitle + reddsTable
    display(ip.HTML(allHTML))
    addHTMLToReport(allHTML)
displaySurveyStatsTable(statsDf)
printYearlyTotals(statsDf)

### Count Plot

In [None]:
def displayCountPlot(statsDf):
    """Plot the per-survey fish counts as lines and add the figure to the report.

    Args:
        statsDf: Per-survey-date statistics with a 'Survey Date' column and
            the count columns listed in ``plottedColumns``.
    """
    plottedColumns = ['Total Dead', 'Live Chum', 'Dead Chum', 'Live Coho', 'Dead Coho']
    axes = statsDf.plot(
        x='Survey Date',
        y=plottedColumns,
        xticks=statsDf['Survey Date'],  # one tick per survey date
        rot=45,
        ylabel='Count',
        title=mostRecentSurvey + ' Fish Count',
    )
    # Show month-day only on the tick labels.
    axes.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d"))
    addFigureToReport()
displayCountPlot(statsDf)

### Scatter Map

In [None]:
def displayScatterMap(salmonDf, figureTitle):
    """Show an interactive map of salmon entries and append it to the report.

    Points are placed using the 'Location.latitude' and 'Location.longitude'
    columns and coloured by 'Type'. Hovering shows the entry's 'title' and
    'Location.accuracy'.

    Args:
        salmonDf: Entries to plot; must contain the columns named above.
        figureTitle: Title shown above the map.
    """
    fig = px.scatter_mapbox(salmonDf, lat='Location.latitude', lon='Location.longitude', color='Type', labels={'Type':'Type'}, color_discrete_map={'Live': 'green', 'Redd': 'red', 'Dead': 'black'},
                        center=dict(lat=47.71157, lon=-122.3759), zoom=15, hover_name = 'title', hover_data = ['Location.accuracy'],
                        mapbox_style='stamen-terrain', title=figureTitle)
    fig.layout.coloraxis.showscale = False
    fig.show()
    # to_html() returns the markup and writes no file. Its first positional
    # parameter is `config`, so the report file name must not be passed to it.
    addHTMLToReport(fig.to_html())
    
def dropInaccurateLocations(salmonDf):
    """Keep only the rows whose 'Location.accuracy' value is below 50.

    The column is converted with ``pd.to_numeric`` before comparing, so
    numeric strings are accepted.
    """
    accuracy = pd.to_numeric(salmonDf['Location.accuracy'])
    isAccurate = accuracy < 50
    return salmonDf[isAccurate]
# Attach each salmon entry to its parent survey so every row carries the survey date.
mergedDf = pd.merge(salmonDf, surveyDf, how='inner', left_on = 'ec5_parent_uuid', right_on = 'ec5_uuid', suffixes=(None, '_y'))
# Pick the latest survey by parsed date. The previous sort_values() call
# discarded its result, so iloc[0] was simply the first merged row. The
# original string value is kept because it is used for filtering and in the
# figure title.
mostRecentSurveyDate = mergedDf.loc[mergedDf['Survey_Date'].map(pd.Timestamp).idxmax(), 'Survey_Date']
displayScatterMap(dropInaccurateLocations(filterSalmonDf(mergedDf, ('Survey_Date', mostRecentSurveyDate))), mostRecentSurveyDate + ' Fish Scatter Map')
displayScatterMap(dropInaccurateLocations(salmonDf), mostRecentSurvey + ' Fish Scatter Map')

### Spawned Chart

In [None]:
def displaySpawnedChart(salmonDf, species):
    """Draw a pie chart of spawning success for the dead fish of one species.

    Only entries of type 'Dead' are considered. Each wedge is the share of
    those entries with the matching 'Spawning_Success' value. The figure is
    added to the report and then shown. When the species has no dead entries
    a message is printed and no chart is produced.

    Args:
        salmonDf: Salmon entries with 'Species', 'Type' and
            'Spawning_Success' columns.
        species: Species name to chart, e.g. 'Chum'.
    """
    deadDf = filterSalmonDf(salmonDf, ('Species', species), ('Type', 'Dead'))
    totalDead = deadDf.shape[0]
    if totalDead == 0:
        # Explicit guard instead of catching the ZeroDivisionError below.
        print('Unable to display spawned chart. No spawning data to display')
        return

    # (Spawning_Success value in the data, wedge label, wedge colour)
    categories = [
        ('Spawned', 'Spawned', 'green'),
        ('Partially spawned', 'Partially Spawned', 'yellow'),
        ('Unspawned', 'Unspawned', 'red'),
        ('Unknown', 'Unknown', 'gray'),
    ]
    spawnPercentages = [
        filterSalmonDf(deadDf, ('Spawning_Success', value)).shape[0] / totalDead * 100
        for value, _, _ in categories
    ]
    wedgeLabels = [label for _, label, _ in categories]
    # A real list of colours; the old dict only worked because iterating it
    # yields its keys.
    wedgeColors = [color for _, _, color in categories]

    fig, ax1 = plt.subplots()
    ax1.pie(spawnPercentages, colors=wedgeColors, labels=wedgeLabels, autopct='%1.1f%%')
    plt.title(mostRecentSurvey + ' ' + species + ' Spawn Percentage')
    addFigureToReport()
    plt.show()
displaySpawnedChart(salmonDf, 'Chum')
displaySpawnedChart(salmonDf, 'Coho')

### Year By Year Count

In [None]:
def displayYearByYearCount(surveyURIs, salmonURIs):
    """Overlay each season's per-survey 'Total' on a shared month-day axis.

    The data comes from the module-level ``statsDfs``. One line is drawn per
    season, in the key order of ``surveyURIs``. Seasons that have no entry in
    ``statsDfs`` are skipped instead of raising ``KeyError``.

    Args:
        surveyURIs: Mapping keyed by season; only its keys are used.
        salmonURIs: Unused; kept so existing calls keep working.
    """
    fig, ax = plt.subplots()
    # Plot one series per season that actually has stats.
    for year in surveyURIs:
        if year in statsDfs:
            plt.plot('Survey Date', 'Total', data=statsDfs[year], label=year)
    plt.title('Count by time of year')
    plt.ylabel('Count')
    plt.xlabel('Survey Date')
    plt.xticks(rotation = 45)
    plt.legend()
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
    addFigureToReport()
displayYearByYearCount(surveyURIs, salmonURIs)

In [None]:
# Hand the finished HTML report to the browser when running in Google Colab
def generateReport():
    """Trigger a download of the report file; does nothing outside Colab."""
    if not IN_COLAB:
        return
    from google.colab import files
    files.download(reportFileName)
generateReport()