<p><img src="https://www.carkeekwatershed.org/wp-content/uploads/CWCAPlogo-1.png" width="213" height="72"></p>

# CWCAP Salmon Survey Analysis
Press "Run all" to create all figures and generate a report.

In [None]:
###ENTER SURVEY YEAR###
surveyYear = '2021'

##INITIALIZATION
import matplotlib.pyplot as plt, plotly.express as px, pandas as pd, sys, pytz, io, base64, matplotlib.dates as mdates
from jinja2 import Environment, FileSystemLoader
from datetime import datetime

# Timestamp (America/Los_Angeles) used both in the report file name and inside the report.
currentTimePacific = datetime.now(pytz.timezone('America/Los_Angeles')).strftime('%Y-%m-%d_%H-%M-%S') #cannot use system time due to colab
reportFileName = currentTimePacific + '_salmonReport.html'

# Epicollect5 export endpoints, keyed by survey year (as a string).
# surveyURIs and salmonURIs share the same project per year but use a different form_ref;
# entries fetched from salmonURIs are linked to survey entries through 'ec5_parent_uuid'.
surveyURIs = {'2019':'https://five.epicollect.net/api/export/entries/salmon-survey-2019?form_ref=397fba6ecc674b74836efc190840c42d_5d6f454667a28',
              '2020':'https://five.epicollect.net/api/export/entries/salmon-survey-2020?form_ref=f550ab6c4dab44f49bcc33b7c1904be9_5d6f454667a28',
              '2021':'https://five.epicollect.net/api/export/entries/salmon-survey-2021?form_ref=ad5ffedf0a3246a18934e6ec36ed9569_5d6f454667a28'}
salmonURIs = {'2019':'https://five.epicollect.net/api/export/entries/salmon-survey-2019?form_ref=397fba6ecc674b74836efc190840c42d_5d6f509867795',
              '2020':'https://five.epicollect.net/api/export/entries/salmon-survey-2020?form_ref=f550ab6c4dab44f49bcc33b7c1904be9_5d6f509867795',
              '2021':'https://five.epicollect.net/api/export/entries/salmon-survey-2021?form_ref=ad5ffedf0a3246a18934e6ec36ed9569_5d6f509867795'}
# True when the google.colab module has been loaded, i.e. the notebook is running in Google Colab.
IN_COLAB = 'google.colab' in sys.modules

def getFigureAsHTML():
    """Return the current matplotlib figure as a self-contained HTML <img> tag.

    The figure is saved as PNG into an in-memory buffer and embedded as a
    base64 data URI, so no image file is written to disk.
    """
    pngBuffer = io.BytesIO()
    plt.savefig(pngBuffer, format='png')
    # getvalue() returns the full buffer contents regardless of stream position.
    pngBase64 = base64.b64encode(pngBuffer.getvalue()).decode("utf-8")
    return '<img src=\'data:image/png;base64,{}\'>'.format(pngBase64)

def clearPreviousReports():
    """Delete report files left in the working directory by earlier runs.

    Every file matching '*salmonReport.html' is removed. Nothing is deleted
    when the notebook runs in Google Colab (IN_COLAB is true).
    """
    if IN_COLAB:
        return
    import glob
    import os
    for name in glob.glob('*salmonReport.html'):
        print("Previous report file exists. Deleting " + name)
        os.remove(name)
clearPreviousReports()

In [None]:
# When running in Google Colab, clone the notebook's repository (git checks out its default branch).
# NOTE(review): the clone is created in ./salmonNotebook - confirm that the relative 'templates'
# path used by the report cell resolves from the Colab working directory.
if IN_COLAB:
  !git clone https://github.com/slfisco/salmonNotebook.git

In [None]:
###DATA FETCHING
import requests, json
def getEntries(uri):
    """Download every entry behind a paginated export endpoint.

    Requests 1000 entries per page and keeps following the response's
    ``links.next`` URL until it is null.

    Args:
        uri: Export URL that already contains a query string
            ('&per_page=1000' is appended to it).

    Returns:
        A DataFrame with one row per entry (nested JSON flattened by
        ``pd.json_normalize``) and a fresh 0..n-1 index across all pages.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    pages = []
    nextURL = uri + '&per_page=1000'
    while nextURL is not None:
        response = requests.get(url=nextURL)
        # Fail with the HTTP status instead of a KeyError on a missing 'data' key.
        response.raise_for_status()
        payload = response.json()  # parse each response body only once
        pages.append(pd.json_normalize(payload['data']['entries']))
        nextURL = payload['links']['next']
    # DataFrame.append was removed in pandas 2.0; concat works on every version.
    return pd.concat(pages, ignore_index=True)

def addStats(survey, salmonDf):
    """Count the fish entries recorded under one survey.

    Args:
        survey: Row (Series or mapping) with 'ec5_uuid' and 'Survey_Date'.
        salmonDf: DataFrame of fish entries with the columns
            'ec5_parent_uuid', 'Species' and 'Type'.

    Returns:
        A Series with the survey date (as a Timestamp) and the per-species
        live/dead counts, redd count and totals for that survey. Dead counts
        include entries of Type "Remnant". Totals include Sea-run Cutthroat
        and Unknown species but exclude Resident Cutthroat and redds.
    """
    todaySalmonDf = salmonDf[salmonDf['ec5_parent_uuid'] == survey['ec5_uuid']]
    species = todaySalmonDf['Species']
    fishType = todaySalmonDf['Type']

    def count(speciesName, typeName):
        # Number of entries matching both the species and the type.
        return int(((species == speciesName) & (fishType == typeName)).sum())

    def countDead(speciesName):
        # Remnants are counted as dead fish.
        return count(speciesName, 'Dead') + count(speciesName, 'Remnant')

    deadChum = countDead('Chum')
    deadCoho = countDead('Coho')
    deadUnknown = countDead('Unknown')
    deadSeaRunCutthroat = countDead('Sea-run Cutthroat')
    deadResidentCutthroat = countDead('Resident Cutthroat')
    deadCutthroat = deadSeaRunCutthroat + deadResidentCutthroat

    liveChum = count('Chum', 'Live')
    liveCoho = count('Coho', 'Live')
    liveUnknown = count('Unknown', 'Live')
    liveSeaRunCutthroat = count('Sea-run Cutthroat', 'Live')
    liveResidentCutthroat = count('Resident Cutthroat', 'Live')
    liveCutthroat = liveSeaRunCutthroat + liveResidentCutthroat

    # Redds are counted regardless of species.
    redds = int((fishType == 'Redd').sum())

    #totals include sea run cutthroat but not resident or redds
    totalDeadSalmon = deadChum + deadCoho + deadSeaRunCutthroat + deadUnknown
    totalLiveSalmon = liveChum + liveCoho + liveUnknown + liveSeaRunCutthroat
    total = totalDeadSalmon + totalLiveSalmon
    return pd.Series({
        'Survey Date': pd.Timestamp(survey['Survey_Date']),
        'Dead Chum': deadChum,
        'Dead Coho': deadCoho,
        'Live Chum': liveChum,
        'Live Coho': liveCoho,
        'Live Cutthroat': liveCutthroat,
        'Dead Cutthroat': deadCutthroat,
        'Redds': redds,
        'Total Dead Salmon': totalDeadSalmon,
        'Total Live Salmon': totalLiveSalmon,
        'Total': total,
    })

def getSurveyEntries():
    """Fetch the survey entries of every year listed in surveyURIs.

    Returns a dict mapping each year key to the DataFrame from getEntries.
    """
    return {year: getEntries(uri) for year, uri in surveyURIs.items()}

def getSalmonEntries():
    """Fetch the fish entries of every year listed in salmonURIs.

    Returns a dict mapping each year key to the DataFrame from getEntries.
    """
    return {year: getEntries(uri) for year, uri in salmonURIs.items()}
    
def getStats(surveyDfs, salmonDfs):
    """Build the per-survey-date statistics table for each year.

    Args:
        surveyDfs: dict of year -> DataFrame of survey entries.
        salmonDfs: dict of year -> DataFrame of fish entries.

    Returns:
        dict of year -> DataFrame with one row per survey date: the addStats
        counts summed over that date's surveys, plus the running totals
        'Dead to Date', 'Dead Chum to Date' and 'Dead Coho to Date'.
        Years with no survey entries or no fish entries are omitted.
    """
    statsDfs = {}
    for year, yearSurveyDf in surveyDfs.items():
        yearSalmonDf = salmonDfs[year]
        # Without survey rows apply() yields no 'Survey Date' column and the
        # groupby below would raise, so such years are skipped like empty fish data.
        if yearSurveyDf.empty or yearSalmonDf.empty:
            continue
        perSurvey = yearSurveyDf.apply(addStats, axis=1, args=(yearSalmonDf,))
        perDate = perSurvey.groupby(['Survey Date']).sum().reset_index()
        statsDfs[year] = perDate.assign(**{
            'Dead to Date': perDate['Total Dead Salmon'].cumsum(),
            'Dead Chum to Date': perDate['Dead Chum'].cumsum(),
            'Dead Coho to Date': perDate['Dead Coho'].cumsum(),
        })
    return statsDfs

# Fetch all configured years up front; the year-by-year plot uses every year in statsDfs.
surveyDfs = getSurveyEntries()
salmonDfs = getSalmonEntries()
statsDfs = getStats(surveyDfs, salmonDfs)
# Shortcuts for the selected survey year, used by the tables and plots in later cells.
salmonDf = salmonDfs[surveyYear]
surveyDf = surveyDfs[surveyYear]
# NOTE: getStats leaves out years without fish entries, so this lookup raises KeyError for such a year.
statsDf = statsDfs[surveyYear]

### Survey Stats Table

In [None]:
###TABLES
import IPython.core.display as ip

def displayInNotebook(table):
    """Show an HTML string as rendered output of the current notebook cell."""
    renderedTable = ip.HTML(table)
    display(renderedTable)
    
def getSurveyStatsTable():
    """Render the per-survey counts of the selected year as an HTML table.

    Reads the module-level statsDf, displays the table in the notebook and
    returns the same HTML string.
    """
    columns = ['Survey Date', 'Total Live Salmon', 'Total Dead Salmon', 'Live Chum', 'Dead Chum', 'Live Coho', 'Dead Coho', 'Live Cutthroat', 'Dead Cutthroat', 'Redds']
    styler = statsDf[columns].style.format({'Survey Date': "{:%Y-%m-%d}"})
    # Styler.hide_index()/render() were removed in pandas 2.0; use the
    # replacements when they exist and fall back on older pandas versions.
    styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
    styler = styler.set_caption("Note: Total live salmon includes Sea-Run Cutthroat and Unknowns - entries where surveyors were unable to determine the species. Remnants are included in dead counts.")
    statsTable = styler.to_html() if hasattr(styler, 'to_html') else styler.render()
    displayInNotebook(statsTable)
    return statsTable
    
def getYearlyTotals():
    """Estimate the selected year's fish totals from the module-level statsDf.

    For each survey date the estimate is (dead counted up to that date +
    live counted on that date); the yearly total is the maximum of that sum.
    Chum and Coho totals are computed the same way from their own columns.

    Returns:
        A one-row DataFrame with 'Salmon', 'Chum', 'Coho' and
        'Based on survey' (the survey date at which the salmon total peaks).
    """
    salmonEstimates = statsDf['Dead to Date'] + statsDf['Total Live Salmon']
    peakLabel = salmonEstimates.idxmax()
    yearlySalmonTotal = int(salmonEstimates.max())
    yearlyChumTotal = int((statsDf['Dead Chum to Date'] + statsDf['Live Chum']).max())
    yearlyCohoTotal = int((statsDf['Dead Coho to Date'] + statsDf['Live Coho']).max())
    # idxmax() returns an index label, so look the row up by label (.loc), not position.
    surveyOfCalculation = statsDf.loc[peakLabel, 'Survey Date']
    ### DEBUGGING
    #print('Based on total dead count:', int(statsDf.loc[peakLabel, 'Dead to Date']), '|', 'plus total live count:', int(statsDf.loc[peakLabel, 'Total Live Salmon']))
    return pd.DataFrame({'Salmon': yearlySalmonTotal, 'Chum': yearlyChumTotal, 'Coho': yearlyCohoTotal, 'Based on survey': surveyOfCalculation}, index=[0])
    
def getYearlyTotalsTable():
    """Render the yearly totals from getYearlyTotals as an HTML table.

    Displays the table in the notebook and returns the same HTML string.
    """
    yearlyTotals = getYearlyTotals()
    styler = yearlyTotals[['Salmon', 'Chum', 'Coho', 'Based on survey']].style.format({'Based on survey': "{:%Y-%m-%d}"})
    # Styler.hide_index()/render() were removed in pandas 2.0; use the
    # replacements when they exist and fall back on older pandas versions.
    styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
    styler = styler.set_caption("Note: Yearly total is the survey date where (day\'s live + dead up to date) is max")
    yearlyTotalsTable = styler.to_html() if hasattr(styler, 'to_html') else styler.render()
    displayInNotebook(yearlyTotalsTable)
    return yearlyTotalsTable
    
def getReddsTable():
    """Render the redds recorded in the selected year as an HTML table.

    Joins the module-level salmonDf to surveyDf (fish entry -> parent
    survey), keeps entries of Type "Redd" and lists their 'Stream',
    'Distance' and 'Survey_Date', sorted by stream and distance. Displays
    the table in the notebook and returns the same HTML string.
    """
    mergedDf = pd.merge(salmonDf, surveyDf, how='inner', left_on='ec5_parent_uuid', right_on='ec5_uuid')
    redds = mergedDf.query('`Type` == "Redd"')[['Stream', 'Distance', 'Survey_Date']]
    styler = redds.sort_values(by=['Stream', 'Distance']).style
    # Styler.hide_index()/render() were removed in pandas 2.0; use the
    # replacements when they exist and fall back on older pandas versions.
    styler = styler.hide(axis='index') if hasattr(styler, 'hide') else styler.hide_index()
    reddsTable = styler.to_html() if hasattr(styler, 'to_html') else styler.render()
    displayInNotebook(reddsTable)
    return reddsTable
    
# Each call displays its table in the notebook and returns the HTML string;
# the strings are kept in these variables for the report-generation cell.
surveyTotals = getSurveyStatsTable()
yearlyTotalsTable = getYearlyTotalsTable()
reddsTable = getReddsTable()

### Count Plot

In [None]:
def getCountPlot(statsDf):
    """Plot the live/dead counts per survey date and return the figure as HTML.

    One line is drawn per count column, with an x tick at every survey date
    labelled as month-day.
    """
    plottedColumns = ['Total Dead Salmon', 'Total Live Salmon', 'Live Chum', 'Dead Chum', 'Live Coho', 'Dead Coho']
    axes = statsDf.plot(
        x='Survey Date',
        y=plottedColumns,
        xticks=statsDf['Survey Date'],
        rot=45,
        ylabel='Count',
        title=surveyYear + ' Fish Count',
    )
    axes.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d"))
    return getFigureAsHTML()
countPlot = getCountPlot(statsDf)

### Scatter Map

In [None]:
def getScatterMap(salmonDf, figureTitle):
    """Plot fish entries on a map and return the figure as an HTML string.

    Args:
        salmonDf: Fish entries with 'Location.latitude', 'Location.longitude',
            'Location.accuracy', 'Type' and 'title' columns.
        figureTitle: Title shown on the figure.

    Returns:
        The figure's HTML, or None when the entries carry no location data.
        The figure is also shown in the notebook.
    """
    if not locationDataAvailable(salmonDf):
        return None
    fig = px.scatter_mapbox(salmonDf, lat='Location.latitude', lon='Location.longitude', color='Type', labels={'Type':'Type'}, color_discrete_map={'Live': 'green', 'Redd': 'red', 'Dead': 'black'},
                    center=dict(lat=47.71157, lon=-122.3759), zoom=15, hover_name = 'title', hover_data = ['Location.accuracy'],
                    mapbox_style='stamen-terrain', title=figureTitle)
    fig.layout.coloraxis.showscale = False
    fig.show()
    # to_html() takes no file name: its first positional parameter is `config`,
    # so the report file name that used to be passed here was not a valid argument.
    return fig.to_html()

def locationDataAvailable(salmonDf):
    """Return True when at least one entry has a 'Location.latitude' value.

    Missing (NaN) latitudes do not count. A frame without the column is
    treated as having no location data instead of raising KeyError.
    """
    if 'Location.latitude' not in salmonDf.columns:
        return False
    return bool(salmonDf['Location.latitude'].notna().any())
    
def dropInaccurateLocations(salmonDf):
    """Keep only the entries whose 'Location.accuracy' is numerically below 50."""
    accuracy = pd.to_numeric(salmonDf['Location.accuracy'])
    isAccurate = accuracy < 50
    return salmonDf.loc[isAccurate]

# Join each fish entry to its parent survey so entries can be selected by survey date.
mergedDf = pd.merge(salmonDf, surveyDf, how='inner', left_on = 'ec5_parent_uuid', right_on = 'ec5_uuid', suffixes=(None, '_y'))
# Find the latest survey by parsed date. The previous code discarded the result of
# sort_values() and took the first row, which is not the most recent survey.
surveyTimestamps = mergedDf['Survey_Date'].map(pd.Timestamp)
mostRecentSurveyDate = mergedDf.loc[surveyTimestamps.idxmax(), 'Survey_Date']
lastSurveySalmon = mergedDf.query('`Survey_Date` == @mostRecentSurveyDate')
mostRecentScatterMap = getScatterMap(dropInaccurateLocations(lastSurveySalmon), mostRecentSurveyDate + ' Fish Scatter Map')
yearScatterMap = getScatterMap(dropInaccurateLocations(salmonDf), surveyYear + ' Fish Scatter Map')

### Spawned Chart

In [None]:
def getPieChart(df, column, colorsList, title):
    """Draw a pie chart of the value distribution of one column.

    Args:
        df: DataFrame holding the entries to chart.
        column: Name of the column whose values are counted.
        colorsList: Mapping of category value -> matplotlib colour.
        title: Chart title.

    Returns:
        The chart as an HTML <img> string, or None (after printing a notice)
        when there is nothing to chart. Blank ('') values are ignored.
    """
    counts = None
    if df.shape[0] > 0:
        # Blank answers are not a category; drop them before checking for data.
        counts = df.value_counts(column).drop(labels='', errors='ignore')
    if counts is None or counts.empty:
        # This function also draws the predation charts, so name the chart
        # instead of always calling it the "spawned chart".
        print('Unable to display chart "' + title + '". No data to display')
        return None
    categories = counts.index.values
    # Categories missing from the colour map get a neutral colour rather than a KeyError.
    wedgeColors = [colorsList.get(category, 'lightgray') for category in categories]
    pie, ax1 = plt.subplots()
    ax1.pie(counts, colors=wedgeColors, labels=categories, autopct='%1.1f%%')
    plt.title(title)
    return getFigureAsHTML()
    
def getPredationChart(df, title):
    """Pie chart of the 'Predation' column, using a fixed colour per answer."""
    colorByAnswer = {
        'Eye loss': 'teal',
        'No': 'pink',
        'Unknown': 'grey',
        'Yes': 'orange',
    }
    return getPieChart(df, 'Predation', colorByAnswer, title)
    
def getSpawningChart(df, title):
    """Pie chart of the 'Spawning_Success' column, using a fixed colour per answer."""
    colorByAnswer = {
        'Partially spawned': 'yellow',
        'Spawned': 'green',
        'Unknown': 'gray',
        'Unspawned': 'red',
    }
    return getPieChart(df, 'Spawning_Success', colorByAnswer, title)

# Subsets of the selected year's fish entries used for the pie charts.
# Only Type "Dead" is selected here; "Remnant" entries are not included.
deadChumDf = salmonDf.query('`Type` == "Dead" and `Species` == "Chum"')
deadCohoDf = salmonDf.query('`Type` == "Dead" and `Species` == "Coho"')
deadChumFemaleDf = deadChumDf.query('`Sex` == "Female"')
deadChumMaleDf = deadChumDf.query('`Sex` == "Male"')
# Each chart is an HTML <img> string, or None when its subset has no rows.
chumPredationChart = getPredationChart(deadChumDf, surveyYear + ' Chum Predation')
cohoPredationChart = getPredationChart(deadCohoDf, surveyYear + ' Coho Predation')
chumSpawningChart = getSpawningChart(deadChumDf, surveyYear + ' Chum Spawn Percentage')
cohoSpawningChart = getSpawningChart(deadCohoDf, surveyYear + ' Coho Spawn Percentage')
chumFemaleSpawningChart = getSpawningChart(deadChumFemaleDf, surveyYear + ' Chum Female Spawn Percentage')
chumMaleSpawningChart = getSpawningChart(deadChumMaleDf, surveyYear + ' Chum Male Spawn Percentage')
# Collected in display order for the report-generation cell.
pieCharts = [chumPredationChart, cohoPredationChart, chumSpawningChart, cohoSpawningChart, chumFemaleSpawningChart, chumMaleSpawningChart]

### Year By Year Count

In [None]:
def getYearByYearCountPlot(surveyURIs, salmonURIs):
    """Plot each year's total count against the time of year.

    One line is drawn for every year present in the module-level statsDfs,
    with all survey dates moved onto a common calendar year so the seasons
    overlap. The stats frames themselves are not modified.

    Args:
        surveyURIs: Unused; kept so existing callers keep working.
        salmonURIs: Unused; kept so existing callers keep working.

    Returns:
        The figure as an HTML <img> string.
    """
    fig, ax = plt.subplots()
    for year, yearStats in statsDfs.items():
        # Build the shifted dates as a separate Series: writing them back into
        # the frame would overwrite the real survey dates in statsDfs/statsDf.
        # Year 2000 is a leap year, so a Feb 29 survey date stays valid
        # (strptime('%m-%d') defaults to 1900, which has no Feb 29).
        timeOfYear = yearStats['Survey Date'].apply(lambda surveyDate: surveyDate.replace(year=2000))
        ax.plot(timeOfYear, yearStats['Total'], label=year)
    plt.title('Count by time of year')
    plt.ylabel('Count')
    plt.xlabel('Survey Date')
    plt.xticks(rotation = 45)
    plt.legend()
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
    return getFigureAsHTML()
yearByYearCountPlot = getYearByYearCountPlot(surveyURIs, salmonURIs)

In [None]:
#Generate HTML report from template
# The template is loaded from the 'templates' directory relative to the working directory.
env = Environment(loader=FileSystemLoader('templates'))
template = env.get_template('report_template.html')
# Values made available to the template; tables, plots and maps are HTML strings
# (or None where a chart or map had no data).
reportData = {
    'reportGenTime': currentTimePacific,
    'surveyTotals': surveyTotals,
    'yearlyTotalsTable': yearlyTotalsTable,
    'reddsTable' : reddsTable,
    'countPlot' : countPlot,
    'pieCharts' : pieCharts,
    'yearByYearCountPlot' : yearByYearCountPlot,
    'mostRecentScatterMap' : mostRecentScatterMap,
    'yearScatterMap' : yearScatterMap
}
html = template.render(reportData)
# Write as UTF-8 explicitly: the platform default encoding (e.g. cp1252 on Windows)
# can fail on non-ASCII characters in the embedded HTML.
with open(reportFileName, 'w', encoding='utf-8') as f:
    f.write(html)

In [None]:
# When running in Google Colab, offer the generated report as a browser download.
if IN_COLAB:
    from google.colab import files
    files.download(reportFileName)