In [None]:
### DB AND TABLE SETUP
import sqlite3
import pandas as pd
from IPython.display import display, HTML

# One in-memory SQLite database shared by every cell below. Nothing is written
# to disk, so the table is rebuilt from the remote APIs after a kernel restart.
connection = sqlite3.connect(":memory:")
cursor = connection.cursor()

# Export endpoints keyed by survey year (string keys).
# surveyURIs: the parent "survey" form; getSurveyDates reads Survey_Date and
# ec5_uuid from it so each observation can be tied to a survey date.
surveyURIs = {'2019':'https://five.epicollect.net/api/export/entries/salmon-survey-2019?form_ref=397fba6ecc674b74836efc190840c42d_5d6f454667a28&per_page=1000',
              '2020':'https://five.epicollect.net/api/export/entries/salmon-survey-2020?form_ref=f550ab6c4dab44f49bcc33b7c1904be9_5d6f454667a28&per_page=1000',
              '2021':'https://five.epicollect.net/api/export/entries/salmon-survey-2021?form_ref=ad5ffedf0a3246a18934e6ec36ed9569_5d6f454667a28&per_page=1000',
              '2022':'https://five.epicollect.net/api/export/entries/salmon-survey-2022?form_ref=d46b5d8451f8410ea407bae5c8eb9f49_5d6f454667a28&per_page=1000'}
# salmonURIs: the observation form that is loaded into the `salmon` table.
# 2019-2022 come from Epicollect; 2023 comes from KoboToolbox, which has no
# separate survey-date lookup (hence no '2023' key in surveyURIs).
salmonURIs = {'2019':'https://five.epicollect.net/api/export/entries/salmon-survey-2019?form_ref=397fba6ecc674b74836efc190840c42d_5d6f509867795&per_page=1000',
              '2020':'https://five.epicollect.net/api/export/entries/salmon-survey-2020?form_ref=f550ab6c4dab44f49bcc33b7c1904be9_5d6f509867795&per_page=1000',
              '2021':'https://five.epicollect.net/api/export/entries/salmon-survey-2021?form_ref=ad5ffedf0a3246a18934e6ec36ed9569_5d6f509867795&per_page=1000',
              '2022':'https://five.epicollect.net/api/export/entries/salmon-survey-2022?form_ref=d46b5d8451f8410ea407bae5c8eb9f49_5d6f509867795&per_page=1000',
              '2023':'https://kf.kobotoolbox.org/api/v2/assets/a6dEG7tnrtwjrmituAdL5k/data/?format=json'}


# `_id` is declared TEXT rather than STRING: SQLite gives an unrecognised type
# name such as STRING *NUMERIC* affinity, which silently rewrites numeric-looking
# ids ('0012' -> 12) and can make distinct ids collide under INSERT OR IGNORE.
create_salmon_table_query = '''
    CREATE TABLE IF NOT EXISTS salmon (
        _id TEXT PRIMARY KEY,
        Survey_Date DATE,
        year DATE,
        Quantity INTEGER,
        Distance INTEGER,
        Stream TEXT,
        Type TEXT,
        Species TEXT,
        Predation TEXT,
        Length FLOAT,
        Width FLOAT,
        Spawned TEXT,
        Sex TEXT
    );
'''
cursor.execute(create_salmon_table_query)

In [None]:
### DATA LOADING
import requests
from datetime import datetime

# Parameterized insert for one observation row. The 13 `?` placeholders are
# filled positionally, so the tuple built in processEntries must list its
# values in exactly this column order.
# OR IGNORE: a row whose insert would violate a constraint (e.g. an `_id`
# that is already present) is skipped instead of raising an error.
salmon_insert_query = '''
        INSERT OR IGNORE INTO salmon (
        _id,
        Survey_Date,
        year,
        Quantity,
        Distance,
        Stream,
        Type,
        Species,
        Predation,
        Length,
        Width,
        Spawned,
        Sex
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
    '''

def getData(uri, timeout=30):
    """Fetch ``uri`` with an HTTP GET and return the decoded JSON body.

    Args:
        uri: Full URL of the export endpoint to request.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The parsed JSON payload (whatever ``response.json()`` yields).

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(uri, timeout=timeout)
    # Surface HTTP errors (rate limiting, bad form_ref, ...) here instead of
    # letting callers fail later with an unrelated KeyError on the payload.
    response.raise_for_status()
    return response.json()

## for epicollect data to associate salmon to a survey date
def getSurveyDates(uri):
    """Build a lookup from survey entry uuid to its survey date.

    Walks every page of the survey export, following ``links.next`` the same
    way loadSurveyYear does for observations, so surveys beyond the first page
    are not silently dropped.

    Args:
        uri: URL of the first page of the Epicollect survey-form export.

    Returns:
        dict mapping each entry's ``ec5_uuid`` to its ``Survey_Date``
        reformatted from ``MM/DD/YYYY`` to ``YYYY-MM-DD``.

    Raises:
        KeyError: if a page lacks ``data.entries`` or an entry lacks
            ``ec5_uuid`` / ``Survey_Date``.
        ValueError: if a ``Survey_Date`` is not in ``MM/DD/YYYY`` form.
    """
    surveyDates = {}
    while uri:
        data = getData(uri)
        for entry in data['data']['entries']:
            parsed = datetime.strptime(entry['Survey_Date'], "%m/%d/%Y")
            # ISO format sorts chronologically as plain text in SQLite.
            surveyDates[entry['ec5_uuid']] = parsed.strftime("%Y-%m-%d")
        # A missing or null `next` link means this was the last page.
        uri = (data.get('links') or {}).get('next')
    return surveyDates
    
def processEntries(entries, isEpicollect, year, surveyDates):
    """Insert one page of observation entries into the ``salmon`` table.

    Args:
        entries: Iterable of dict-like entries from an export page.
        isEpicollect: True when the entries use Epicollect field names
            (``ec5_uuid``, ``Length_Inches``, ...); False for the other
            source, whose keys already match the table columns.
        year: Value stored in the ``year`` column for every row.
        surveyDates: Mapping of parent survey uuid -> survey date; only
            consulted when ``isEpicollect`` is true.

    Raises:
        KeyError: for an Epicollect entry whose ``ec5_parent_uuid`` is not a
            key of ``surveyDates``.
    """
    for record in entries:
        # The two sources name the id, date and measurement fields differently.
        if isEpicollect:
            record_id = record.get('ec5_uuid')
            survey_date = surveyDates[record.get('ec5_parent_uuid')]
            length = record.get('Length_Inches')
            width = record.get('Width_Inches')
            spawned = record.get('Spawning_Success')
        else:
            record_id = record.get('_id')
            survey_date = record.get('Survey_Date')
            length = record.get("Length", None)
            width = record.get("Width", None)
            spawned = record.get("Spawned", None)

        # Order must match the column list of salmon_insert_query.
        row = (
            record_id,
            survey_date,
            year,
            record.get('Quantity', 1),  # an absent Quantity key is stored as 1
            record.get('Distance', None),
            record.get('Stream', None),
            record.get('Type', None),
            record.get('Species', None),
            record.get('Predation', None),
            length,
            width,
            spawned,
            record.get('Sex', None),
        )
        cursor.execute(salmon_insert_query, row)
        
def loadSurveyYear(year):
    """Download every page of observations for ``year`` and insert them.

    Args:
        year: Key into ``salmonURIs`` (and ``surveyURIs`` for Epicollect years).

    Raises:
        KeyError: if ``year`` has no configured URI, or a response page is
            missing the expected entries / next-link keys.
    """
    print(f'loading for year: {year}')
    next_uri = salmonURIs[year]
    # The source is inferred from the URL; it decides the payload layout below.
    isEpicollect = "epicollect" in next_uri
    if isEpicollect:
        surveyDates = getSurveyDates(surveyURIs[year])
    else:
        surveyDates = None

    while True:
        page = getData(next_uri)
        if isEpicollect:
            entries = page['data']['entries']
        else:
            entries = page['results']
        processEntries(entries, isEpicollect, year, surveyDates)

        # Advance to the following page; a null link marks the last page.
        if isEpicollect:
            next_uri = page['links']['next']
        else:
            next_uri = page['next']
        if next_uri is None:
            break
        
# Populate the in-memory `salmon` table: iterate the keys of salmonURIs (the
# survey years) and load each one in turn. Rows go through INSERT OR IGNORE,
# so re-running this cell does not duplicate rows with an existing _id.
print('loading salmon into database')
for year in salmonURIs:
    loadSurveyYear(year)

In [None]:
### STATS BY SURVEY TABLE
import IPython.core.display as ip, matplotlib.dates as mdates
# Per-survey-date counts for the 2023 season.
# Species filtering lives only in the CASE expressions: a species filter in the
# WHERE clause would drop 'Unknown' and 'Resident_Cutthroat' rows (and redd rows
# recorded without one of the listed species) before they could be counted,
# forcing dead_unknown_count to 0 and under-reporting the other columns.
# NOTE(review): COUNT counts rows, not SUM(Quantity) -- confirm each row is one fish.
stats_by_survey_query = '''
SELECT
    Survey_Date,
    COUNT(CASE WHEN Species in ('Chum', 'Coho', 'Unknown', 'Sea-run_Cutthroat') AND Type = 'Live' THEN _id END) AS total_live_salmon_count,
    COUNT(CASE WHEN Species in ('Chum', 'Coho', 'Unknown', 'Sea-run_Cutthroat') AND Type in ('Dead', 'Remnant') THEN _id END) AS total_dead_salmon_count,
    COUNT(CASE WHEN Species = 'Chum' AND Type in ('Dead', 'Remnant') THEN _id END) AS dead_chum_count,
    COUNT(CASE WHEN Species = 'Coho' AND Type in ('Dead', 'Remnant') THEN _id END) AS dead_coho_count,
    COUNT(CASE WHEN Species = 'Unknown' AND Type in ('Dead', 'Remnant') THEN _id END) AS dead_unknown_count,
    COUNT(CASE WHEN Species = 'Chum' AND Type = 'Live' THEN _id END) AS live_chum_count,
    COUNT(CASE WHEN Species = 'Coho' AND Type = 'Live' THEN _id END) AS live_coho_count,
    COUNT(CASE WHEN Species in ('Resident_Cutthroat', 'Sea-run_Cutthroat') AND Type = 'Live' THEN _id END) as live_cutthroat_count,
    COUNT(CASE WHEN Type = 'Redd' THEN _id END) AS redds_count
FROM
    salmon
WHERE
    year = '2023'
GROUP BY
    Survey_Date
ORDER BY
    Survey_Date;
'''
df = pd.read_sql(stats_by_survey_query, connection)
display(ip.HTML(df.to_html(index=False)))
# Convert to real datetimes so the x axis is spaced by date, then label ticks as MM-DD.
df['Survey_Date'] = pd.to_datetime(df['Survey_Date'])
plot = df.plot(ylabel = 'Count', title = 'Fish Count', rot=45, xticks=df['Survey_Date'], y=['total_dead_salmon_count', 'total_live_salmon_count', 'live_chum_count', 'dead_chum_count', 'live_coho_count', 'dead_coho_count'], x='Survey_Date')
plot.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d"))

In [None]:
### REDDS TABLE. USED TO HELP SURVEY TEAM AVOID REDDS
# Every recorded redd (all loaded years) with its stream, distance and survey date.
redds_table_query = '''
SELECT
    Stream, Distance, Survey_Date
FROM
    salmon
WHERE Type = 'Redd'
'''
df = pd.read_sql(redds_table_query, connection)
# Use the HTML class imported in the setup cell rather than the `ip` alias that
# is only imported inside another analysis cell; this cell then needs nothing
# beyond the setup and loading cells to run.
display(HTML(df.to_html(index=False)))

In [None]:
import IPython.core.display as ip
### SPAWN SUCCESS
# Share of dead chum in each spawning state, across all loaded years.
# Comparisons go through LOWER() because SQLite's `=` is case-sensitive for text
# and other queries in this notebook match capitalised values ('Chum', 'Dead')
# in the same columns; lowercase-only literals could match no rows at all.
# The CTE selects the dead-chum rows once so the denominator is not repeated.
spawning_query = '''
WITH dead_chum AS (
    SELECT
        _id,
        LOWER(Spawned) AS spawn_status
    FROM
        salmon
    WHERE
        LOWER(Species) = 'chum' AND LOWER(Type) = 'dead'
)
SELECT
    CAST(COUNT(CASE WHEN spawn_status = 'spawned' THEN _id END) AS float) / CAST(COUNT(_id) AS float) AS spawned_chum_ratio,
    CAST(COUNT(CASE WHEN spawn_status = 'unspawned' THEN _id END) AS float) / CAST(COUNT(_id) AS float) AS unspawned_chum_ratio,
    CAST(COUNT(CASE WHEN spawn_status = 'partially_spawned' THEN _id END) AS float) / CAST(COUNT(_id) AS float) AS partial_spawn_chum_ratio,
    CAST(COUNT(CASE WHEN spawn_status = 'unknown' THEN _id END) AS float) / CAST(COUNT(_id) AS float) AS unknown_spawn_chum_ratio
FROM
    dead_chum
'''
df = pd.read_sql(spawning_query, connection)
if df.isna().all().all():
    # With no dead chum the divisions yield NULL; plotting an all-null frame
    # raises, so skip the chart and still show the (empty) table below.
    print('no dead chum recorded; skipping spawn success chart')
    ax = None
else:
    ax = df.plot(kind='barh', stacked=True)
display(ip.HTML(df.to_html(index=False)))

In [None]:
### USER INPUT QUERY
# Interactive prompt for ad-hoc SQL against the in-memory database.
# The loop ends on a blank line, 'exit' or 'quit', or when input is closed or
# interrupted; otherwise the statement is executed verbatim and its rows printed.
done = False
while not done:
    try:
        query = input("Enter a query (blank line, 'exit' or 'quit' to stop): ").strip()
    except (EOFError, KeyboardInterrupt):
        # No more input is available (or the user interrupted): stop cleanly.
        done = True
        continue
    if query.lower() in ('', 'exit', 'quit'):
        done = True
        continue
    try:
        print("entering query: " + query)
        cursor.execute(query)
        print(cursor.fetchall())
    except (sqlite3.Error, sqlite3.Warning) as e:
        # sqlite3.Warning is not a subclass of sqlite3.Error; some Python
        # versions raise it when several statements are passed to execute().
        print("SQLite error:", e)

In [None]:
# Shell escape: print the installed versions of the Jupyter components, to
# record the environment this notebook was run in.
!jupyter --version