In [None]:
### DB AND TABLE SETUP
import sqlite3
import pandas as pd
from IPython.display import display, HTML

# Source endpoints for the survey records, keyed by survey year (as a string).
# The loading cell walks this mapping, so adding a year here is enough to ingest it.
salmonURIs = {
    '2023': 'https://kf.kobotoolbox.org/api/v2/assets/aREJxDMcV3uPwP9Q82vT3k/data/?format=json',
}

# Everything lives in a throwaway in-memory SQLite database that is rebuilt on
# every run; `connection` and `cursor` are shared by all later cells.
connection = sqlite3.connect(":memory:")
cursor = connection.cursor()

# One row per survey record. `_id` is the primary key, so re-inserting a record
# that is already present can be detected by the insert statement.
create_salmon_table_query = '''
    CREATE TABLE IF NOT EXISTS salmon (
        _id INTEGER PRIMARY KEY,
        Survey_Date DATE,
        year DATE,
        Quantity INTEGER,
        Distance INTEGER,
        Stream TEXT,
        Type TEXT,
        Species TEXT,
        Probable_Predation TEXT,
        Length FLOAT,
        Width FLOAT,
        Spawned TEXT,
        Sex TEXT,
        Location TEXT
    );
'''
cursor.execute(create_salmon_table_query)

In [None]:
### DATA LOADING
import requests
# Parameterized insert; the placeholder order must match the tuple built by
# `_salmon_row`. OR IGNORE skips records whose `_id` is already in the table,
# so re-running this cell does not duplicate rows.
salmon_insert_query = '''
        INSERT OR IGNORE INTO salmon (
        _id,
        Survey_Date,
        year,
        Quantity,
        Distance,
        Stream,
        Type,
        Species,
        Probable_Predation,
        Length,
        Width,
        Spawned,
        Sex,
        Location
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
    '''

# Seconds to wait on the API before giving up, so a stalled connection cannot
# hang the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 60


def _salmon_row(entry, year):
    """Build the parameter tuple for `salmon_insert_query` from one API record.

    Fields missing from `entry` become None (stored as NULL). `year` is not read
    from the record; it is the key of `salmonURIs` the record was fetched under.
    """
    return (
        entry.get('_id'),
        entry.get('Survey_Date'),
        year,
        entry.get('Quantity'),
        entry.get('Distance'),
        entry.get('Stream'),
        entry.get('Type'),
        entry.get('Species'),
        entry.get('Probable_Predation'),
        entry.get('Length'),
        entry.get('Width'),
        entry.get('Spawned'),
        entry.get('Sex'),
        entry.get('Location'),
    )


print('loading salmon into database')
for year, uri in salmonURIs.items():
    print('loading for year: ' + year)
    # Follow the paginated response until the API reports no further page.
    while uri:
        response = requests.get(uri, timeout=REQUEST_TIMEOUT_SECONDS)
        # Fail loudly on HTTP errors instead of trying to parse an error body.
        response.raise_for_status()
        data = response.json()
        cursor.executemany(
            salmon_insert_query,
            [_salmon_row(entry, year) for entry in data['results']],
        )
        # The next-page URL is read from the same top-level 'next' field that
        # signals the end of the data. The previous code tested data['next']
        # but then followed data['links']['next'], which raised KeyError as
        # soon as a second page existed.
        uri = data.get('next')

# Make the loaded rows durable for the lifetime of the connection.
connection.commit()

In [None]:
### STATS BY SURVEY TABLE
import IPython.core.display as ip
# Per-survey-date counts of live/dead fish by species, plus redds.
# Each COUNT(CASE ... THEN _id END) counts only the rows matching its condition,
# because non-matching rows yield NULL and COUNT ignores NULLs.
#
# The query deliberately has no WHERE filter on Species: an earlier
# `WHERE Species IN ('coho', 'chum')` removed every 'unknown', cutthroat and
# species-less redd row before aggregation, so dead_unknown_count,
# live_cutthroat_count (and the unknown/cutthroat share of the totals) could
# never be non-zero. Species selection is done inside each CASE instead.
# ORDER BY keeps the displayed row order deterministic.
stats_by_survey_query = '''
SELECT
    Survey_Date,
    COUNT(CASE WHEN Species in ('chum', 'coho', 'unknown', 'sea-run_cutthroat') AND Type = 'live' THEN _id END) AS total_live_salmon_count,
    COUNT(CASE WHEN Species in ('chum', 'coho', 'unknown', 'sea-run_cutthroat') AND Type in ('dead', 'remnant') THEN _id END) AS total_dead_salmon_count,
    COUNT(CASE WHEN Species = 'chum' AND Type in ('dead', 'remnant') THEN _id END) AS dead_chum_count,
    COUNT(CASE WHEN Species = 'coho' AND Type in ('dead', 'remnant') THEN _id END) AS dead_coho_count,
    COUNT(CASE WHEN Species = 'unknown' AND Type in ('dead', 'remnant') THEN _id END) AS dead_unknown_count,
    COUNT(CASE WHEN Species = 'chum' AND Type = 'live' THEN _id END) AS live_chum_count,
    COUNT(CASE WHEN Species = 'coho' AND Type = 'live' THEN _id END) AS live_coho_count,
    COUNT(CASE WHEN Species in ('resident_cutthroat', 'sea-run_cutthroat') AND Type = 'live' THEN _id END) as live_cutthroat_count,
    COUNT(CASE WHEN Type = 'redd' THEN _id END) AS redds_count
FROM
    salmon
GROUP BY
    Survey_Date
ORDER BY
    Survey_Date;
'''
df = pd.read_sql(stats_by_survey_query, connection)
# Render as an HTML table without the positional DataFrame index.
display(ip.HTML(df.to_html(index=False)))

In [None]:
### REDDS TABLE
# Lists where and when each redd was recorded (stream, distance, survey date).
#
# The Type comparison is case-insensitive: SQLite's `=` on text is
# case-sensitive by default, and the previous literal 'Redd' did not match the
# lowercase 'redd' spelling used by the stats-by-survey query and by the other
# Type values ('live', 'dead', 'remnant'). LOWER() accepts either spelling.
redds_table_query = '''
SELECT
    Stream, Distance, Survey_Date
FROM
    salmon
WHERE LOWER(Type) = 'redd'
'''
df = pd.read_sql(redds_table_query, connection)
# Render as an HTML table without the positional DataFrame index.
display(ip.HTML(df.to_html(index=False)))

In [None]:
import IPython.core.display as ip
### SPAWN SUCCESS
# Spawning outcome of dead chum, expressed as a share of all dead chum.
# The query returns a single row with four ratios, one per `Spawned` value
# ('spawned', 'unspawned_1', 'partially_spawned', 'unknown'). Every ratio uses
# the same denominator: the number of rows with Species = 'chum' and
# Type = 'dead'. Both counts are cast to float so the division is not integer
# division. When there are no dead chum rows the denominator is 0 and SQLite
# evaluates the division to NULL rather than raising an error.
# NOTE(review): only Type = 'dead' is counted here; 'remnant' rows are
# excluded, unlike the dead counts in the stats-by-survey query — confirm that
# this is intended.
spawning_query = '''
SELECT
    CAST(COUNT(CASE WHEN Species = 'chum' AND Type = 'dead' AND Spawned = 'spawned' THEN _id END) AS float) / CAST(COUNT(CASE WHEN Species = 'chum' AND Type = 'dead' THEN _id END) AS float) AS spawned_chum_ratio,
    CAST(COUNT(CASE WHEN Species = 'chum' AND Type = 'dead' AND Spawned = 'unspawned_1' THEN _id END) AS float) / CAST(COUNT(CASE WHEN Species = 'chum' AND Type = 'dead' THEN _id END) AS float) AS unspawned_chum_ratio,
    CAST(COUNT(CASE WHEN Species = 'chum' AND Type = 'dead' AND Spawned = 'partially_spawned' THEN _id END) AS float) / CAST(COUNT(CASE WHEN Species = 'chum' AND Type = 'dead' THEN _id END) AS float) AS partial_spawn_chum_ratio,
    CAST(COUNT(CASE WHEN Species = 'chum' AND Type = 'dead' AND Spawned = 'unknown' THEN _id END) AS float) / CAST(COUNT(CASE WHEN Species = 'chum' AND Type = 'dead' THEN _id END) AS float) AS unknown_spawn_chum_ratio
FROM
    salmon
'''
df = pd.read_sql(spawning_query, connection)
# Render as an HTML table without the positional DataFrame index.
display(ip.HTML(df.to_html(index=False)))

In [None]:
### USER INPUT QUERY
# Interactive SQL prompt against the in-memory salmon database.
#
# Each line typed is executed as-is and the fetched rows are printed. SQLite
# errors are reported and the prompt continues. The loop ends when the user
# enters a blank line, 'exit' or 'quit', when stdin is closed (EOF), or when
# the cell is interrupted. Previously `done` was never set, so the cell could
# only be stopped by killing it and the cells after it never ran.
#
# NOTE: the input is executed verbatim (including DROP/DELETE). That is the
# purpose of this cell, but it must only be used by a trusted operator.
EXIT_COMMANDS = {'', 'exit', 'quit'}

print("Enter SQL to run it; enter a blank line, 'exit' or 'quit' to stop.")
done = False
while not done:
    try:
        query = input("Enter a query: ")
    except (EOFError, KeyboardInterrupt):
        # No more input available, or the user interrupted the prompt.
        done = True
        continue

    if query.strip().lower() in EXIT_COMMANDS:
        done = True
        continue

    try:
        print("entering query: " + query)
        cursor.execute(query)
        print(cursor.fetchall())
    except sqlite3.Error as e:
        print("SQLite error:", e)

In [None]:
### CLOSE CONNECTION
# The database was opened with ":memory:", so closing the connection discards
# the salmon table and every loaded row. After this cell runs, `connection`
# and `cursor` can no longer be used; re-run the setup and data-loading cells
# before running any query cell again.
connection.close()