In [53]:
# Load pandas module
import pandas as pd
import duckdb

### Step 1:
Create a query that will return the distinct species for which there is egg data (not all species and not all nests have egg data), so that you can then loop over those species. Your query should return two columns, species code and scientific name. Please order the results in alphabetic order of scientific name.

In [54]:
# Connect to database
conn = duckdb.connect("../database/database.db")
conn

<duckdb.duckdb.DuckDBPyConnection at 0x19862d29170>

In [55]:
# Create cursor
cur = conn.cursor()

In [56]:
# Check our tables are available
cur.execute("SHOW TABLES").df()

Unnamed: 0,name
0,Bird_eggs
1,Bird_nests
2,Camp_assignment
3,Crop_calendar
4,Crop_calendar_2
5,Crop_yield
6,Crop_yield_2
7,Personnel
8,Site
9,Site_avg_snowcover


In [57]:
# Let's first view the variables in the Bird_eggs table
cur.execute("PRAGMA table_info('Bird_eggs');").df()

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Book_page,VARCHAR,False,,False
1,1,Year,INTEGER,True,,False
2,2,Site,VARCHAR,True,,False
3,3,Nest_ID,VARCHAR,True,,True
4,4,Egg_num,INTEGER,True,,True
5,5,Length,FLOAT,True,,False
6,6,Width,FLOAT,True,,False


In [58]:
# View the species column
cur.execute("PRAGMA table_info('Species');").fetchdf()

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,Code,VARCHAR,True,,True
1,1,Common_name,VARCHAR,True,,False
2,2,Scientific_name,VARCHAR,False,,False
3,3,Relevance,VARCHAR,False,,False


In [59]:
# View Bird eggs data
cur.execute("SELECT * FROM Bird_eggs LIMIT 5;").fetchdf()

Unnamed: 0,Book_page,Year,Site,Nest_ID,Egg_num,Length,Width
0,b14.6,2014,eaba,14eabaage01,1,39.139999,33.0
1,b14.6,2014,eaba,14eabaage01,2,41.509998,33.389999
2,b14.6,2014,eaba,14eabaage01,3,48.290001,33.400002
3,b14.6,2014,eaba,14eabaagl01,1,47.560001,32.360001
4,b14.6,2014,eaba,14eabaagl01,2,48.130001,32.400002


In [60]:
# View Bird Nests data
cur.execute("SELECT * FROM Bird_nests LIMIT 3;").fetchdf()

Unnamed: 0,Book_page,Year,Site,Nest_ID,Species,Observer,Date_found,how_found,Clutch_max,floatAge,ageMethod
0,b14.6,2014,chur,14HPE1,sepl,vloverti,2014-06-14,,3,,
1,b11.7,2011,eaba,11eaba,wrsa,bhill,2011-07-10,searcher,4,,
2,b11.6,2011,eaba,11eabaagc01,amgp,dkessler,2011-06-24,searcher,4,6.0,float


In [61]:
cur.execute("SELECT DISTINCT Code, Scientific_name FROM Species JOIN Bird_nests " \
"ON Species.Code = Bird_nests.Species " \
"JOIN Bird_eggs ON Bird_eggs.Nest_ID = Bird_nests.Nest_ID " \
"ORDER BY Scientific_name;").fetchdf()

Unnamed: 0,Code,Scientific_name
0,rutu,Arenaria interpres
1,dunl,Calidris alpina
2,wrsa,Calidris fuscicollis
3,sepl,Charadrius semipalmatus
4,reph,Phalaropus fulicarius
5,amgp,Pluvialis dominica
6,bbpl,Pluvialis squatarola


### Step 2:
Iterate over the speices like so:

In [62]:
species_query = """SELECT DISTINCT Code, Scientific_name FROM Species JOIN Bird_nests 
ON Species.Code = Bird_nests.Species  
JOIN Bird_eggs ON Bird_eggs.Nest_ID = Bird_nests.Nest_ID 
ORDER BY Scientific_name;"""


for row in cur.execute(species_query).fetchdf():  # DuckDB lame-o workaround
    species_code = row[0]
    scientific_name = row[1]
    # query egg data for that species (step 3)
    # compute statistics and print results (step 4)

### Step 3:
Construct a query that gathers egg data for a given species, one species at a time; the species code will be a parameter to that query. Compute the forumula W^2L

egg_query = """SELECT Width*Width*Length AS Volume FROM..."""

In [63]:
# Use the query from above but add a query for egg data

# Create egg_query
egg_query = """SELECT Width*Width*Length AS Volume, Species FROM Bird_eggs 
    JOIN Bird_nests ON
    Bird_eggs.Nest_ID = Bird_nests.Nest_ID 
    WHERE Bird_nests.Species = ?"""

for row in cur.execute(species_query).fetchall():  # DuckDB lame-o workaround
    species_code = row[0]
    scientific_name = row[1]
    
    df = pd.read_sql(egg_query, conn, params=[species_code])
    print(df)
    # compute statistics and print results (step 4)

          Volume Species
0   18129.257812    rutu
1   27114.382812    rutu
2   30465.349609    rutu
3   38728.300781    rutu
4   31784.414062    rutu
5   35203.085938    rutu
6   34760.351562    rutu
7   35976.867188    rutu
8   35303.023438    rutu
9   37496.710938    rutu
10  36470.546875    rutu
11  36400.875000    rutu
12  18129.257812    rutu
13  27114.382812    rutu
14  30465.349609    rutu
15  38728.300781    rutu
16  34270.027344    rutu
17  32679.935547    rutu
18  37211.968750    rutu
19  42001.929688    rutu
20  44033.710938    rutu
21  45660.441406    rutu
22  46688.285156    rutu
          Volume Species
0   22976.552734    dunl
1   23460.568359    dunl
2   23576.367188    dunl
3   23206.162109    dunl
4   23200.333984    dunl
5   23714.867188    dunl
6   19850.609375    dunl
7   20749.957031    dunl
8   22288.044922    dunl
9   22131.791016    dunl
10  22265.214844    dunl
          Volume Species
0   20778.173828    wrsa
1   20857.626953    wrsa
2   20141.640625    wrsa


  df = pd.read_sql(egg_query, conn, params=[species_code])
  df = pd.read_sql(egg_query, conn, params=[species_code])
  df = pd.read_sql(egg_query, conn, params=[species_code])
  df = pd.read_sql(egg_query, conn, params=[species_code])
  df = pd.read_sql(egg_query, conn, params=[species_code])
  df = pd.read_sql(egg_query, conn, params=[species_code])
  df = pd.read_sql(egg_query, conn, params=[species_code])


### Step 4: 
Finally, and still within your loop, you’ll want to compute statistics and print out the results:


In [64]:
# For loop
for row in cur.execute(species_query).fetchall():
    species_code = row[0]
    scientific_name = row[1]
    
    df = pd.read_sql(egg_query, conn, params=[species_code])
    cv = round(df.Volume.std()/df.Volume.mean()*100, 2)
    print(f"{scientific_name} {cv}%")

Arenaria interpres 21.12%
Calidris alpina 5.46%
Calidris fuscicollis 16.77%
Charadrius semipalmatus 8.99%
Phalaropus fulicarius 4.65%
Pluvialis dominica 19.88%
Pluvialis squatarola 6.94%


  df = pd.read_sql(egg_query, conn, params=[species_code])
