In [7]:
# column names
# Sequence,Taxon_rank,Order,Family,Family_English_name,Scientific_name,Authority,Bibliographic_details,English_name_AviList,English_name_Clements_v2024,English_name_BirdLife_v9,Proposal_number,Decision_summary,Range,Extinct_or_possibly_extinct,IUCN_Red_List_Category,BirdLife_DataZone_URL,Species_code_Cornell_Lab,Birds_of_the_World_URL,AvibaseID,Gender_of_genus,Type_species_of_genus,Type_locality,Title_of_original_description,Original_description_URL,Protonym

import sqlite3
import os
from pathlib import Path

# Set up paths (assumes the kernel's working directory is notebooks/)
project_root = Path.cwd().parent  # Go up one level from notebooks/
data_dir = project_root / "data"
db_path = data_dir / "master_birder.db"
print(f"Database path: {db_path}")

# sqlite3's connection context manager only commits/rolls back a transaction;
# it does NOT close the connection, so close it explicitly once we are done.
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # List all user tables (SQLite's internal sqlite_* tables are filtered out)
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
    tables = cursor.fetchall()
    for table in tables:
        print(f"  - {table[0]}")
finally:
    conn.close()
    

Database path: /Users/ken/Documents/wk/master-birder-paper/data/master_birder.db
  - AvibaseID
  - ParentChildRelationships
  - OriginalConcepts
  - TaxanomicConcepts
  - NameConcepts
  - LifeHistory
  - GeoGraphicRange
  - OtherRelationships
  - Synonyms


In [27]:
def select_all_from_table(db_path: str, table_name: str):
    """Return every row of ``table_name`` as a list of tuples.

    Args:
        db_path: Location of the SQLite database file (any value accepted by
            ``sqlite3.connect``).
        table_name: Name of the table to read. It is interpolated directly
            into the SQL text (identifiers cannot be bound as parameters), so
            it must come from a trusted source.

    Returns:
        The list produced by ``cursor.fetchall()`` for
        ``SELECT * FROM <table_name>``.

    Raises:
        sqlite3.Error: If the query fails (e.g. the table does not exist).
    """
    # ``with sqlite3.connect(...)`` would only scope a transaction and leave
    # the connection open; close it explicitly, even when the query fails.
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute(f"SELECT * FROM {table_name};")
        return cursor.fetchall()
    finally:
        conn.close()

def describe_all(db_path: str):
    """Print the column layout of every user table in the database.

    For each table listed in ``sqlite_master`` (excluding SQLite's internal
    ``sqlite_*`` tables) a header is printed, followed by one line per column
    showing its name, declared type, NULL/NOT NULL flag and a ``PK`` marker
    for primary-key columns.

    Args:
        db_path: Location of the SQLite database file (any value accepted by
            ``sqlite3.connect``).
    """
    # The sqlite3 connection context manager does not close the connection,
    # so manage its lifetime explicitly.
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # List all tables
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
        table_names = [name for (name,) in cursor.fetchall()]
        for table_name in table_names:
            print(f"\nTable: {table_name}")
            print("-" * 50)
            # Quote the identifier so table names containing spaces, keywords
            # or quotes do not break the PRAGMA statement.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            cursor.execute(f"PRAGMA table_info({quoted_name});")
            # table_info rows are (cid, name, type, notnull, dflt_value, pk)
            for _cid, col_name, col_type, not_null, _default, pk in cursor.fetchall():
                print(f"  {col_name} ({col_type}) - {'NOT NULL' if not_null else 'NULL'} - {'PK' if pk else ''}")
    finally:
        conn.close()

def truncate_all(db_path: str):
    """Delete every row from every user table, keeping the tables themselves.

    Each ``DELETE`` statement is printed before it is executed. All deletes
    run in one transaction: they are committed together on success and rolled
    back if any statement raises.

    Args:
        db_path: Location of the SQLite database file (any value accepted by
            ``sqlite3.connect``).
    """
    conn = sqlite3.connect(db_path)
    try:
        # ``with conn`` handles commit/rollback only; the surrounding
        # try/finally is what actually closes the connection.
        with conn:
            cursor = conn.cursor()

            # List all tables
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
            tables = cursor.fetchall()
            for table in tables:
                sql = f"DELETE FROM {table[0]};"
                print(f"{sql}")
                cursor.execute(sql)
    finally:
        conn.close()

# Print the column layout of every table in the database at db_path.
describe_all(db_path=db_path)


Table: AvibaseID
--------------------------------------------------
  avibase_id (TEXT) - NULL - PK
  concept_label (TEXT) - NULL - 

Table: ParentChildRelationships
--------------------------------------------------
  avibase_id (TEXT) - NULL - PK
  version (TEXT) - NULL - PK
  parent_id (TEXT) - NULL - 
  fract_weight (REAL) - NULL - 

Table: OriginalConcepts
--------------------------------------------------
  avibase_id (TEXT) - NULL - PK
  concept_id (TEXT) - NULL - PK

Table: TaxanomicConcepts
--------------------------------------------------
  concept_id (TEXT) - NULL - PK
  avibase_id (TEXT) - NULL - 
  taxon_name_id (TEXT) - NULL - 
  authority (TEXT) - NULL - 
  scientific_name (TEXT) - NULL - 
  common_name (TEXT) - NULL - 
  higher_classification (TEXT) - NULL - 

Table: NameConcepts
--------------------------------------------------
  taxon_name_id (TEXT) - NULL - PK
  protonym (TEXT) - NULL - 
  authors (TEXT) - NULL - 
  year (INTEGER) - NULL - 
  publication_source (T

In [None]:
# CLEAN: uncomment the call below to delete ALL rows from every table (destructive).
# truncate_all(db_path=db_path)

DELETE FROM AvibaseID;
DELETE FROM ParentChildRelationships;
DELETE FROM OriginalConcepts;
DELETE FROM TaxanomicConcepts;
DELETE FROM NameConcepts;
DELETE FROM LifeHistory;
DELETE FROM GeoGraphicRange;
DELETE FROM OtherRelationships;
DELETE FROM Synonyms;


In [None]:
# Print every row of the AvibaseID table.
# The sqlite3 connection context manager does not close the connection,
# so close it explicitly when done.
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM AvibaseID;")
    rows = cursor.fetchall()
    for row in rows:
        print(row)
finally:
    conn.close()

('AVIBASE-123456', 'American Robin')
('AVIBASE-123457', 'Northern Cardinal')
('AVIBASE-123458', 'Blue Jay')
('AVIBASE-123459', 'House Sparrow')
('AVIBASE-123460', 'House Finch')


In [29]:
# Fetch all AvibaseID rows; as the cell's last expression, the returned list is displayed.
select_all_from_table(db_path=db_path, table_name="AvibaseID")

[]

In [None]:
# Load Order data from AL25-order-with-IDs.csv
import csv

def load_order_data(db_path: str, csv_file_path: str):
    """Load order data from CSV into AvibaseID, TaxanomicConcepts, and NameConcepts tables.

    For every CSV row one record is upserted (``INSERT OR REPLACE``) into each
    of the three tables. All rows are written in a single transaction that is
    committed on success and rolled back if any row fails.

    Args:
        db_path: Location of the SQLite database file (any value accepted by
            ``sqlite3.connect``).
        csv_file_path: Path to a CSV file with a header row containing at
            least the columns ``concept_id``, ``AvibaseID``,
            ``Scientific_name`` and ``taxon_name_id``.

    Raises:
        KeyError: If a required column is missing from the CSV header.
        sqlite3.Error: If an insert fails (e.g. a target table is missing).
    """
    # ``with sqlite3.connect(...)`` never closes the connection, so the
    # connection lifetime is handled by try/finally instead.
    conn = sqlite3.connect(db_path)
    try:
        # The connection context manager commits on success and rolls back on error.
        with conn:
            cursor = conn.cursor()

            # utf-8-sig strips a leading byte-order mark (otherwise the first
            # header name would be unreadable) and reads plain UTF-8 unchanged.
            # newline='' is what the csv module requires for correct handling
            # of newlines embedded in quoted fields.
            with open(csv_file_path, 'r', encoding='utf-8-sig', newline='') as csvfile:
                reader = csv.DictReader(csvfile)

                for row in reader:
                    # Extract data from CSV row
                    concept_id = row['concept_id']
                    avibase_id = row['AvibaseID']
                    scientific_name = row['Scientific_name']
                    taxon_name_id = row['taxon_name_id']

                    # 1. Insert into AvibaseID table
                    cursor.execute("""
                        INSERT OR REPLACE INTO AvibaseID (avibase_id, concept_label)
                        VALUES (?, ?)
                    """, (avibase_id, scientific_name))

                    # 2. Insert into TaxanomicConcepts table
                    cursor.execute("""
                        INSERT OR REPLACE INTO TaxanomicConcepts 
                        (concept_id, avibase_id, taxon_name_id, authority, scientific_name, common_name, higher_classification)
                        VALUES (?, ?, ?, ?, ?, ?, ?)
                    """, (concept_id, avibase_id, taxon_name_id, None, scientific_name, None, "Aves"))

                    # 3. Insert into NameConcepts table
                    # NOTE(review): this placeholder row is all NULLs apart from the
                    # key, so INSERT OR REPLACE overwrites any details already stored
                    # for this taxon_name_id - confirm that is intended.
                    cursor.execute("""
                        INSERT OR REPLACE INTO NameConcepts 
                        (taxon_name_id, protonym, authors, year, publication_source, tsn)
                        VALUES (?, ?, ?, ?, ?, ?)
                    """, (taxon_name_id, None, None, None, None, None))

        print(f"Successfully loaded order data from {csv_file_path}")
    finally:
        conn.close()

# Load the order data from the CSV in data_dir into the database at db_path
csv_file_path = data_dir / "AL25-order-with-IDs.csv"
load_order_data(db_path, csv_file_path)
