In [4]:
# column names
# Sequence,Taxon_rank,Order,Family,Family_English_name,Scientific_name,Authority,Bibliographic_details,English_name_AviList,English_name_Clements_v2024,English_name_BirdLife_v9,Proposal_number,Decision_summary,Range,Extinct_or_possibly_extinct,IUCN_Red_List_Category,BirdLife_DataZone_URL,Species_code_Cornell_Lab,Birds_of_the_World_URL,AvibaseID,Gender_of_genus,Type_species_of_genus,Type_locality,Title_of_original_description,Original_description_URL,Protonym

import sqlite3
from contextlib import closing
from pathlib import Path

# Resolve project paths relative to the working directory.
# NOTE: assumes the kernel was started inside notebooks/, one level below the project root.
project_root = Path.cwd().parent
data_dir = project_root / "data"
db_path = data_dir / "master_birder.db"
print(f"Database path: {db_path}")

# sqlite3's own context manager only commits or rolls back the transaction;
# closing() is what actually releases the connection when the block exits.
with closing(sqlite3.connect(db_path)) as conn:
    cursor = conn.cursor()

    # List all user tables (names matching SQLite's internal sqlite_* pattern are filtered out)
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
    tables = cursor.fetchall()
    for table in tables:
        print(f"  - {table[0]}")
    

Database path: /Users/ken/Documents/wk/master-birder-paper/data/master_birder.db
  - AvibaseID
  - ParentChildRelationships
  - OriginalConcepts
  - TaxanomicConcepts
  - NameConcepts
  - LifeHistory
  - GeoGraphicRange
  - OtherRelationships
  - Synonyms


In [None]:
def select_all_from_table(db_path: str, table_name: str):
    """Return every row of ``table_name`` as a list of tuples.

    Args:
        db_path: Path to the SQLite database file.
        table_name: Name of the table to read. It is treated as a single
            identifier and quoted, so names that are SQL keywords or contain
            spaces work.

    Returns:
        list[tuple]: All rows, in the order SQLite returns them.

    Raises:
        sqlite3.OperationalError: If the table does not exist.
    """
    # Identifiers cannot be bound as "?" parameters, so quote the name
    # instead of splicing it into the statement raw.
    quoted_name = '"' + table_name.replace('"', '""') + '"'
    # sqlite3's own context manager only handles transactions; closing()
    # is what releases the connection when the block exits.
    with closing(sqlite3.connect(db_path)) as conn:
        return conn.execute(f"SELECT * FROM {quoted_name};").fetchall()

def describe_all(db_path: str):
    """Print the column layout of every user table in the database.

    For each table this prints a header line, a 50-character rule, and then
    one line per column in the form ``  name (type) - NOT NULL|NULL - PK|``.

    Args:
        db_path: Path to the SQLite database file.
    """
    # sqlite3's own context manager only handles transactions; closing()
    # is what releases the connection when the block exits.
    with closing(sqlite3.connect(db_path)) as conn:
        table_names = [
            name
            for (name,) in conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';"
            )
        ]
        for table_name in table_names:
            print(f"\nTable: {table_name}")
            print("-" * 50)
            # Quote the identifier so names with spaces or SQL keywords still work.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            # PRAGMA table_info yields (cid, name, type, notnull, dflt_value, pk).
            for _cid, col_name, col_type, not_null, _default, pk in conn.execute(
                f"PRAGMA table_info({quoted_name});"
            ):
                nullability = 'NOT NULL' if not_null else 'NULL'
                pk_marker = 'PK' if pk else ''
                print(f"  {col_name} ({col_type}) - {nullability} - {pk_marker}")

def truncate_all(db_path: str):
    """Delete every row from every user table, leaving the schema in place.

    One ``DELETE FROM <table>;`` line is printed per table. All deletes run in
    a single transaction, so an error rolls the whole operation back.

    Args:
        db_path: Path to the SQLite database file.
    """
    # sqlite3's own context manager only handles transactions; closing()
    # is what releases the connection when the block exits.
    with closing(sqlite3.connect(db_path)) as conn:
        table_names = [
            name
            for (name,) in conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';"
            )
        ]
        with conn:  # commit on success, roll back if any DELETE fails
            for table_name in table_names:
                # The log line keeps the plain table name; the executed statement
                # quotes it so keyword or spaced names do not break the SQL.
                print(f"DELETE FROM {table_name};")
                quoted_name = '"' + table_name.replace('"', '""') + '"'
                conn.execute(f"DELETE FROM {quoted_name};")

#describe_all(db_path=db_path)
#truncate_all(db_path)

DELETE FROM AvibaseID;
DELETE FROM ParentChildRelationships;
DELETE FROM OriginalConcepts;
DELETE FROM TaxanomicConcepts;
DELETE FROM NameConcepts;
DELETE FROM LifeHistory;
DELETE FROM GeoGraphicRange;
DELETE FROM OtherRelationships;
DELETE FROM Synonyms;


In [None]:
# # CLEAN
# truncate_all(db_path=db_path)

DELETE FROM AvibaseID;
DELETE FROM ParentChildRelationships;
DELETE FROM OriginalConcepts;
DELETE FROM TaxanomicConcepts;
DELETE FROM NameConcepts;
DELETE FROM LifeHistory;
DELETE FROM GeoGraphicRange;
DELETE FROM OtherRelationships;
DELETE FROM Synonyms;


In [None]:
# Show the current contents of the AvibaseID table.
# Reuses select_all_from_table instead of opening a second, never-closed connection here.
rows = select_all_from_table(db_path=db_path, table_name="AvibaseID")
for row in rows:
    print(row)

('AVIBASE-123456', 'American Robin')
('AVIBASE-123457', 'Northern Cardinal')
('AVIBASE-123458', 'Blue Jay')
('AVIBASE-123459', 'House Sparrow')
('AVIBASE-123460', 'House Finch')


In [38]:
# Inspect one of the tables written by the loaders
# (AvibaseID, TaxanomicConcepts, NameConcepts, OriginalConcepts);
# change table_name to look at a different one.
select_all_from_table(db_path=db_path, table_name="OriginalConcepts")

[('avibase-order-1', 'ORDER-1'),
 ('avibase-order-10', 'ORDER-10'),
 ('avibase-order-24', 'ORDER-24'),
 ('avibase-order-34', 'ORDER-34'),
 ('avibase-order-47', 'ORDER-47'),
 ('avibase-order-230', 'ORDER-230'),
 ('avibase-order-588', 'ORDER-588'),
 ('avibase-order-1613', 'ORDER-1613'),
 ('avibase-order-1626', 'ORDER-1626'),
 ('avibase-order-1701', 'ORDER-1701'),
 ('avibase-order-1761', 'ORDER-1761'),
 ('avibase-order-1830', 'ORDER-1830'),
 ('avibase-order-2219', 'ORDER-2219'),
 ('avibase-order-2226', 'ORDER-2226'),
 ('avibase-order-2274', 'ORDER-2274'),
 ('avibase-order-3233', 'ORDER-3233'),
 ('avibase-order-3237', 'ORDER-3237'),
 ('avibase-order-3725', 'ORDER-3725'),
 ('avibase-order-4639', 'ORDER-4639'),
 ('avibase-order-4649', 'ORDER-4649'),
 ('avibase-order-4668', 'ORDER-4668'),
 ('avibase-order-4678', 'ORDER-4678'),
 ('avibase-order-4715', 'ORDER-4715'),
 ('avibase-order-4970', 'ORDER-4970'),
 ('avibase-order-5006', 'ORDER-5006'),
 ('avibase-order-5142', 'ORDER-5142'),
 ('avibase-o

In [36]:
# Load Order data from AL25-order-with-IDs.csv
import csv

def load_order_data(db_path: str, csv_file_path: str):
    """Load order-level rows from a CSV into the four concept tables.

    Every CSV row is written with INSERT OR REPLACE to AvibaseID,
    TaxanomicConcepts, NameConcepts and OriginalConcepts. All rows are loaded
    in one transaction: an error rolls everything back.

    Args:
        db_path: Path to the SQLite database file.
        csv_file_path: CSV with the columns ``concept_id``, ``AvibaseID``,
            ``Scientific_name`` and ``taxon_name_id``; ``Decision_summary``
            is optional and stored as NULL when absent or empty.

    Raises:
        KeyError: If a required column is missing from the CSV.
    """
    # sqlite3's own context manager only handles transactions; closing()
    # is what releases the connection when the block exits.
    with closing(sqlite3.connect(db_path)) as conn:
        # newline='' lets the csv module deal with line breaks inside quoted fields.
        with conn, open(csv_file_path, 'r', encoding='utf-8', newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                concept_id = row['concept_id']
                avibase_id = row['AvibaseID']
                scientific_name = row['Scientific_name']
                taxon_name_id = row['taxon_name_id']
                # Empty or missing summaries are stored as NULL rather than ''.
                decision_summary = row.get('Decision_summary') or None

                # 1. AvibaseID: the scientific name doubles as the concept label
                conn.execute("""
                    INSERT OR REPLACE INTO AvibaseID (avibase_id, concept_label)
                    VALUES (?, ?)
                """, (avibase_id, scientific_name))

                # 2. TaxanomicConcepts: orders have no authority or common name here
                conn.execute("""
                    INSERT OR REPLACE INTO TaxanomicConcepts 
                    (concept_id, avibase_id, taxon_name_id, authority, scientific_name, common_name, higher_classification)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                """, (concept_id, avibase_id, taxon_name_id, None, scientific_name, None, "Aves"))

                # 3. NameConcepts: the decision summary is stored as publication_source
                conn.execute("""
                    INSERT OR REPLACE INTO NameConcepts 
                    (taxon_name_id, protonym, authors, year, publication_source, tsn)
                    VALUES (?, ?, ?, ?, ?, ?)
                """, (taxon_name_id, None, None, None, decision_summary, None))

                # 4. OriginalConcepts: link the Avibase ID to its concept
                conn.execute("""
                    INSERT OR REPLACE INTO OriginalConcepts 
                    (avibase_id, concept_id)
                    VALUES (?, ?)
                """, (avibase_id, concept_id))

        print(f"Successfully loaded order data from {csv_file_path}")

# Load the order data from the prepared CSV in data_dir into the database at db_path.
# The loader uses INSERT OR REPLACE, so re-running this cell overwrites rows with the same keys.
csv_file_path = data_dir / "AL25-order-with-IDs.csv"
load_order_data(db_path, csv_file_path)


Successfully loaded order data from /Users/ken/Documents/wk/master-birder-paper/data/AL25-order-with-IDs.csv


# FAMILY

PROMPT WAS:

I'd like to do a similar transformation of AL25-family.csv into AL25-family-with-IDs.csv. I want the new file to:
1. not have columns that are unused
2. have a concept_id in this form "FAMILY-{Sequence}"
3. have an AvibaseID in this form "avibase-family-{Sequence}"
4. have a taxon_name_id of "taxonname-family-{Sequence}"

Instead of creating the file for me, give me a script to do so.

In [None]:
import csv

def transform_family_csv(input_file, output_file):
    """Write a trimmed copy of the family CSV with generated ID columns.

    Only the columns listed in ``columns_to_keep`` are copied. Three columns
    are derived from each row's ``Sequence`` value: ``concept_id``
    (``FAMILY-<seq>``), ``AvibaseID`` (``avibase-family-<seq>``) and
    ``taxon_name_id`` (``taxonname-family-<seq>``). Rows without a
    ``Sequence`` value are skipped.

    Args:
        input_file: Path of the source CSV (e.g. AL25-family.csv).
        output_file: Path of the CSV to create; an existing file is overwritten.
    """
    columns_to_keep = [
        'Sequence', 'Taxon_rank', 'Order', 'Family', 'Family_English_name',
        'Scientific_name', 'Proposal_number', 'Decision_summary'
    ]
    # concept_id goes first, the other generated IDs go last
    new_fieldnames = ['concept_id'] + columns_to_keep + ['AvibaseID', 'taxon_name_id']

    # utf-8-sig drops a leading byte-order mark (common in spreadsheet exports);
    # without that the first header would read '\ufeffSequence' and every row
    # would be skipped. newline='' lets the csv module handle line breaks
    # inside quoted fields itself.
    with open(input_file, 'r', encoding='utf-8-sig', newline='') as infile, \
         open(output_file, 'w', encoding='utf-8', newline='') as outfile:
        reader = csv.DictReader(infile)
        writer = csv.DictWriter(outfile, fieldnames=new_fieldnames)
        writer.writeheader()

        for row in reader:
            sequence = row.get('Sequence')
            if not sequence:
                # blank line or row without a sequence number
                continue

            new_row = {col: row.get(col, '') for col in columns_to_keep}
            new_row['concept_id'] = f"FAMILY-{sequence}"
            new_row['AvibaseID'] = f"avibase-family-{sequence}"
            new_row['taxon_name_id'] = f"taxonname-family-{sequence}"
            writer.writerow(new_row)

    print(f"Successfully created {output_file}")

# Usage: read from and write to the project's data directory
# (data_dir is defined in the setup cell) instead of a machine-specific absolute path.
input_file = data_dir / "AL25-family.csv"
output_file = data_dir / "AL25-family-with-IDs.csv"
transform_family_csv(input_file, output_file)


Successfully created /Users/ken/Documents/wk/master-birder-paper/data/AL25-family-with-IDs.csv


# Genera

I'd like to do a similar transformation of AL25-genera.csv into AL25-genera-with-IDs.csv. I want the new file to:
1. not have columns that are unused
2. have a concept_id in this form "GENUS-{Sequence}"
3. have an AvibaseID in this form "avibase-genus-{Sequence}"
4. have a taxon_name_id of "taxonname-genus-{Sequence}"

Here's a script to do so, like for family.

In [3]:
import csv

def transform_genera_csv(input_file, output_file):
    """Write a trimmed copy of the genera CSV with generated ID columns.

    Only the columns listed in ``columns_to_keep`` are copied. Three columns
    are derived from each row's ``Sequence`` value: ``concept_id``
    (``GENUS-<seq>``), ``AvibaseID`` (``avibase-genus-<seq>``) and
    ``taxon_name_id`` (``taxonname-genus-<seq>``). Rows without a
    ``Sequence`` value are skipped.

    Args:
        input_file: Path of the source CSV (e.g. AL25-genera.csv).
        output_file: Path of the CSV to create; an existing file is overwritten.
    """
    columns_to_keep = [
        'Sequence', 'Taxon_rank', 'Order', 'Family', 'Family_English_name',
        'Scientific_name', 'Authority', 'Proposal_number', 'Decision_summary',
        'Gender_of_genus', 'Type_species_of_genus', 'Type_locality',
        'Title_of_original_description', 'Original_description_URL', 'Protonym'
    ]
    # concept_id goes first, the other generated IDs go last
    new_fieldnames = ['concept_id'] + columns_to_keep + ['AvibaseID', 'taxon_name_id']

    # utf-8-sig drops a leading byte-order mark (common in spreadsheet exports);
    # without that the first header would read '\ufeffSequence' and every row
    # would be skipped. newline='' lets the csv module handle line breaks
    # inside quoted fields itself.
    with open(input_file, 'r', encoding='utf-8-sig', newline='') as infile, \
         open(output_file, 'w', encoding='utf-8', newline='') as outfile:
        reader = csv.DictReader(infile)
        writer = csv.DictWriter(outfile, fieldnames=new_fieldnames)
        writer.writeheader()

        for row in reader:
            sequence = row.get('Sequence')
            if not sequence:
                # blank line or row without a sequence number
                continue

            new_row = {col: row.get(col, '') for col in columns_to_keep}
            new_row['concept_id'] = f"GENUS-{sequence}"
            new_row['AvibaseID'] = f"avibase-genus-{sequence}"
            new_row['taxon_name_id'] = f"taxonname-genus-{sequence}"
            writer.writerow(new_row)

    print(f"Successfully created {output_file}")

# Usage: read from and write to the project's data directory
# (data_dir is defined in the setup cell) instead of a machine-specific absolute path.
input_file = data_dir / "AL25-genera.csv"
output_file = data_dir / "AL25-genera-with-IDs.csv"
transform_genera_csv(input_file, output_file)


Successfully created /Users/ken/Documents/wk/master-birder-paper/data/AL25-genera-with-IDs.csv


# Species

I'd like to do a similar transformation of AL25-species.csv into AL25-species-with-IDs.csv. I want the new file to:
1. not have columns that are unused
2. have a concept_id in this form "SPECIES-{Sequence}"
3. have a taxon_name_id of "taxonname-species-{Sequence}"

Note that you don't have to add a new AvibaseID, because each species row already has one.

Here's a script to do so, like for genera.

In [None]:
import csv

def transform_species_csv(input_file, output_file):
    """Write a trimmed copy of the species CSV with generated ID columns.

    Only the columns listed in ``columns_to_keep`` are copied; this includes
    the ``AvibaseID`` already present in the source. Two columns are derived
    from each row's ``Sequence`` value: ``concept_id`` (``SPECIES-<seq>``) and
    ``taxon_name_id`` (``taxonname-species-<seq>``). Rows without a
    ``Sequence`` value are skipped.

    Args:
        input_file: Path of the source CSV (e.g. AL25-species.csv).
        output_file: Path of the CSV to create; an existing file is overwritten.
    """
    # AvibaseID is kept as-is: species rows already carry one
    columns_to_keep = [
        'Sequence', 'Taxon_rank', 'Order', 'Family', 'Family_English_name',
        'Scientific_name', 'Authority', 'Bibliographic_details', 'English_name_AviList',
        'English_name_Clements_v2024', 'English_name_BirdLife_v9', 'Proposal_number',
        'Decision_summary', 'Range', 'Extinct_or_possibly_extinct', 'IUCN_Red_List_Category',
        'BirdLife_DataZone_URL', 'Species_code_Cornell_Lab', 'Birds_of_the_World_URL',
        'AvibaseID', 'Gender_of_genus', 'Type_species_of_genus', 'Type_locality',
        'Title_of_original_description', 'Original_description_URL', 'Protonym'
    ]
    # concept_id goes first, taxon_name_id goes last
    new_fieldnames = ['concept_id'] + columns_to_keep + ['taxon_name_id']

    # utf-8-sig drops a leading byte-order mark (common in spreadsheet exports);
    # without that the first header would read '\ufeffSequence' and every row
    # would be skipped. newline='' lets the csv module handle line breaks
    # inside quoted fields itself.
    with open(input_file, 'r', encoding='utf-8-sig', newline='') as infile, \
         open(output_file, 'w', encoding='utf-8', newline='') as outfile:
        reader = csv.DictReader(infile)
        writer = csv.DictWriter(outfile, fieldnames=new_fieldnames)
        writer.writeheader()

        for row in reader:
            sequence = row.get('Sequence')
            if not sequence:
                # blank line or row without a sequence number
                continue

            new_row = {col: row.get(col, '') for col in columns_to_keep}
            new_row['concept_id'] = f"SPECIES-{sequence}"
            new_row['taxon_name_id'] = f"taxonname-species-{sequence}"
            writer.writerow(new_row)

    print(f"Successfully created {output_file}")

# Usage: read from and write to the project's data directory
# (data_dir is defined in the setup cell) instead of a machine-specific absolute path.
input_file = data_dir / "AL25-species.csv"
output_file = data_dir / "AL25-species-with-IDs.csv"
transform_species_csv(input_file, output_file)

# NOTE: this run produced 3 problem rows, saved in data/Problem.rows.csv.
# For expediency they were removed by hand from the -with-IDs files.
# TODO: come back later and make the transformation robust enough to handle them.


Successfully created /Users/ken/Documents/wk/master-birder-paper/data/AL25-species-with-IDs.csv


# Load Order, Family, Genera, and Species Data

Similar to load_order_data(), I want to load AL25-family-with-IDs.csv, AL25-genera-with-IDs.csv, and AL25-species-with-IDs.csv into the same tables. The following script should do that (even though they don't all have all the columns). Additionally, AL25-order-with-IDs.csv can be loaded with the same script.

In [7]:
# Load Family, Genera, Species, and Orders Data
import csv

def _higher_classification(row: dict, taxon_type: str) -> str:
    """Build the parent label stored in TaxanomicConcepts.higher_classification."""
    if taxon_type == 'order':
        return "Aves"
    if taxon_type == 'family':
        # "or" also covers empty cells and short rows, where DictReader yields None
        return f"Order: {row.get('Order') or 'Unknown'}"
    if taxon_type == 'genus':
        return f"Family: {row.get('Family') or 'Unknown'}"
    if taxon_type == 'species':
        # The genus is the first word of the scientific name; a blank or
        # whitespace-only name has no words to take it from.
        name_parts = (row.get('Scientific_name') or '').split()
        return f"Genus: {name_parts[0] if name_parts else 'Unknown'}"
    return "Unknown"


def _common_name(row: dict, taxon_type: str):
    """Return the English name for a family or species row, or None if there is none."""
    if taxon_type == 'family':
        return row.get('Family_English_name') or None
    if taxon_type == 'species':
        # First non-empty name wins: AviList, then Clements, then BirdLife
        return (row.get('English_name_AviList')
                or row.get('English_name_Clements_v2024')
                or row.get('English_name_BirdLife_v9')
                or None)
    return None


def load_taxonomic_data(db_path: str, csv_file_path: str, taxon_type: str):
    """Load taxonomic rows from a CSV into the four concept tables.

    Every CSV row is written with INSERT OR REPLACE to AvibaseID,
    TaxanomicConcepts, NameConcepts and OriginalConcepts. All rows are loaded
    in one transaction: an error rolls everything back.

    Args:
        db_path: Path to the SQLite database
        csv_file_path: Path to the CSV file with IDs. Required columns are
            ``concept_id``, ``AvibaseID``, ``Scientific_name`` and
            ``taxon_name_id``; ``Decision_summary``, ``Authority``,
            ``Protonym`` and the name/parent columns are optional and stored
            as NULL (or 'Unknown' for the parent label) when absent or empty.
        taxon_type: Type of taxon ('order', 'family', 'genus', 'species');
            selects how higher_classification and common_name are derived.
            Any other value stores "Unknown" as the higher classification.

    Raises:
        KeyError: If a required column is missing from the CSV.
    """
    # sqlite3's own context manager only handles transactions; closing()
    # is what releases the connection when the block exits.
    with closing(sqlite3.connect(db_path)) as conn:
        # newline='' lets the csv module deal with line breaks inside quoted fields.
        with conn, open(csv_file_path, 'r', encoding='utf-8', newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                concept_id = row['concept_id']
                avibase_id = row['AvibaseID']
                scientific_name = row['Scientific_name']
                taxon_name_id = row['taxon_name_id']
                # Optional columns: empty or missing values become NULL
                decision_summary = row.get('Decision_summary') or None
                authority = row.get('Authority') or None
                protonym = row.get('Protonym') or None

                higher_classification = _higher_classification(row, taxon_type)
                common_name = _common_name(row, taxon_type)

                # 1. AvibaseID: the scientific name doubles as the concept label
                conn.execute("""
                    INSERT OR REPLACE INTO AvibaseID (avibase_id, concept_label)
                    VALUES (?, ?)
                """, (avibase_id, scientific_name))

                # 2. TaxanomicConcepts
                conn.execute("""
                    INSERT OR REPLACE INTO TaxanomicConcepts 
                    (concept_id, avibase_id, taxon_name_id, authority, scientific_name, common_name, higher_classification)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                """, (concept_id, avibase_id, taxon_name_id, authority, scientific_name, common_name, higher_classification))

                # 3. NameConcepts: authority is stored as authors, the decision
                #    summary as publication_source
                conn.execute("""
                    INSERT OR REPLACE INTO NameConcepts 
                    (taxon_name_id, protonym, authors, year, publication_source, tsn)
                    VALUES (?, ?, ?, ?, ?, ?)
                """, (taxon_name_id, protonym, authority, None, decision_summary, None))

                # 4. OriginalConcepts: link the Avibase ID to its concept
                conn.execute("""
                    INSERT OR REPLACE INTO OriginalConcepts 
                    (avibase_id, concept_id)
                    VALUES (?, ?)
                """, (avibase_id, concept_id))

        print(f"Successfully loaded {taxon_type} data from {csv_file_path}")

# Load every taxonomic level, one prepared CSV per level.
# Each entry pairs a file name in data_dir with the taxon_type passed to the loader.
data_files = [
    ("AL25-order-with-IDs.csv", "order"),
    ("AL25-family-with-IDs.csv", "family"),
    ("AL25-genera-with-IDs.csv", "genus"),
    ("AL25-species-with-IDs.csv", "species"),
]

for filename, taxon_type in data_files:
    csv_file_path = data_dir / filename
    if not csv_file_path.exists():
        # A missing file is reported and skipped rather than aborting the run
        print(f"Warning: {filename} not found, skipping...")
        continue
    load_taxonomic_data(db_path, csv_file_path, taxon_type)


Successfully loaded order data from /Users/ken/Documents/wk/master-birder-paper/data/AL25-order-with-IDs.csv
Successfully loaded family data from /Users/ken/Documents/wk/master-birder-paper/data/AL25-family-with-IDs.csv
Successfully loaded genus data from /Users/ken/Documents/wk/master-birder-paper/data/AL25-genera-with-IDs.csv
Successfully loaded species data from /Users/ken/Documents/wk/master-birder-paper/data/AL25-species-with-IDs.csv


# Populate parent hierarchy


```sql
-- Clear existing data so the hierarchy is rebuilt from scratch on every run
DELETE FROM ParentChildRelationships;

-- 1. FAMILY -> ORDER relationships
-- Family rows store 'Order: <order name>' in higher_classification (written by
-- load_taxonomic_data), so the join matches that label against each order's scientific_name.
INSERT INTO ParentChildRelationships (avibase_id, version, parent_id, fract_weight)
SELECT 
    f.avibase_id,
    'AL25' as version,
    o.avibase_id as parent_id,
    1.0 as fract_weight
FROM TaxanomicConcepts f
JOIN TaxanomicConcepts o ON f.higher_classification = 'Order: ' || o.scientific_name
WHERE f.concept_id LIKE 'FAMILY-%'
  AND o.concept_id LIKE 'ORDER-%';

-- 2. GENUS -> FAMILY relationships
-- Same approach: genus rows store 'Family: <family name>' in higher_classification.
INSERT INTO ParentChildRelationships (avibase_id, version, parent_id, fract_weight)
SELECT 
    g.avibase_id,
    'AL25' as version,
    f.avibase_id as parent_id,
    1.0 as fract_weight
FROM TaxanomicConcepts g
JOIN TaxanomicConcepts f ON g.higher_classification = 'Family: ' || f.scientific_name
WHERE g.concept_id LIKE 'GENUS-%'
  AND f.concept_id LIKE 'FAMILY-%';

-- 3. SPECIES -> GENUS relationships
-- Species rows store 'Genus: <first word of the scientific name>' in higher_classification.
INSERT INTO ParentChildRelationships (avibase_id, version, parent_id, fract_weight)
SELECT 
    s.avibase_id,
    'AL25' as version,
    g.avibase_id as parent_id,
    1.0 as fract_weight
FROM TaxanomicConcepts s
JOIN TaxanomicConcepts g ON s.higher_classification = 'Genus: ' || g.scientific_name
WHERE s.concept_id LIKE 'SPECIES-%'
  AND g.concept_id LIKE 'GENUS-%';

-- Verify the results: the total row count, then one count per level,
-- where a relationship is assigned to a level by the child's concept_id prefix
SELECT 
    'Total relationships created' as description,
    COUNT(*) as count
FROM ParentChildRelationships

UNION ALL

SELECT 
    'FAMILY->ORDER relationships' as description,
    COUNT(*) as count
FROM ParentChildRelationships pcr
JOIN TaxanomicConcepts tc ON pcr.avibase_id = tc.avibase_id
WHERE tc.concept_id LIKE 'FAMILY-%'

UNION ALL

SELECT 
    'GENUS->FAMILY relationships' as description,
    COUNT(*) as count
FROM ParentChildRelationships pcr
JOIN TaxanomicConcepts tc ON pcr.avibase_id = tc.avibase_id
WHERE tc.concept_id LIKE 'GENUS-%'

UNION ALL

SELECT 
    'SPECIES->GENUS relationships' as description,
    COUNT(*) as count
FROM ParentChildRelationships pcr
JOIN TaxanomicConcepts tc ON pcr.avibase_id = tc.avibase_id
WHERE tc.concept_id LIKE 'SPECIES-%';
```


In [8]:
# Populate ParentChildRelationships table
# This creates hierarchical relationships between taxonomic levels

def populate_parent_child_relationships(db_path: str):
    """Rebuild the ParentChildRelationships table from TaxanomicConcepts.

    Existing rows are deleted, then one row is inserted for every
    FAMILY -> ORDER, GENUS -> FAMILY and SPECIES -> GENUS pair, matched by
    comparing the child's ``higher_classification`` label (e.g.
    ``'Order: <name>'``) with the parent's ``scientific_name``. Each row gets
    version ``'AL25'`` and fract_weight ``1.0``.

    The delete and all inserts run in a single transaction, so a failure
    leaves the previous contents of the table untouched. After a successful
    rebuild, a summary of row counts is printed.

    Args:
        db_path: Path to the SQLite database file.
    """
    # (label prefix in the child's higher_classification,
    #  child concept_id pattern, parent concept_id pattern)
    levels = [
        ("Order: ", "FAMILY-%", "ORDER-%"),    # 1. FAMILY -> ORDER
        ("Family: ", "GENUS-%", "FAMILY-%"),   # 2. GENUS -> FAMILY
        ("Genus: ", "SPECIES-%", "GENUS-%"),   # 3. SPECIES -> GENUS
    ]
    insert_sql = """
        INSERT INTO ParentChildRelationships (avibase_id, version, parent_id, fract_weight)
        SELECT
            child.avibase_id,
            'AL25' AS version,
            parent.avibase_id AS parent_id,
            1.0 AS fract_weight
        FROM TaxanomicConcepts child
        JOIN TaxanomicConcepts parent
          ON child.higher_classification = ? || parent.scientific_name
        WHERE child.concept_id LIKE ?
          AND parent.concept_id LIKE ?
    """
    # Counts relationships whose child concept_id matches the given pattern
    count_by_child_sql = """
        SELECT COUNT(*)
        FROM ParentChildRelationships pcr
        JOIN TaxanomicConcepts tc ON pcr.avibase_id = tc.avibase_id
        WHERE tc.concept_id LIKE ?
    """
    verification_queries = [
        ("Total relationships created", "SELECT COUNT(*) FROM ParentChildRelationships", ()),
        ("FAMILY->ORDER relationships", count_by_child_sql, ("FAMILY-%",)),
        ("GENUS->FAMILY relationships", count_by_child_sql, ("GENUS-%",)),
        ("SPECIES->GENUS relationships", count_by_child_sql, ("SPECIES-%",)),
    ]

    # sqlite3's own context manager only handles transactions; closing()
    # is what releases the connection when the block exits.
    with closing(sqlite3.connect(db_path)) as conn:
        # executescript() would run each statement outside any transaction, so
        # a failing INSERT would leave the table cleared and half-populated.
        # Individual execute() calls inside "with conn" commit or roll back together.
        with conn:
            conn.execute("DELETE FROM ParentChildRelationships")
            for level_params in levels:
                conn.execute(insert_sql, level_params)

        print("ParentChildRelationships populated successfully!")
        print("=" * 50)

        for description, query, params in verification_queries:
            count = conn.execute(query, params).fetchone()[0]
            print(f"{description}: {count}")

# Run the population script.
# Note: this deletes all existing ParentChildRelationships rows before re-inserting them.
populate_parent_child_relationships(db_path)


ParentChildRelationships populated successfully!
Total relationships created: 13754
FAMILY->ORDER relationships: 252
GENUS->FAMILY relationships: 2376
SPECIES->GENUS relationships: 11125
