In [26]:
import requests
import pandas as pd
import time

In [27]:
# --- Collection settings -----------------------------------------------------
# Page size requested from GBIF for each (species, year) query.
RECORDS_PER_YEAR = 300
# Observation years to query: 2016 through 2024 inclusive (range end is exclusive).
YEARS_TO_FETCH = range(2016, 2024 + 1)

# Common name -> scientific name. The scientific name is what gets sent to the
# GBIF species-match endpoint; the common name is used for logging and as the
# 'species' column of the output.
species_map = {
    # Flycatchers
    "European pied flycatcher": "Ficedula hypoleuca",
    "Spotted flycatcher": "Muscicapa striata",
    "Collared flycatcher": "Ficedula albicollis",
    # Warblers
    "Wood warbler": "Phylloscopus sibilatrix",
    "Willow warbler": "Phylloscopus trochilus",
    # Other species
    "Common Redstart": "Phoenicurus phoenicurus",
    "Brambling": "Fringilla montifringilla",
    "Eurasian Siskin": "Spinus spinus",
    "Blackcap": "Sylvia atricapilla",
    "Wood Pigeon": "Columba palumbus",
}
# Upper bound on the sample size:
# 10 species x 9 years x 300 records = 27,000 rows (before filtering).

In [28]:
def get_phase(month):
    """Return the annual-cycle phase label for a calendar month number.

    Args:
        month: Month number (1-12). Any other value, including None,
            falls through to "Unknown".

    Returns:
        One of "Breeding", "Fall Migration", "Wintering",
        "Spring Migration", or "Unknown".
    """
    # Each entry pairs the months belonging to a phase with that phase's label.
    phase_table = (
        ((5, 6, 7), "Breeding"),
        ((8, 9, 10), "Fall Migration"),
        ((11, 12, 1, 2), "Wintering"),
        ((3, 4), "Spring Migration"),
    )
    for phase_months, label in phase_table:
        if month in phase_months:
            return label
    return "Unknown"

In [29]:
# Collect occurrence records for every species in `species_map`, one GBIF
# request per (species, year). Each kept record becomes one dict in
# `all_records`. The cell is best-effort: a failed request is logged and
# skipped so the remaining species/years are still collected.
all_records = []
print(f"Starting collection from {YEARS_TO_FETCH.start} to {YEARS_TO_FETCH.stop - 1}...")

# Seconds to wait on a single HTTP request before giving up, so one stalled
# connection cannot hang the whole cell.
REQUEST_TIMEOUT = 30

for common_name, scientific_name in species_map.items():
    print(f"\nProcessing: {common_name}...")

    # Step 1: resolve the scientific name to the taxon key used by the
    # occurrence search.
    try:
        r = requests.get(
            "https://api.gbif.org/v1/species/match",
            params={'name': scientific_name, 'kingdom': 'Animalia'},
            timeout=REQUEST_TIMEOUT,
        )
        r.raise_for_status()
        taxon_key = r.json().get('usageKey')
    except Exception as e:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still
        # stops the run, and the actual cause is reported.
        print(f" -> Error getting Taxon Key: {e}")
        continue

    if not taxon_key:
        print(" -> Key not found.")
        continue

    # Step 2: fetch a single page of georeferenced occurrences per year.
    for year in YEARS_TO_FETCH:
        search_params = {
            'taxonKey': taxon_key,
            'hasCoordinate': 'true',
            'year': year,
            'limit': RECORDS_PER_YEAR,
            'offset': 0  # only the first page is requested; no pagination
        }

        try:
            r_occ = requests.get(
                "https://api.gbif.org/v1/occurrence/search",
                params=search_params,
                timeout=REQUEST_TIMEOUT,
            )
            # Surface HTTP errors instead of reporting them as "Found 0 records".
            r_occ.raise_for_status()
            results = r_occ.json().get('results', [])

            count = 0
            for rec in results:
                latitude = rec.get('decimalLatitude')
                longitude = rec.get('decimalLongitude')
                month = rec.get('month')
                # Compare coordinates against None rather than by truthiness:
                # 0.0 is a valid latitude/longitude and must not be discarded.
                if latitude is None or longitude is None or not month:
                    continue

                all_records.append({
                    'species': common_name,
                    'scientific_name': scientific_name,
                    'latitude': latitude,
                    'longitude': longitude,
                    'year': rec.get('year'),
                    'month': month,
                    'day': rec.get('day'),
                    'country': rec.get('country'),
                    'phase': get_phase(month)
                })
                count += 1

            print(f"   -> Year {year}: Found {count} records")
            # Short pause between requests to stay polite to the public API.
            time.sleep(0.2)

        except Exception as e:
            print(f"   -> Error fetching {year}: {e}")

Starting collection from 2016 to 2024...

Processing: European pied flycatcher...
   -> Year 2016: Found 300 records
   -> Year 2017: Found 300 records
   -> Year 2018: Found 300 records
   -> Year 2019: Found 300 records
   -> Year 2020: Found 300 records
   -> Year 2021: Found 300 records
   -> Year 2022: Found 300 records
   -> Year 2023: Found 300 records
   -> Year 2024: Found 300 records

Processing: Spotted flycatcher...
   -> Year 2016: Found 300 records
   -> Year 2017: Found 300 records
   -> Year 2018: Found 300 records
   -> Year 2019: Found 300 records
   -> Year 2020: Found 300 records
   -> Year 2021: Found 300 records
   -> Year 2022: Found 300 records
   -> Year 2023: Found 300 records
   -> Year 2024: Found 300 records

Processing: Collared flycatcher...
   -> Year 2016: Found 300 records
   -> Year 2017: Found 300 records
   -> Year 2018: Found 300 records
   -> Year 2019: Found 300 records
   -> Year 2020: Found 300 records
   -> Year 2021: Found 300 records
   -> Y

In [30]:
# Build the final table from the collected records, drop incomplete rows,
# sort it, and write it to CSV.

# Declaring the columns up front means an empty `all_records` (e.g. every
# request failed) still yields a frame with the expected columns, instead of
# a column-less frame on which `dropna(subset=...)` raises KeyError.
RECORD_COLUMNS = [
    'species', 'scientific_name', 'latitude', 'longitude',
    'year', 'month', 'day', 'country', 'phase',
]

df = pd.DataFrame(all_records, columns=RECORD_COLUMNS)
# Keep only rows that have both coordinates and a usable date.
df = df.dropna(subset=['latitude', 'longitude', 'month', 'year'])

df = df.sort_values(by=['species', 'year', 'month'])

output_file = 'populated_bird_data.csv'
df.to_csv(output_file, index=False)

print(f"\nDONE! Saved {len(df)} records.")
if df.empty:
    # Min/max of an empty column would print as NaN; say what happened instead.
    print("No records were collected; check the fetch log above for errors.")
else:
    print(f"Data is spread from {df['year'].min()} to {df['year'].max()}.")
    # Number of distinct years with data per species (sanity check on coverage).
    print(df.groupby('species')['year'].nunique())


DONE! Saved 26918 records.
Data is spread from 2016 to 2024.
species
Blackcap                    9
Brambling                   9
Collared flycatcher         9
Common Redstart             9
Eurasian Siskin             9
European pied flycatcher    9
Spotted flycatcher          9
Willow warbler              9
Wood Pigeon                 9
Wood warbler                9
Name: year, dtype: int64
