In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import time
from dotenv import load_dotenv
import os

In [2]:
# Load the eBird export: a tab-delimited text file whose first row holds the column names.
df = pd.read_csv("data/ebird data.txt", delimiter="\t", header=0)

In [3]:
# List the column names of the loaded table.
df.columns

Index(['GLOBAL UNIQUE IDENTIFIER', 'LAST EDITED DATE', 'TAXONOMIC ORDER',
       'CATEGORY', 'TAXON CONCEPT ID', 'COMMON NAME', 'SCIENTIFIC NAME',
       'SUBSPECIES COMMON NAME', 'SUBSPECIES SCIENTIFIC NAME', 'EXOTIC CODE',
       'OBSERVATION COUNT', 'BREEDING CODE', 'BREEDING CATEGORY',
       'BEHAVIOR CODE', 'AGE/SEX', 'COUNTRY', 'COUNTRY CODE', 'STATE',
       'STATE CODE', 'COUNTY', 'COUNTY CODE', 'IBA CODE', 'BCR CODE',
       'USFWS CODE', 'ATLAS BLOCK', 'LOCALITY', 'LOCALITY ID', 'LOCALITY TYPE',
       'LATITUDE', 'LONGITUDE', 'OBSERVATION DATE',
       'TIME OBSERVATIONS STARTED', 'OBSERVER ID', 'SAMPLING EVENT IDENTIFIER',
       'PROTOCOL TYPE', 'PROTOCOL CODE', 'PROJECT CODE', 'DURATION MINUTES',
       'EFFORT DISTANCE KM', 'EFFORT AREA HA', 'NUMBER OBSERVERS',
       'ALL SPECIES REPORTED', 'GROUP IDENTIFIER', 'HAS MEDIA', 'APPROVED',
       'REVIEWED', 'REASON', 'TRIP COMMENTS', 'SPECIES COMMENTS',
       'Unnamed: 49'],
      dtype='object')

In [7]:
# Preview the first ten county codes.
df["COUNTY CODE"].head(10)

0    US-AL-101
1    US-AL-101
2    US-AL-101
3    US-AL-101
4    US-AL-101
5    US-AL-101
6    US-AL-101
7    US-AL-101
8    US-AL-101
9    US-AL-101
Name: COUNTY CODE, dtype: object

In [9]:
# Preview the first ten county names.
df["COUNTY"].head(10)

0    Montgomery
1    Montgomery
2    Montgomery
3    Montgomery
4    Montgomery
5    Montgomery
6    Montgomery
7    Montgomery
8    Montgomery
9    Montgomery
Name: COUNTY, dtype: object

In [1]:
# URL of the single eBird checklist page requested by the cell that calls requests.get(BASE_URL, ...).
BASE_URL = "https://ebird.org/checklist/S177363513"

In [26]:
# Download the checklist page. TLS certificate verification is left at the
# requests default (enabled); if a local proxy breaks verification, point
# REQUESTS_CA_BUNDLE at its CA file rather than passing verify=False.
# The timeout keeps a stalled connection from hanging the kernel.
r = requests.get(BASE_URL, timeout=30)
r.raise_for_status()

# Parse the raw bytes with the html5lib parser.
soup = BeautifulSoup(r.content, 'html5lib')



In [47]:
def build_media_data_for_checklist(soup):
    """Collect the checklist entries that have photos and/or audio attached.

    Scans ``soup`` for ``<li>`` elements carrying a
    ``data-observationhasphotos`` and/or ``data-observationhasaudios``
    attribute. For each one it reads the species link and name from the
    ``Observation-species`` div and gathers the ``data-media-id`` of every
    ``<div>`` whose ``data-media-commonname`` equals that name.

    Args:
        soup: Parsed checklist page (a ``BeautifulSoup`` object, or any tag-like
            object exposing ``find_all`` and ``find``).

    Returns:
        list[dict]: One ``{'href': ..., 'name': ..., 'media_ids': [...]}`` dict
        per matching ``<li>``. Every ``<li>`` is reported once, even when it
        has both photos and audio. Items without the expected species
        div, link or name span are skipped.
    """
    photo_attr = 'data-observationhasphotos'
    audio_attr = 'data-observationhasaudios'

    # Query order (both, photos only, audio only) keeps first-seen ordering
    # stable: entries with both kinds of media come first.
    queries = (
        {photo_attr: True, audio_attr: True},
        {photo_attr: True},
        {audio_attr: True},
    )

    # The queries overlap, so remember which elements were already handled.
    # Identity is used on purpose: two distinct <li> elements with identical
    # markup must both be kept.
    seen_ids = set()
    species_json = []

    for attrs in queries:
        for item in soup.find_all('li', attrs=attrs):
            if id(item) in seen_ids:
                continue
            seen_ids.add(id(item))

            species_info = item.find('div', attrs={'class': 'Observation-species'})
            if species_info is None:
                continue  # not a species entry we know how to read

            link = species_info.find('a')
            name_tag = species_info.find('span')
            if link is None or name_tag is None:
                continue

            name = name_tag.text
            media_elements = item.find_all('div', attrs={'data-media-commonname': name})
            media_ids = [element['data-media-id'] for element in media_elements]

            species_json.append({'href': link['href'], 'name': name, 'media_ids': media_ids})

    return species_json

In [48]:
species_json = build_media_data_for_checklist(soup)
species_json

[{'href': '/species/wemhar1',
  'name': 'Western Marsh Harrier',
  'media_ids': ['619630301']},
 {'href': '/species/gstlar1',
  'name': 'Greater Short-toed Lark',
  'media_ids': ['619630341', '619630342', '619630343', '619631014']},
 {'href': '/species/banswa',
  'name': 'Bank Swallow',
  'media_ids': ['619630365', '619630366']}]

# Getting recent observations for a region

In [12]:
# Read the eBird API token from the environment (optionally populated from a .env file).
load_dotenv()
access_token = os.getenv("EBIRD_API_ACCESS_TOKEN")
if not access_token:
    # Without this check requests would silently drop the None-valued header
    # and the API would reject the call with a less obvious error.
    raise RuntimeError("EBIRD_API_ACCESS_TOKEN is not set; export it or add it to a .env file")

headers = {"Content-Type": "application/json", "x-ebirdapitoken": access_token}
region_code = "US-CO-013"
code2 = "AL"

# Recent observations for the region. TLS verification is left enabled (the
# requests default) and a timeout prevents the cell from hanging indefinitely.
get_url = "https://api.ebird.org/v2/data/obs/{}/recent".format(region_code)
res = requests.get(get_url, headers=headers, timeout=30)
res.raise_for_status()  # surface HTTP errors instead of displaying an error payload
res.json()



[{'speciesCode': 'swahaw',
  'comName': "Swainson's Hawk",
  'sciName': 'Buteo swainsoni',
  'locId': 'L131406',
  'locName': 'McIntosh Lake',
  'obsDt': '2024-05-27 17:21',
  'howMany': 1,
  'lat': 40.1938817,
  'lng': -105.1502538,
  'obsValid': True,
  'obsReviewed': False,
  'locationPrivate': False,
  'subId': 'S177694504'},
 {'speciesCode': 'moudov',
  'comName': 'Mourning Dove',
  'sciName': 'Zenaida macroura',
  'locId': 'L384546',
  'locName': 'Waneka Lake/Greenlee Preserve',
  'obsDt': '2024-05-27 16:30',
  'howMany': 3,
  'lat': 39.9970462,
  'lng': -105.1141834,
  'obsValid': True,
  'obsReviewed': False,
  'locationPrivate': False,
  'subId': 'S177696641'},
 {'speciesCode': 'osprey',
  'comName': 'Osprey',
  'sciName': 'Pandion haliaetus',
  'locId': 'L384546',
  'locName': 'Waneka Lake/Greenlee Preserve',
  'obsDt': '2024-05-27 16:30',
  'howMany': 1,
  'lat': 39.9970462,
  'lng': -105.1141834,
  'obsValid': True,
  'obsReviewed': False,
  'locationPrivate': False,
  'sub

In [9]:
# Decode the JSON body of the most recent response and report how many records it holds.
res_json = res.json()
print(len(res_json))

86


In [10]:
# Upper bound on the number of checklists requested via the maxResults query parameter.
max_res = 100
get_checklists_url = "https://api.ebird.org/v2/product/lists/{}?maxResults={}".format(region_code, max_res)

# TLS verification stays enabled (the requests default); the timeout keeps a
# stalled connection from hanging the kernel.
res = requests.get(get_checklists_url, headers=headers, timeout=30)
res.raise_for_status()  # fail loudly on HTTP errors

# Decode once and keep the payload for the following cells.
res_json = res.json()
print(len(res_json))



100


In [11]:
# Preview the first ten checklists. The previous slice, [:-10], displayed
# everything except the last ten records rather than a short sample.
res_json[:10]

[{'locId': 'L32462420',
  'subId': 'S177673251',
  'userDisplayName': 'Ryan Giordanelli',
  'numSpecies': 1,
  'obsDt': '27 May 2024',
  'obsTime': '16:38',
  'isoObsDate': '2024-05-27 16:38',
  'subID': 'S177673251',
  'loc': {'locId': 'L32462420',
   'name': '11700–11842 Atlanta Hwy, Montgomery US-AL (32.3696,-86.0751)',
   'latitude': 32.369609,
   'longitude': -86.075086,
   'countryCode': 'US',
   'countryName': 'United States',
   'subnational1Name': 'Alabama',
   'subnational1Code': 'US-AL',
   'subnational2Code': 'US-AL-101',
   'subnational2Name': 'Montgomery',
   'isHotspot': False,
   'locName': '11700–11842 Atlanta Hwy, Montgomery US-AL (32.3696,-86.0751)',
   'lat': 32.369609,
   'lng': -86.075086,
   'hierarchicalName': '11700–11842 Atlanta Hwy, Montgomery US-AL (32.3696,-86.0751), Montgomery, Alabama, US',
   'locID': 'L32462420'}},
 {'locId': 'L6052570',
  'subId': 'S177725506',
  'userDisplayName': 'David McVay',
  'numSpecies': 7,
  'obsDt': '27 May 2024',
  'obsTime'