In [None]:
!pip3 install python-dotenv

In [2]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import time
from dotenv import load_dotenv
import os

In [10]:
from glob import glob

# Root folder of the validation images; later cells use each file's parent
# folder name as the bird name.
path = "data/bird_images/valid"

# Recursively collect every .jpg file below `path`.
jpg_pattern = f'{path}/**/*.jpg'
files = glob(jpg_pattern, recursive=True)

In [None]:
from PIL import Image
import PIL

# Destination folder for the flattened copies. The resize cell concatenates
# file names directly onto this string, hence the trailing slash.
path = "data/bird_images/valid_flattened/"

# Build one "<parent folder>-<first character of the file name>" label per
# kept image.
new_filenames = []
for file in files:
    # os.path understands the platform's separator; splitting on "/" misparses
    # the back-slash separated paths glob returns on Windows.
    folder_name = os.path.basename(os.path.dirname(file))
    leading_char = os.path.basename(file)[0]
    # Keep only files whose name starts with a digit no greater than 2.
    if int(leading_char) > 2:
        continue
    new_filenames.append(f"{folder_name}-{leading_char}")

# The `with` block closes the file on exit, so no explicit close() is needed.
with open('data/bird_icon_names.txt', 'w') as f:
    f.writelines(f"{name}\n" for name in new_filenames)
print("File written successfully")

In [None]:
# Target size (width, height) in pixels for the flattened copies.
newsize = (185, 185)

# NOTE: `path` must still hold the flattened-output folder assigned in the
# previous cell; a later cell rebinds `path` to a URL.
for file in files:
    # os.path understands the platform's separator, unlike splitting on "/".
    folder_name = os.path.basename(os.path.dirname(file))
    image_name = os.path.basename(file)
    # Decide whether to keep the file before paying the cost of opening it.
    if int(image_name[0]) > 2:
        continue
    new_filename = f"{folder_name}-{image_name}"
    # The context manager releases the underlying file handle after each image.
    with Image.open(file) as img:
        # Image.resize returns a new image and leaves `img` untouched, so the
        # returned object is the one that has to be saved.
        resized = img.resize(newsize)
        resized.save(f"{path}{new_filename}")

# Scraping eBird

In [None]:
# Load the tab-separated eBird export; the first row supplies the column names.
df = pd.read_csv(
    'data/ebird data.txt',
    header=0,
    sep="\t",
)

In [None]:
# Show the column labels of the loaded table.
df.columns

In [None]:
# Preview the first ten values of the "COUNTY CODE" column.
df["COUNTY CODE"].head(10)

In [None]:
# Preview the first ten values of the "COUNTY" column.
df["COUNTY"].head(10)

In [None]:
# URL of the single eBird checklist page that the next cell downloads.
BASE_URL = "https://ebird.org/checklist/S177363513"

In [None]:
# Download the checklist page. TLS certificate verification stays at its
# default (enabled); if a proxy needs a custom CA, pass `verify=<bundle path>`
# instead of switching verification off. The timeout keeps a stalled
# connection from hanging the kernel indefinitely.
r = requests.get(BASE_URL, timeout=30)
r.raise_for_status()

# Parse the raw response bytes with the html5lib parser.
soup = BeautifulSoup(r.content, 'html5lib')

In [None]:
def build_media_data_for_checklist(soup):
    """Collect the species entries of a checklist page that carry media flags.

    Args:
        soup: Parsed checklist page; anything exposing BeautifulSoup-style
            ``find`` / ``find_all`` works.

    Returns:
        A list of dicts, one per matching ``<li>``, with the keys ``href``
        (link found in the ``Observation-species`` block), ``name`` (text of
        that block's ``<span>``) and ``media_ids`` (the ``data-media-id``
        values of the ``<div>`` elements whose ``data-media-commonname``
        equals ``name``). Each ``<li>`` is reported once; entries lacking the
        species block, its link or its name are skipped.
    """
    # Same three queries, in the same order, as before: items with both flags
    # first, then photo-flagged items, then audio-flagged items.
    # NOTE(review): `True` matches on attribute presence, whatever its value —
    # confirm the page omits the attribute when an observation has no media.
    queries = (
        {'data-observationhasphotos': True, 'data-observationhasaudios': True},
        {'data-observationhasphotos': True},
        {'data-observationhasaudios': True},
    )

    # An <li> carrying both flags satisfies every query, so remember which
    # elements were already taken. Identity is used (not equality) so that two
    # distinct observations with identical markup are both kept.
    species = []
    seen_ids = set()
    for query in queries:
        for item in soup.find_all('li', attrs=query):
            if id(item) in seen_ids:
                continue
            seen_ids.add(id(item))
            species.append(item)

    species_json = []
    for s in species:
        species_info = s.find('div', attrs={'class': 'Observation-species'})
        if species_info is None:
            continue
        link = species_info.find('a')
        name_tag = species_info.find('span')
        if link is None or name_tag is None:
            continue

        # The name is used verbatim because it must equal the
        # data-media-commonname attribute of the media elements.
        name = name_tag.text
        media_id_elements = s.find_all('div', attrs={'data-media-commonname': name})
        media_ids = [e["data-media-id"] for e in media_id_elements]
        species_json.append({'href': link['href'], 'name': name, 'media_ids': media_ids})

    return species_json

In [None]:
# Extract the per-species media records from the parsed checklist page and
# display them as the cell output.
species_json = build_media_data_for_checklist(soup)
species_json

# Getting recent data for region

In [None]:
# Read the eBird API token from the environment (populated from a .env file).
load_dotenv()
access_token = os.getenv("EBIRD_API_ACCESS_TOKEN")
if not access_token:
    # Fail early with a clear message instead of sending an unauthenticated request.
    raise RuntimeError("EBIRD_API_ACCESS_TOKEN is not set; add it to the environment or a .env file")

headers = {"Content-Type": "application/json", "x-ebirdapitoken": access_token}
region_code = "US-CO-013"
code2 = "AL"

# Recent observations for the region. Certificate verification stays enabled
# because the request carries the API token; the timeout prevents an
# indefinite hang, and raise_for_status surfaces HTTP errors before parsing.
get_url = "https://api.ebird.org/v2/data/obs/{}/recent".format(region_code)
res = requests.get(get_url, headers=headers, timeout=30)
res.raise_for_status()
res.json()

In [None]:
# Decode the JSON body of the most recent response and print its length.
res_json = res.json()
print(len(res_json))

In [None]:
# Upper bound on the number of checklists requested from the API.
max_res = 100
get_checklists_url = "https://api.ebird.org/v2/product/lists/{}?maxResults={}".format(region_code, max_res)

# Certificate verification stays enabled because the request carries the API
# token; the timeout prevents an indefinite hang.
res = requests.get(get_checklists_url, headers=headers, timeout=30)
res.raise_for_status()

# Decode the body once (the earlier extra, discarded res.json() call was redundant).
res_json = res.json()
print(len(res_json))

In [None]:
# Display every entry of res_json except the last ten.
# NOTE(review): a short preview such as res_json[:10] or res_json[-10:] may
# have been intended here — confirm.
res_json[:-10]

# Getting species intro

In [8]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Reload the .env values and re-read the eBird API token.
load_dotenv()
access_token = os.getenv("EBIRD_API_ACCESS_TOKEN")

# Configure Chrome with the headless flag.
options = Options()
options.add_argument('--headless=new')

# Start the browser session and open the eBird Explore page.
# Note: this rebinds `path`, which earlier cells use for an image folder.
path = "https://ebird.org/explore"
driver = webdriver.Chrome(options=options)
driver.get(path)

In [9]:
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

species_name = "WHITE TAILED TROPIC"

# Type the name into the species search box of the page already open in `driver`.
input_e = driver.find_element(By.ID, "species")
input_e.send_keys(species_name)

# Wait up to 3 seconds for the first suggestion to be present, then click it.
first_suggestion_locator = (By.ID, "Suggest-suggestion-0")
suggestion_wait = WebDriverWait(driver, 3)
suggestion_wait.until(EC.presence_of_element_located(first_suggestion_locator)).click()

# Print the text of the first element with the "u-stack-sm" class.
species_desc_e = driver.find_element(By.CLASS_NAME, "u-stack-sm")
print(species_desc_e.text)

A medium-sized white seabird with black marks on the wings and a yellow beak. The long white tail streamers are visible from a great distance. Nests on coastal and inland cliffs on the main islands. Often seen flying over canyons and along cliffs. At sea usually flies high above the water. Calls are loud clucks and squawks. Smaller and more graceful in flight than Red-tailed Tropicbird.


# Produce the bird names file

In [11]:
# Collect the parent-folder name of every image whose file name starts with a
# digit no greater than 1.
new_bird_names = []
for file in files:
    # os.path understands the platform's separator; splitting on "/" misparses
    # the back-slash separated paths glob returns on Windows.
    leading_char = os.path.basename(file)[0]
    if int(leading_char) > 1:
        continue
    new_bird_names.append(os.path.basename(os.path.dirname(file)))

# The `with` block closes the file on exit, so no explicit close() is needed.
with open('data/bird_names.txt', 'w') as f:
    f.writelines(f"{name}\n" for name in new_bird_names)
print("File written successfully")

File written successfully


In [None]:
def get_desc(species_name, path, driver, timeout=3):
    """Look up a species through the page's search box and return its description.

    Args:
        species_name: Name typed into the search box; surrounding whitespace
            (including a trailing newline) is removed first.
        path: URL of the page that contains the ``species`` search input.
        driver: Selenium WebDriver used for the lookup.
        timeout: Seconds to wait for the first suggestion and for the
            description element.

    Returns:
        ``{"status": 200, "desc": <text>}`` on success, otherwise
        ``{"status": 400, "error": <message>}``.
    """
    # Names read from a file keep their trailing newline, and send_keys would
    # type that newline into the input as well.
    query = species_name.strip()
    if not query:
        return {"status": 400, "error": "empty species name"}

    driver.get(path)

    try:
        input_e = driver.find_element(By.ID, "species")
        input_e.send_keys(query)

        wait = WebDriverWait(driver, timeout)
        # Wait until the first suggestion can actually be clicked, not merely
        # until it exists in the DOM.
        wait.until(EC.element_to_be_clickable((By.ID, "Suggest-suggestion-0"))).click()
        # Give the description element time to appear after the click.
        species_desc_e = wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, "u-stack-sm"))
        )
        return {"status": 200, "desc": species_desc_e.text}
    except Exception as e:
        # Best effort by design: any lookup failure is reported to the caller
        # as a status-400 result instead of aborting a batch run.
        return {"status": 400, "error": str(e)}

In [None]:
import json

# Read one species name per line, dropping the trailing newline and blank
# lines; the `with` block closes the file afterwards.
with open('data/bird_names.txt', 'r') as names_file:
    lines = [raw.strip() for raw in names_file if raw.strip()]

# NOTE: `path` must hold the eBird Explore URL assigned in the Selenium setup cell.
failed_names = []
name_desc_pairs = {}
for line in lines:
    res = get_desc(line, path, driver)
    # get_desc returns a plain dict, so its fields are read by key
    # (attribute access such as res.status raises AttributeError).
    if res["status"] == 200:
        name_desc_pairs[line] = res["desc"]
    elif res["status"] == 400:
        print(f"{line} failed: {res['error']}")
        failed_names.append(line)

# post processing
with open('data/failed_names.txt', 'w') as f:
    for items in failed_names:
        f.write('%s\n' % items)
print("Failed name file written successfully")

name_desc_pairs_obj = json.dumps(name_desc_pairs, indent=4)

with open("data/name_desc_pairs.json", "w") as f:
    f.write(name_desc_pairs_obj)
print("Name desc pair file written successfully")