# Bird Data

### Import libraries and configure environment

In [1]:
import requests
import csv
import pandas as pd

from matplotlib import pyplot as plt

import os
from sqlalchemy import create_engine, Table, Column, String, MetaData, Date, Integer, Boolean
from sqlalchemy.orm import sessionmaker

from dotenv import load_dotenv
# Load variables from a local .env file into the process environment
load_dotenv()

# eBird API token, read from the environment so it is never hard-coded in the notebook
api_key = os.environ.get('EBIRD_API_KEY')
if not api_key:
    # Fail fast with a clear message instead of sending unauthenticated requests
    # and hitting a confusing error further down the notebook.
    raise RuntimeError(
        "EBIRD_API_KEY is not set; add it to your environment or .env file."
    )

### GET Requests to eBird API

In [2]:
# Fetches species codes for all recorded observations in PA
pa_species_url = 'https://api.ebird.org/v2/product/spplist/US-PA'
pa_species_params = {'key': os.environ.get('EBIRD_API_KEY')}
# A timeout (seconds) keeps a stalled connection from hanging the notebook indefinitely
pa_species = requests.get(pa_species_url, params=pa_species_params, timeout=30)

# Stop on HTTP errors: an error response can still be valid JSON and would
# otherwise be silently turned into a bogus DataFrame later on.
pa_species.raise_for_status()

# Decode the body; raise (rather than exit()) so the failure shows up as a
# traceback in this cell and halts a "Run All".
try:
    pa_species_data = pa_species.json()
except requests.exceptions.JSONDecodeError as err:
    raise RuntimeError(
        "Error decoding JSON response for pa_species: "
        f"{pa_species.text[:500]}"
    ) from err

In [3]:
# Fetches all recent recorded observations in PA
recent_observations_url = 'https://api.ebird.org/v2/data/obs/US-PA/recent'
recent_observations_params = {'key': os.environ.get('EBIRD_API_KEY')}
# A timeout (seconds) keeps a stalled connection from hanging the notebook indefinitely
recent_observations = requests.get(
    recent_observations_url, params=recent_observations_params, timeout=30
)

# Stop on HTTP errors: an error response can still be valid JSON and would
# otherwise be silently turned into a bogus DataFrame later on.
recent_observations.raise_for_status()

# Decode the body; raise (rather than exit()) so the failure shows up as a
# traceback in this cell and halts a "Run All".
try:
    recent_observations_data = recent_observations.json()
except requests.exceptions.JSONDecodeError as err:
    raise RuntimeError(
        "Error decoding JSON response for recent_observations: "
        f"{recent_observations.text[:500]}"
    ) from err

In [4]:
# Fetches PA birding hotspots (in JSON format)
pa_hotspots_url = 'https://api.ebird.org/v2/ref/hotspot/US-PA?fmt=json'
pa_hotspots_params = {'key': os.environ.get('EBIRD_API_KEY')}
# A timeout (seconds) keeps a stalled connection from hanging the notebook indefinitely
pa_hotspots = requests.get(pa_hotspots_url, params=pa_hotspots_params, timeout=30)

# Stop on HTTP errors: an error response can still be valid JSON and would
# otherwise be silently turned into a bogus DataFrame later on.
pa_hotspots.raise_for_status()

# Decode the body; raise (rather than exit()) so the failure shows up as a
# traceback in this cell and halts a "Run All".
try:
    pa_hotspots_data = pa_hotspots.json()
except requests.exceptions.JSONDecodeError as err:
    raise RuntimeError(
        "Error decoding JSON response for pa_hotspots: "
        f"{pa_hotspots.text[:500]}"
    ) from err

In [5]:
# # Fetches details about a particular hotspot
# hotspot_info_url = 'https://api.ebird.org/v2/ref/hotspot/info/{locId}'
# hotspot_info_params = {'key': os.environ.get('EBIRD_API_KEY')}
# hotspot_info = requests.get(hotspot_info_url, params=hotspot_info_params)

# # Handle potential JSONDecodeError
# try:
#     hotspot_info_data = hotspot_info.json()
# except requests.exceptions.JSONDecodeError:
#     print("Error decoding JSON response for hotspot_info:")
#     print(hotspot_info.text)
#     exit(1)

### Create DataFrames

In [6]:
# Wrap each decoded API payload in its own DataFrame.

# Species list for PA
pa_species_df = pd.DataFrame(data=pa_species_data)

# Recent observations in PA
pa_observations_df = pd.DataFrame(data=recent_observations_data)

# PA birding hotspots
pa_hotspots_df = pd.DataFrame(data=pa_hotspots_data)

# # Create a DataFrame to hold API response for hotspot location info
# hotspot_info_df = pd.DataFrame(hotspot_info_data)

### Clean Returned Data

In [7]:
# Drop unneeded columns from observation data

# Columns that are not used downstream.
# NOTE(review): some of these (e.g. 'exoticCategory') may not be present in
# every API response — confirm against the eBird API documentation.
columns_to_drop = ['obsValid', 'obsReviewed', 'subId', 'exoticCategory']

# errors='ignore' skips labels that are absent, so this cell neither fails when
# a column is missing from the response nor when it is re-run after the
# columns have already been dropped.
pa_observations_df = pa_observations_df.drop(columns=columns_to_drop, errors='ignore')

In [8]:
# Drop unneeded columns from the hotspot data

# Columns that are not used downstream
columns_to_drop = ['countryCode', 'subnational1Code']

# errors='ignore' skips labels that are absent, so re-running this cell after
# the columns are already gone does not raise a KeyError.
pa_hotspots_df = pa_hotspots_df.drop(columns=columns_to_drop, errors='ignore')

In [9]:
# Give the observation columns descriptive, human-readable names
# (API field name -> display name).
pa_observations_df = pa_observations_df.rename(
    {
        'speciesCode': 'Species_Code',
        'comName': 'Common_Name',
        'sciName': 'Scientific_Name',
        'locId': 'Location_ID',
        'locName': 'Location_Name',
        'obsDt': 'Observation_Date',
        'howMany': 'Quantity_Observed',
        'lat': 'Latitude',
        'lng': 'Longitude',
        'locationPrivate': 'Location_Private',
    },
    axis='columns',
)

In [10]:
# Give the hotspot columns descriptive, human-readable names
# (API field name -> display name).
pa_hotspots_df = pa_hotspots_df.rename(
    {
        'locId': 'Location_ID',
        'locName': 'Location_Name',
        'subnational2Code': 'Sub-National_Code',
        'lat': 'Latitude',
        'lng': 'Longitude',
        'latestObsDt': 'Date_of_Most_Recent_Observation',
        'numSpeciesAllTime': 'Total_Species_Recorded_at_Location',
    },
    axis='columns',
)

In [11]:
# Column 0 is presumably the default label pandas assigned when the frame was
# built from the raw response — give it a descriptive name.
pa_species_df = pa_species_df.rename({0: 'Species_Code'}, axis='columns')

### Write cleaned data to CSV files

In [12]:
# Persist each cleaned frame as a CSV file in the working directory;
# index=False leaves the DataFrame index out of the file.

# PA species list
pa_species_df.to_csv(path_or_buf='pa_species.csv', index=False)

# Recent observations
pa_observations_df.to_csv(path_or_buf='recent_observations.csv', index=False)

# PA hotspots
pa_hotspots_df.to_csv(path_or_buf='pa_hotspots_data.csv', index=False)

### Write observation data from CSV file to PostgreSQL database

In [13]:
# # PostgreSQL database connection details from .env file
# host = os.getenv('PG_HOST')
# database = os.getenv('PG_DATABASE')
# user = os.getenv('PG_USER')
# password = os.getenv('PG_PASSWORD')

# # CSV file path
# csv_file = 'recent_observations.csv'

# # Create SQLAlchemy engine
# engine = create_engine(f'postgresql://{user}:{password}@{host}/{database}', pool_pre_ping=True)

# # Create a session factory
# Session = sessionmaker(bind=engine)
# session = Session()

# # Create SQLAlchemy table object
# metadata = MetaData()
# table = Table('Recent_Observations', metadata,
#               Column('Index', primary_key=True, autoincrement=True),
#               Column('Species_Code', String),
#               Column('Common_Name', String),
#               Column('Scientific_Name', String),
#               Column('Location_ID', String),
#               Column('Location_Name', String),
#               Column('Observation_Date', Date),
#               Column('Quantity_Observed', Integer),
#               Column('Latitude', String),
#               Column('Longitude', String),
#               Column('Location_Private', Boolean)
#               )

# # Open the CSV file
# with open(csv_file, 'r') as file:
#     # Create a CSV reader object
#     csv_data = csv.reader(file)

#     # Skip the header row
#     next(csv_data)

#     # Insert each row of the CSV file into the database table
#     for row in csv_data:
#         insert_stmt = table.insert().values(Index=row[0], Species_Code=row[1], Common_Name=row[2], Scientific_Name=row[3],
#                                             Location_ID=row[4], Location_Name=row[5], Observation_Date=row[6], Quantity_Observed=row[7], 
#                                             Latitude=row[8], Longitude=row[9], Location_Private=row[10])
#         session.execute(insert_stmt)

# # Commit the changes to the database
# session.commit()

# # Close the session
# session.close()


In [14]:
# Summarize the cleaned observation frame: column names, non-null counts and dtypes
pa_observations_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 251 entries, 0 to 250
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Species_Code       251 non-null    object 
 1   Common_Name        251 non-null    object 
 2   Scientific_Name    251 non-null    object 
 3   Location_ID        251 non-null    object 
 4   Location_Name      251 non-null    object 
 5   Observation_Date   251 non-null    object 
 6   Quantity_Observed  248 non-null    float64
 7   Latitude           251 non-null    float64
 8   Longitude          251 non-null    float64
 9   Location_Private   251 non-null    bool   
dtypes: bool(1), float64(3), object(6)
memory usage: 18.0+ KB


In [None]:
# Show only the observations whose common name is 'Pileated Woodpecker'
pa_observations_df.loc[pa_observations_df['Common_Name'] == 'Pileated Woodpecker']