# Bird Data

### Import libraries and configure environment

In [126]:
import requests
import csv
import pandas as pd

import os
from sqlalchemy import create_engine, Table, Column, String, MetaData, Date, Integer, Boolean
from sqlalchemy.orm import sessionmaker

from dotenv import load_dotenv
# Pull variables from a local .env file into the process environment,
# then read the eBird API key from it (None when the variable is not set)
load_dotenv()
api_key = os.getenv('EBIRD_API_KEY')

### GET Requests to eBird API

In [127]:
# Fetches species codes for all recorded observations in PA
pa_species_url = 'https://api.ebird.org/v2/product/spplist/US-PA'
pa_species_params = {'key': os.environ.get('EBIRD_API_KEY')}
# The timeout keeps this cell from hanging indefinitely if the API does not respond
pa_species = requests.get(pa_species_url, params=pa_species_params, timeout=30)

# Stop on HTTP errors (e.g. a missing or invalid API key) so that an error
# payload is never mistaken for species data further down the notebook
pa_species.raise_for_status()

# Handle potential JSONDecodeError
try:
    pa_species_data = pa_species.json()
except requests.exceptions.JSONDecodeError as err:
    print("Error decoding JSON response for pa_species:")
    print(pa_species.text)
    # Raise instead of calling exit(): the cell fails visibly while the
    # kernel and its variables stay alive for debugging
    raise RuntimeError("eBird species list response was not valid JSON") from err

In [128]:
# Fetches all recent recorded observations in PA
recent_observations_url = 'https://api.ebird.org/v2/data/obs/US-PA/recent'
recent_observations_params = {'key': os.environ.get('EBIRD_API_KEY')}
# The timeout keeps this cell from hanging indefinitely if the API does not respond
recent_observations = requests.get(recent_observations_url, params=recent_observations_params, timeout=30)

# Stop on HTTP errors (e.g. a missing or invalid API key) so that an error
# payload is never mistaken for observation data further down the notebook
recent_observations.raise_for_status()

# Handle potential JSONDecodeError
try:
    recent_observations_data = recent_observations.json()
except requests.exceptions.JSONDecodeError as err:
    print("Error decoding JSON response for recent_observations:")
    print(recent_observations.text)
    # Raise instead of calling exit(): the cell fails visibly while the
    # kernel and its variables stay alive for debugging
    raise RuntimeError("eBird recent observations response was not valid JSON") from err

### Create DataFrames

In [129]:
# Species list response -> DataFrame
pa_species_df = pd.DataFrame(data=pa_species_data)

# Recent observations response -> DataFrame
pa_observations_df = pd.DataFrame(data=recent_observations_data)

### Clean Returned Data

In [130]:
# Drop unneeded columns from observation data

# Specify the columns to be dropped
columns_to_drop = ['obsValid', 'obsReviewed', 'subId', 'exoticCategory']

# Drop the specified columns from the DataFrame.
# errors='ignore' skips any listed column that is not present, so the cell
# does not raise KeyError when a field is absent from the API response or
# when the cell is re-run after the columns have already been removed.
pa_observations_df = pa_observations_df.drop(columns=columns_to_drop, errors='ignore')


In [131]:
# Give the observation columns descriptive names

# API field name -> column name used in the rest of the notebook
observation_column_names = {
    'speciesCode': 'Species_Code',
    'comName': 'Common_Name',
    'sciName': 'Scientific_Name',
    'locId': 'Location_ID',
    'locName': 'Location_Name',
    'obsDt': 'Observation_Date',
    'howMany': 'Quantity_Observed',
    'lat': 'Latitude',
    'lng': 'Longitude',
    'locationPrivate': 'Location_Private',
}

pa_observations_df = pa_observations_df.rename(columns=observation_column_names)

In [132]:
# The species column is labelled 0 at this point; give it a descriptive name
pa_species_df = pa_species_df.rename({0: 'Species_Code'}, axis='columns')

In [133]:
# Preview the first rows of the species DataFrame to confirm the column rename
pa_species_df.head()

Unnamed: 0,Species_Code
0,bbwduc
1,bahgoo
2,snogoo
3,rosgoo
4,sxrgoo1


### Write cleaned data to CSV files

In [134]:
# Save the PA species list to CSV, leaving out the row index
pa_species_df.to_csv(path_or_buf='pa_species.csv', index=False)

# Save the recent observations to CSV, keeping the row index as the first column
pa_observations_df.to_csv(path_or_buf='recent_observations.csv', index=True)

### Write observation data from CSV file to PostgreSQL database

In [135]:
# Preview the first rows of the cleaned observation data before it is loaded into the database
pa_observations_df.head()

Unnamed: 0,Species_Code,Common_Name,Scientific_Name,Location_ID,Location_Name,Observation_Date,Quantity_Observed,Latitude,Longitude,Location_Private
0,robgro,Rose-breasted Grosbeak,Pheucticus ludovicianus,L11345882,Home - Wertman Rd. Fogelsville,2023-05-29 05:38,2.0,40.601888,-75.646834,True
1,cedwax,Cedar Waxwing,Bombycilla cedrorum,L11345882,Home - Wertman Rd. Fogelsville,2023-05-29 05:38,4.0,40.601888,-75.646834,True
2,carwre,Carolina Wren,Thryothorus ludovicianus,L11345882,Home - Wertman Rd. Fogelsville,2023-05-29 05:38,2.0,40.601888,-75.646834,True
3,scatan,Scarlet Tanager,Piranga olivacea,L11345882,Home - Wertman Rd. Fogelsville,2023-05-29 05:38,2.0,40.601888,-75.646834,True
4,sonspa,Song Sparrow,Melospiza melodia,L20995515,nuthatch,2023-05-29 05:38,1.0,40.516611,-75.985266,True


In [138]:
# PostgreSQL database connection details from .env file
host = os.getenv('PG_HOST')
database = os.getenv('PG_DATABASE')
user = os.getenv('PG_USER')
password = os.getenv('PG_PASSWORD')

# Fail early with a readable message instead of building a
# 'postgresql://None:None@None/None' URL when a setting is missing
pg_settings = {'PG_HOST': host, 'PG_DATABASE': database, 'PG_USER': user, 'PG_PASSWORD': password}
missing_settings = [name for name, value in pg_settings.items() if value is None]
if missing_settings:
    raise RuntimeError(f"Missing PostgreSQL settings in environment/.env: {', '.join(missing_settings)}")

# CSV file path
csv_file = 'recent_observations.csv'

# Create SQLAlchemy engine
# NOTE(review): user/password are interpolated verbatim; credentials containing
# characters such as '@', ':' or '/' must be URL-escaped before use here.
engine = create_engine(f'postgresql://{user}:{password}@{host}/{database}', pool_pre_ping=True)

# Create a session factory
Session = sessionmaker(bind=engine)

# Create SQLAlchemy table object
metadata = MetaData()
table = Table('Recent_Observations', metadata,
              # Explicit Integer type: a Column without a type cannot be emitted in CREATE TABLE
              Column('Index', Integer, primary_key=True, autoincrement=True),
              Column('Species_Code', String),
              Column('Common_Name', String),
              Column('Scientific_Name', String),
              Column('Location_ID', String),
              Column('Location_Name', String),
              Column('Observation_Date', Date),
              Column('Quantity_Observed', Integer),
              Column('Latitude', String),
              Column('Longitude', String),
              Column('Location_Private', Boolean)
              )

# Create the table if it does not exist yet (existing tables are left untouched)
metadata.create_all(engine)


def _parse_observation_row(row):
    """Convert one CSV row (a list of strings) into typed values keyed by column name.

    The csv module yields every field as text, so values are converted to the
    Python types the table columns expect:
      * Quantity_Observed: '2.0' -> 2, empty field -> None
      * Observation_Date: '2023-05-29 05:38' -> date(2023, 5, 29)
      * Location_Private: 'True'/'False' -> bool, anything else -> None
    """
    quantity = row[7].strip()
    return {
        'Index': int(row[0]),
        'Species_Code': row[1],
        'Common_Name': row[2],
        'Scientific_Name': row[3],
        'Location_ID': row[4],
        'Location_Name': row[5],
        'Observation_Date': pd.to_datetime(row[6]).date(),
        'Quantity_Observed': int(float(quantity)) if quantity else None,
        'Latitude': row[8],
        'Longitude': row[9],
        'Location_Private': {'true': True, 'false': False}.get(row[10].strip().lower()),
    }


# Open the CSV file (newline='' is what the csv module expects; utf-8 matches
# the default encoding pandas used when writing the file)
with open(csv_file, 'r', newline='', encoding='utf-8') as file:
    # Create a CSV reader object
    csv_data = csv.reader(file)

    # Skip the header row (the default avoids StopIteration on an empty file)
    next(csv_data, None)

    # Convert every data row up front so a malformed row fails before any insert
    observation_rows = [_parse_observation_row(row) for row in csv_data]

# Insert all rows in one transaction; roll back on any failure and always
# release the connection
session = Session()
try:
    if observation_rows:
        # A list of parameter dicts makes this a single executemany-style insert
        session.execute(table.insert(), observation_rows)

    # Commit the changes to the database
    session.commit()
except Exception:
    session.rollback()
    raise
finally:
    # Close the session
    session.close()


OperationalError: (psycopg2.OperationalError) connection to server at "localhost" (::1), port 5432 failed: FATAL:  password authentication failed for user "postgres1"

(Background on this error at: https://sqlalche.me/e/20/e3q8)