# Bird Observation Data

### Import libraries and configure environment variable

In [None]:
import requests
import pandas as pd
import os

from dotenv import load_dotenv
# Load variables defined in a local .env file (if any) into the process environment
load_dotenv()
# eBird API key read from the environment; None when EBIRD_API_KEY is not set
api_key = os.environ.get('EBIRD_API_KEY')

### GET Requests to eBird API

In [None]:
# Fetches all recent recorded observations in PA
recent_observations_url = 'https://api.ebird.org/v2/data/obs/US-PA/recent'

# Fail early with a clear message instead of sending a request without a key.
if not api_key:
    raise RuntimeError("EBIRD_API_KEY is not set; add it to the environment or the .env file.")

recent_observations_params = {'key': api_key}

# A timeout keeps the cell from hanging indefinitely if the API is unreachable.
recent_observations = requests.get(
    recent_observations_url,
    params=recent_observations_params,
    timeout=30,
)

# Surface HTTP errors (bad key, rate limiting, server errors) before trying
# to parse the body as observation data.
recent_observations.raise_for_status()

# Handle potential JSONDecodeError
try:
    recent_observations_data = recent_observations.json()
except requests.exceptions.JSONDecodeError as err:
    print("Error decoding JSON response for recent_observations:")
    print(recent_observations.text)
    # Raise instead of calling exit(): the notebook session stays alive and
    # the failure shows up as a normal traceback in the cell output.
    raise RuntimeError("eBird API response was not valid JSON") from err

### Create DataFrames

In [None]:
# Load the parsed API response into a DataFrame for the transform steps below
pa_observations_df = pd.DataFrame(data=recent_observations_data)

### Transform
- Drop unnecessary columns
- Rename columns for clarity

In [None]:
# Drop unneeded columns from observation data

# Specify the columns to be dropped
columns_to_drop = ['obsValid', 'obsReviewed', 'subId', 'exoticCategory']
# Drop the specified columns from the DataFrame.
# errors='ignore' skips columns that are not present, so the cell can be
# re-run safely and does not fail if the response lacks one of these fields.
pa_observations_df = pa_observations_df.drop(columns=columns_to_drop, errors='ignore')

# Rename columns for clarity in observation data
pa_observations_df = pa_observations_df.rename(columns={
    'speciesCode': 'Species_Code',
    'comName': 'Common_Name',
    'sciName': 'Scientific_Name',
    'locId': 'Location_ID',
    'locName': 'Location_Name',
    'obsDt': 'Observation_Date',
    'howMany': 'Quantity_Observed',
    'lat': 'Latitude',
    'lng': 'Longitude',
    'locationPrivate': 'Location_Private',
})

In [None]:
# Show the first five transformed rows as a quick sanity check
print(pa_observations_df.head(n=5))

### Write/append data to CSV files

In [None]:
# Append recent observations response to a CSV file
import csv
import os

csv_path = 'recent_observations.csv'

# Write the header row only when the file is new or empty; otherwise every
# run would append another header line in the middle of the data.
write_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0

pa_observations_df.to_csv(csv_path, mode='a', index=True, header=write_header)

### Write observation data from Pandas DataFrame to SQLite database

#### Create SQLite Database

In [None]:
import sqlite3

# Column names for the Observations table
column_names = [
    'Species_Code',
    'Common_Name',
    'Scientific_Name',
    'Location_ID',
    'Location_Name',
    'Observation_Date',
    'Quantity_Observed',
    'Latitude',
    'Longitude',
    'Location_Private',
]

# Connect to the database file and obtain a cursor for issuing statements
connection = sqlite3.connect("pa-observation-data.db")
cursor = connection.cursor()

# Remove any existing table so it is rebuilt from scratch below
cursor.execute("DROP TABLE IF EXISTS Observations;")

# Build and run the CREATE TABLE statement from the column list
create_table_query = f"CREATE TABLE Observations ({', '.join(column_names)})"
cursor.execute(create_table_query)

#### Write data to database

In [None]:
# Insert values from the DataFrame into the table

# Build one "?" placeholder per table column so the statement always matches
# the column list used to create the table.
placeholders = ", ".join("?" for _ in column_names)
insert_query = f"INSERT INTO Observations VALUES ({placeholders})"

# Select the columns explicitly so values are bound in table-column order,
# and iterate as plain tuples rather than passing pandas Series objects to
# sqlite3 (which relied on positional integer lookup on a labelled Series).
observation_rows = pa_observations_df[column_names].itertuples(index=False, name=None)

try:
    # Using the connection as a context manager commits on success and rolls
    # back if any insert fails, so the table is never left half-written.
    with connection:
        cursor.executemany(insert_query, observation_rows)
finally:
    # Always release the database file, even when the insert raised.
    connection.close()

In [None]:
# Display only the rows whose common name is Pileated Woodpecker
pa_observations_df[pa_observations_df['Common_Name'] == 'Pileated Woodpecker']

In [None]:
# Display the dtype of each column in the observations DataFrame
pa_observations_df.dtypes

### Write observation data from Pandas DataFrame to PostgreSQL database (currently disabled)

In [None]:
# # This is no longer being used because I instead wrote the data to a SQLite database.
# # I was having an issue with the data types here, and plan to return to this later to figure it out.

# from sqlalchemy import create_engine, Table, Column, String, MetaData, Date, Integer, Boolean
# from sqlalchemy.orm import sessionmaker

# # PostgreSQL database connection details from .env file
# host = os.getenv('PG_HOST')
# database = os.getenv('PG_DATABASE')
# user = os.getenv('PG_USER')
# password = os.getenv('PG_PASSWORD')

# # Create SQLAlchemy engine
# engine = create_engine(f'postgresql://{user}:{password}@{host}/{database}', pool_pre_ping=True)

# # Create a session factory
# Session = sessionmaker(bind=engine)
# session = Session()

# # Type conversion of Location_ID column to remove preceding L, if necessary, to store the number as an integer.
# pa_observations_df['Location_ID'] = pa_observations_df['Location_ID'].astype(str).apply(lambda x: int(x[1:]) if x.startswith('L') else int(x))

# # Create SQLAlchemy table object
# metadata = MetaData()
# table = Table(
#     'Recent_Observations',
#     metadata,
#     Column('Index', Integer, primary_key=True, autoincrement=True),
#     Column('Species_Code', String),
#     Column('Common_Name', String),
#     Column('Scientific_Name', String),
#     Column('Location_ID', Integer),
#     Column('Location_Name', String),
#     Column('Observation_Date', String),
#     Column('Quantity_Observed', Integer),
#     Column('Latitude', String),
#     Column('Longitude', String),
#     Column('Location_Private', Boolean)
# )

# metadata.create_all(engine)

# # Insert the DataFrame into the database table
# try:
#     with engine.begin() as connection:
#         for index, row in pa_observations_df.iterrows():
#             print("Inserting row: ", row) # for debugging
#             insert_stmt = table.insert().values(
#                 Species_Code=row['Species_Code'],
#                 Common_Name=row['Common_Name'],
#                 Scientific_Name=row['Scientific_Name'],
#                 Location_ID=row['Location_ID'],
#                 Location_Name=row['Location_Name'],
#                 Observation_Date=row['Observation_Date'],
#                 Quantity_Observed=int(row['Quantity_Observed']),
#                 Latitude=float(row['Latitude']),
#                 Longitude=float(row['Longitude']),
#                 Location_Private=bool(row['Location_Private'])
#             )
#             connection.execute(insert_stmt)

#     print("Data insertion successful!")
# except Exception as e:
#     print("An error occurred during data insertion:", str(e))

# session.commit()