# Bird Observation Data

### Import libraries and configure environment variables

In [None]:
import requests
import pandas as pd
import os

from dotenv import load_dotenv
# Read the variables defined in the local .env file into the process environment
load_dotenv()
# eBird API token; None when EBIRD_API_KEY is not set
api_key = os.getenv('EBIRD_API_KEY')

### GET Requests to eBird API

In [None]:
# Fetches all recent recorded observations in PA
recent_observations_url = 'https://api.ebird.org/v2/data/obs/US-PA/recent'
recent_observations_params = {'key': os.environ.get('EBIRD_API_KEY')}
# A timeout keeps the cell from hanging indefinitely if the API never answers.
recent_observations = requests.get(
    recent_observations_url,
    params=recent_observations_params,
    timeout=30,
)

# Stop on HTTP errors (bad/missing key, rate limit, outage) so an error payload
# is never mistaken for observation data. The message is built by hand rather
# than with raise_for_status() because that would echo the full request URL,
# which contains the API key as a query parameter.
if not recent_observations.ok:
    raise RuntimeError(
        f"eBird API request failed with HTTP {recent_observations.status_code}: "
        f"{recent_observations.text}"
    )

# Handle potential JSONDecodeError
try:
    recent_observations_data = recent_observations.json()
except requests.exceptions.JSONDecodeError:
    print("Error decoding JSON response for recent_observations:")
    print(recent_observations.text)
    # Re-raise instead of calling exit(): the run still stops here, but the
    # interpreter/kernel is not shut down and the traceback stays visible.
    raise

### Create DataFrame

In [None]:
# Load the decoded API response into a DataFrame for the transformations that follow
pa_observations_df = pd.DataFrame(data=recent_observations_data)

### Transform
- Drop unnecessary columns
- Rename columns for clarity
- Remove duplicate data

In [None]:
# Drop unneeded columns from observation data

# Specify the columns to be dropped
columns_to_drop = ['obsValid', 'obsReviewed', 'exoticCategory']
# Drop the specified columns from the DataFrame.
# errors='ignore' skips any listed column that is not present instead of
# raising KeyError. That happens when the response did not contain the field,
# and also when this cell is re-run after the columns were already removed.
pa_observations_df = pa_observations_df.drop(columns=columns_to_drop, errors='ignore')

In [None]:
# Map the eBird API field names to descriptive snake_case column names
column_renames = {
    'speciesCode': 'species_code',
    'comName': 'common_name',
    'sciName': 'scientific_name',
    'locId': 'location_id',
    'locName': 'location_name',
    'obsDt': 'observation_date',
    'howMany': 'quantity_observed',
    'lat': 'latitude',
    'lng': 'longitude',
    'locationPrivate': 'location_private',
    'subId': 'submission_id',
}
pa_observations_df = pa_observations_df.rename(columns=column_renames)

In [None]:
# Remove duplicate entries from DataFrame.
# .copy() makes the result an independent DataFrame, so the column updates
# applied to it in later cells act on its own data.
pa_observations_deduplicated_df = pa_observations_df.drop_duplicates(keep='first').copy()
# NOTE: the CSV is intentionally not written here. The "Write/append data to
# CSV files" section appends this same DataFrame to 'recent_observations.csv';
# writing it here as well truncated that file on every run and stored each
# row twice, before missing quantities had been filled in.

In [None]:
# Transform NaN values in the 'quantity_observed' column to a default value of 1.
# The filled result is bound back to the name instead of calling
# df[col].fillna(..., inplace=True): that form operates on an intermediate
# Series, is deprecated, and leaves the DataFrame unchanged under Copy-on-Write.
pa_observations_deduplicated_df = pa_observations_deduplicated_df.fillna({'quantity_observed': 1})

In [None]:
# Preview the first five rows of the cleaned observation data
print(pa_observations_deduplicated_df.head(n=5))

### Write/append data to CSV files

In [None]:
# Append de-duplicated recent observations data to a CSV file
import csv

csv_path = 'recent_observations.csv'
# Emit the header row only when starting a new (missing or empty) file.
# Appending with header=True every time would insert a header line in the
# middle of the existing data.
write_header = not os.path.isfile(csv_path) or os.path.getsize(csv_path) == 0
pa_observations_deduplicated_df.to_csv(csv_path, mode='a', header=write_header, index=False)

### Write observation data from Pandas DataFrame to SQLite database

#### Create SQLite Database

In [None]:
import sqlite3

# Columns of the Observations table. The INSERT statement that fills this
# table supplies values positionally, so the order here matters.
column_names = [
    'species_code',
    'common_name',
    'scientific_name',
    'location_id',
    'location_name',
    'observation_date',
    'quantity_observed',
    'latitude',
    'longitude',
    'location_private',
    'submission_id',
]

# Open (or create) the database file and get a cursor for issuing statements
connection = sqlite3.connect("pa-observation-data.db")
cursor = connection.cursor()

# Start from a clean slate: discard any table left over from an earlier run
cursor.execute("DROP TABLE IF EXISTS Observations;")

# Build and run the CREATE TABLE statement (columns are declared without types)
column_list_sql = ", ".join(column_names)
create_table_query = f"CREATE TABLE Observations ({column_list_sql})"
cursor.execute(create_table_query)

#### Write data to database

In [None]:
# Insert values from the DataFrame into the table
insert_sql = "INSERT INTO Observations VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"

# Hand sqlite3 plain tuples of Python objects rather than pandas Series rows:
# - sqlite3 reads bind parameters by integer position, and integer indexing on
#   a Series with string labels only works through a deprecated positional
#   fallback;
# - casting to object dtype turns NumPy scalars (e.g. numpy.int64,
#   numpy.bool_) into built-in int/bool/float values, which sqlite3 stores
#   with their natural SQLite types.
rows = pa_observations_deduplicated_df.astype(object).itertuples(index=False, name=None)

try:
    # executemany runs the statement once per row without a Python-level index loop
    cursor.executemany(insert_sql, rows)
    connection.commit()
finally:
    # Release the database file even when an insert fails
    connection.close()

In [None]:
# Disabled alternative: filter by the location ID stored in the LOCATION environment variable
# home = os.environ.get('LOCATION')
# pa_observations_df[(pa_observations_df.Location_ID == home)]

# Show only the Pileated Woodpecker observations
is_pileated_woodpecker = pa_observations_deduplicated_df['common_name'] == 'Pileated Woodpecker'
pa_observations_deduplicated_df[is_pileated_woodpecker]

In [None]:
# Inspect the data type pandas assigned to each column
pa_observations_deduplicated_df.dtypes

### Write observation data from Pandas DataFrame to PostgreSQL database

In [None]:
from urllib.parse import quote_plus

from sqlalchemy import create_engine

# PostgreSQL database connection details from .env file
host = os.getenv('PG_HOST')
database = os.getenv('PG_DATABASE')
user = os.getenv('PG_USER')
password = os.getenv('PG_PASSWORD')

# Create SQLAlchemy engine.
# The credentials are percent-encoded so characters such as '@', ':' or '/'
# in the user name or password cannot corrupt the connection URL. str() keeps
# the previous rendering of an unset variable.
engine = create_engine(
    f'postgresql://{quote_plus(str(user))}:{quote_plus(str(password))}@{host}/{database}',
    pool_pre_ping=True,
)

# Type conversion of the location_id column to remove the preceding 'L', if present,
# so the number can be stored as an integer.
pa_observations_deduplicated_df['location_id'] = (
    pa_observations_deduplicated_df['location_id']
    .astype(str)
    .apply(lambda x: int(x[1:]) if x.startswith('L') else int(x))
)

table_name = "recent_observations"
try:
    pa_observations_deduplicated_df.to_sql(table_name, engine, if_exists='append', index=False)
    print("Data insertion successful!")
except Exception as e:
    # Top-level boundary of the notebook: report the failure instead of raising
    print("An error occurred during data insertion:", str(e))
finally:
    # Return pooled connections to the server; the engine stays usable afterwards
    engine.dispose()