In [6]:
import pandas as pd
import os

# List of existing CSV files
csv_files = [
    "L257959_bird_data.csv", "L207315_bird_data.csv", "L162766_bird_data.csv",
    "L351484_bird_data.csv", "L321969_bird_data.csv", "L298030_bird_data.csv",
    "L269461_bird_data.csv", "L232479_bird_data.csv", "L128530_bird_data.csv"
]

# Load files into DataFrames
dataframes = {file: pd.read_csv(file) for file in csv_files}

In [10]:
import requests

# iNaturalist API endpoint
url = "https://api.inaturalist.org/v1/observations"

# Function to fetch iNaturalist data for a specific location
def fetch_inat_data(lat, lng, radius=5, start_date=None, end_date=None):
    params = {
        "lat": lat,
        "lng": lng,
        "radius": 1,  # Radius in kilometers
        "d1": start_date,  # Start date
        "d2": end_date,    # End date
        "taxon_id": 3,     # Taxon ID for birds
        "per_page": 100,
        "page": 1
    }
    observations = []
    while True:
        response = requests.get(url, params=params)
        data = response.json()
        
        # Break if no results
        if not data["results"]:
            break
        
        # Extract relevant fields
        for obs in data["results"]:
            observations.append({
                "Species": obs["taxon"]["name"],
                "Location": obs["place_guess"],
                "Latitude": obs["geojson"]["coordinates"][1],
                "Longitude": obs["geojson"]["coordinates"][0],
                "Observation Date": obs["observed_on"],
                "Count": obs.get("individual_count", "N/A"),
            })
        
        # Go to the next page
        params["page"] += 1
    return pd.DataFrame(observations)

# Example: Fetch data for a location
lat, lng = 47.6621, -122.1234  # Example latitude and longitude
new_data = fetch_inat_data(lat, lng, start_date="2023-01-01", end_date="2023-12-31")

In [11]:
from datetime import datetime

# Function to update a CSV file with new observations
def update_csv(file, dataframe):
    # Extract latitude and longitude from the first row of the DataFrame
    lat, lng = dataframe["Latitude"].iloc[0], dataframe["Longitude"].iloc[0]
    
    # Fetch new data for the location
    today = datetime.now().strftime("%Y-%m-%d")
    new_data = fetch_inat_data(lat, lng, start_date="2023-01-01", end_date=today)
    
    # Append new data to the existing DataFrame
    updated_data = pd.concat([dataframe, new_data], ignore_index=True)
    
    # Drop duplicates based on Species and Observation Date
    updated_data.drop_duplicates(subset=["Species", "Observation Date"], inplace=True)
    
    # Save the updated DataFrame back to the CSV
    updated_data.to_csv(file, index=False)
    print(f"Updated {file} with {len(new_data)} new records.")

# Update each CSV
for file, df in dataframes.items():
    update_csv(file, df)

Updated L257959_bird_data.csv with 104 new records.
Updated L207315_bird_data.csv with 163 new records.
Updated L162766_bird_data.csv with 4277 new records.
Updated L351484_bird_data.csv with 673 new records.
Updated L321969_bird_data.csv with 549 new records.
Updated L298030_bird_data.csv with 1518 new records.
Updated L269461_bird_data.csv with 1001 new records.
Updated L232479_bird_data.csv with 912 new records.
Updated L128530_bird_data.csv with 1029 new records.


In [12]:
for file in csv_files:
    updated_df = pd.read_csv(file)
    print(f"{file}: {len(updated_df)} records after update.")

L257959_bird_data.csv: 112 records after update.
L207315_bird_data.csv: 157 records after update.
L162766_bird_data.csv: 3255 records after update.
L351484_bird_data.csv: 581 records after update.
L321969_bird_data.csv: 477 records after update.
L298030_bird_data.csv: 1223 records after update.
L269461_bird_data.csv: 909 records after update.
L232479_bird_data.csv: 788 records after update.
L128530_bird_data.csv: 871 records after update.


In [13]:
# Check number of rows in each file
for file in csv_files:
    df = pd.read_csv(file)  # Load CSV into DataFrame
    num_rows = df.shape[0]  # Get number of rows
    print(f"{file} has {num_rows} rows.")

L257959_bird_data.csv has 112 rows.
L207315_bird_data.csv has 157 rows.
L162766_bird_data.csv has 3255 rows.
L351484_bird_data.csv has 581 rows.
L321969_bird_data.csv has 477 rows.
L298030_bird_data.csv has 1223 rows.
L269461_bird_data.csv has 909 rows.
L232479_bird_data.csv has 788 rows.
L128530_bird_data.csv has 871 rows.


In [14]:
# Loop through each CSV file to remove the specified columns
for file in csv_files:
    df = pd.read_csv(file)  # Load CSV into DataFrame
    
    # Check if columns exist before dropping to avoid errors
    if 'Count' in df.columns:
        df = df.drop(columns=['Count'])
    
    # Save the updated DataFrame back to the CSV
    df.to_csv(file, index=False)
    print(f"Updated {file} by removing 'Count' column.")

Updated L257959_bird_data.csv by removing 'Count' column.
Updated L207315_bird_data.csv by removing 'Count' column.
Updated L162766_bird_data.csv by removing 'Count' column.
Updated L351484_bird_data.csv by removing 'Count' column.
Updated L321969_bird_data.csv by removing 'Count' column.
Updated L298030_bird_data.csv by removing 'Count' column.
Updated L269461_bird_data.csv by removing 'Count' column.
Updated L232479_bird_data.csv by removing 'Count' column.
Updated L128530_bird_data.csv by removing 'Count' column.
