# iNaturalist Helpers Workbench

A Jupyter notebook that exercises `helpers.py` for iNaturalist data access, ranking, and park-level species summaries.

## 1) Setup: Imports, Local Module Reload, and Logging

Import analysis libraries, enable autoreload for iterative edits, and configure notebook logging.

In [None]:
import os
import logging
import datetime as dt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

from pynat import helpers
from pynat.helpers import load_api_key, get_inat_session, get_mine, coming_soon, get_park_data

# Root logging at INFO, one record per line as LEVEL:logger-name:message.
LOG_FORMAT = "%(levelname)s:%(name)s:%(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

# Make sure the helpers module's own logger emits INFO records as well.
helpers.logger.setLevel(logging.INFO)

# Raise the pandas display limits so wider/longer result tables are shown
# without truncation in notebook output.
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_columns", 80)

## 2) Credential Flow: `load_api_key()` and Environment Overrides

Demonstrate credential loading order (environment → keyring → fallback file) without printing secrets.

In [None]:
# Load the key through the helper; the raw value is never printed below.
api_key = load_api_key()

if api_key:
    # Reveal only the last four characters, and only when the key is long
    # enough that most of it stays hidden. A first-4/last-4 preview would
    # print an 8-character key in its entirety.
    masked = f"...{api_key[-4:]}" if len(api_key) >= 12 else "***"
    print(f"API key found (masked): {masked}")
else:
    print("No API key found via env/keyring/fallback file.")

# Report only whether each candidate environment variable is set, never its value.
for env_name in ("INAT_API_KEY", "PYINAT_API_KEY", "INAT_KEY", "INAT_TOKEN"):
    print(f"{env_name} set? {bool(os.getenv(env_name))}")

## 3) HTTP Client Setup: `get_inat_session()` with Optional Cache

Create a session, inspect auth header presence, and make a small repeated call to observe cache behavior.

In [None]:
# Build a session with caching requested and check that auth was attached,
# without printing the header value itself.
session = get_inat_session(use_cache=True)
print("Authorization header present?", "Authorization" in session.headers)

url = "https://api.inaturalist.org/v1/observations"
params = {"per_page": 1, "page": 1}

# Issue the identical request twice so the second one can be answered from cache.
r1 = session.get(url, params=params, timeout=30)
r1.raise_for_status()
r2 = session.get(url, params=params, timeout=30)
r2.raise_for_status()

print("First call status:", r1.status_code)
print("Second call status:", r2.status_code)

# Status codes alone cannot distinguish a cache hit from a network round trip.
# requests-cache responses carry a `from_cache` flag; presumably the session
# returned above is such a session when use_cache=True (TODO confirm in
# helpers.py). Fall back to "unknown" for plain responses.
for label, resp in (("First", r1), ("Second", r2)):
    print(f"{label} call served from cache?", getattr(resp, "from_cache", "unknown"))

# Iterating the decoded JSON object yields its keys, so no list()/keys() wrapper is needed.
print("Keys in payload:", sorted(r1.json())[:6])

## 4) Fetch Personal Observations: `get_mine()` with Date Windows

Run observation queries for explicit date windows and lookback variants.

In [None]:
# Set your iNaturalist username here:
uname = "your_username_here"

# Explicit window example: from 14 days ago through tomorrow.
# Capture "today" once so both ends of the window are derived from the same
# date even if the cell happens to run across midnight.
today = dt.date.today()
start_date = today - dt.timedelta(days=14)
finish_date = today + dt.timedelta(days=1)

# Uncomment to run
# get_mine(uname=uname, STRT=start_date, FNSH=finish_date)

# Lookback example (from today)
# get_mine(uname=uname, lookback_in_days=7)

## 5) Seasonal Nearby Taxa: `coming_soon()` Basic Queries

Run `coming_soon()` for several kinds (birds, plants, butterflies) using a shared `(lat, lng, radius)` location tuple, then inspect the resulting DataFrame columns.

In [None]:
# Location tuple (lat, lng, radius) handed to every query as `loc`.
target_loc = (37.6669, -77.8883, 25)

# Options common to all three queries, kept in one place so the calls below
# differ only in the kind requested.
query_opts = dict(
    loc=target_loc,
    limit=15,
    fast=True,
    per_page=25,
    max_pages=2,
    fetch_images=False,
)

birds = coming_soon("birds", **query_opts)
plants = coming_soon("plants", **query_opts)
butterflies = coming_soon("butterflies", **query_opts)

print("birds columns:", birds.columns.tolist())
print("plants rows:", len(plants), "butterflies rows:", len(butterflies))

display(birds.head(10))

## 6) Ranking Logic: Compare `norm=None`, `time`, `place`, `overall`

Run identical queries under each normalization mode and compare top-ranked taxa.

In [None]:
# Normalization modes to compare; None is stored under the key "none".
modes = [None, "time", "place", "overall"]
wanted_cols = ["taxon.name", "taxon.preferred_common_name", "count"]

comparisons = {}
for mode in modes:
    key = "none" if mode is None else mode
    df_mode = coming_soon("birds", loc=target_loc, norm=mode, limit=20, fast=True, per_page=25, max_pages=2, fetch_images=False)
    # reindex instead of df[[...]]: a result that is empty or lacks one of
    # these columns gets NaN-filled columns rather than raising KeyError.
    # It also returns a new frame, so adding "rank" does not touch the original.
    df_mode = df_mode.reindex(columns=wanted_cols)
    # Rank is the 1-based row position in the order coming_soon() returned.
    df_mode["rank"] = np.arange(1, len(df_mode) + 1)
    comparisons[key] = df_mode

# Start from the un-normalized ranking and outer-join each other mode on the
# taxon name, so taxa present in only some modes still appear (with NaN ranks).
side_by_side = comparisons["none"][["taxon.name", "rank"]].rename(columns={"rank": "rank_none"})
for key in ["time", "place", "overall"]:
    tmp = comparisons[key][["taxon.name", "rank"]].rename(columns={"rank": f"rank_{key}"})
    side_by_side = side_by_side.merge(tmp, on="taxon.name", how="outer")

display(side_by_side.sort_values("rank_none", na_position="last").head(20))

## 7) Park Snapshot: `get_park_data()` and Relative Frequency

Retrieve top taxa by relative frequency for one or more parks and compare across kinds.

In [None]:
# Park name -> geocenter tuple passed as the first argument to get_park_data().
parks = {
    "tucker": (37.66713, -77.88739, 0.4),
    "hiddenrock": (37.70219, -77.87333, 0.63),
}
kinds = ("birds", "plants")

# One table per (park, kind) pair, each tagged with where it came from so the
# concatenated summary can be grouped or filtered later.
park_tables = []
for park_name, geocenter in parks.items():
    for kind in kinds:
        table = get_park_data(geocenter, kind=kind, limit=10, per_page=25, max_pages=2)
        # assign() works on a copy, leaving the frame returned by the helper untouched.
        park_tables.append(table.assign(park=park_name, kind=kind))

if park_tables:
    park_summary = pd.concat(park_tables, ignore_index=True)
else:
    park_summary = pd.DataFrame()
display(park_summary.head(20))

## 8) Result Shaping: Sorting, Filtering, and CSV Export

Apply deterministic sorting, drop rows whose common name is null, and export the cleaned table to CSV for downstream analysis.

In [None]:
if not park_summary.empty:
    # Keep only the export columns that are actually present in the summary.
    final_cols = ["park", "kind", "count", "taxon.name", "taxon.preferred_common_name", "taxon.wikipedia_url"]
    final_cols = [c for c in final_cols if c in park_summary.columns]

    final = park_summary[final_cols].copy()
    if "taxon.preferred_common_name" in final.columns:
        # Drop rows that have no common name.
        final = final[final["taxon.preferred_common_name"].notna()]

    # Sort key -> ascending flag. Keeping the direction attached to its column
    # means a missing column cannot shift the remaining directions (slicing a
    # parallel list would, e.g., sort taxon.name descending if "count" is absent).
    sort_order = {"park": True, "kind": True, "count": False, "taxon.name": True}
    sort_cols = [c for c in sort_order if c in final.columns]
    if sort_cols:
        final = final.sort_values(sort_cols, ascending=[sort_order[c] for c in sort_cols])
    final = final.reset_index(drop=True)

    out_csv = "pynat/park_summary_export.csv"
    # The path is relative to the working directory; create the target folder
    # if needed so the export does not fail when it is missing.
    os.makedirs(os.path.dirname(out_csv), exist_ok=True)
    final.to_csv(out_csv, index=False)
    print(f"Exported: {out_csv}")
    display(final.head(30))
else:
    print("No park summary rows to export yet.")