In [1]:
## environment
# conda create -n base2023geonat python numpy pandas scipy matplotlib jupyterlab nodejs plotly dill; conda install -c conda-forge pyinaturalist; 
# pip install ipyplot
# conda activate base2023geonat; conda update --all; conda install -c conda-forge 'pyrate-limiter<3' ## for now required pending fix

## imports
import pandas as pd
import datetime as dt
import ipyplot
import pyinaturalist as inat
import sys

sys.path.insert(0, r"C:\Users\drsvs\Desktop\code")
from pynat import helpers

pd.options.display.max_rows=250

In [2]:
%load_ext autoreload
%autoreload 2

In [None]:
# Guard: raises immediately so this cell cannot run by accident (e.g. during "Run All").
# Remove or comment out the assert, paste your key below, run once, then restore the guard.
assert False, "run once to set up your iNat access key"

# dill is only needed by this cell and is not imported in the notebook's import cell.
import dill

## save API_KEY

API_KEY = ""

# Serialize the key to a local file next to the notebook.
with open('pyinaturalistkey.pkd', 'wb') as f:
    dill.dump(API_KEY, f)

# ## load api key
# with open('pyinaturalistkey.pkd', 'rb') as f:
#     API_KEY = dill.load(f)

# import logging
# logging.basicConfig()
# logging.getLogger('pyinaturalist').setLevel('INFO')

### learn about identifications by location of the identified observation, not location of the identifier

In [None]:
# Counties of interest, keyed by iNaturalist place id
PLACES = {
    1491: 'Powhatan County',
    2920: 'Goochland County',
    3032: 'Louisa County',
}
# Query identifications filtered by these places, then peek at the fields of the first record
local_ids = inat.v1.identifications.get_identifications(place_id=[*PLACES])
local_ids['results'][0].keys()

### check accuracy of identifications

# tri-county stats

In [None]:
# Counties of interest, keyed by iNaturalist place id
PLACES = {
    1491: 'Powhatan County',
    2920: 'Goochland County',
    3032: 'Louisa County',
}

# Every call below uses per_page=0 and reads only 'total_results' from the response.
total_observations = inat.get_observations(
    per_page=0,
    verifiable=True,
    place_id=[*PLACES],
)['total_results']
print(f'Total observations: {total_observations}')

total_taxa = inat.get_observation_species_counts(
    per_page=0,
    verifiable=True,
    place_id=[*PLACES],
)['total_results']
print(f'Total taxa observed: {total_taxa}')

total_identifiers = inat.get_observation_identifiers(
    per_page=0,
    place_id=[*PLACES],
)['total_results']
print(f'Total identifiers: {total_identifiers}')

total_observers = inat.get_observation_observers(
    per_page=0,
    place_id=[*PLACES],
)['total_results']
print(f'Total observers: {total_observers}')

In [None]:
## helper functions from pyinaturalist example

THROTTLING_DELAY = 1.0  # Pause (passed to sleep) between subsequent requests
# URL template for the per-taxon icon; {taxon} is filled with a lower-cased iconic taxon name
TAXON_IMAGE_URL = 'https://raw.githubusercontent.com/inaturalist/inaturalist/main/app/assets/images/iconic_taxa/{taxon}-75px.png'
# All iconic taxon names known to pyinaturalist, minus the 'Unknown' entry
iconic_taxa = [name for name in inat.ICONIC_TAXA.values() if name != 'Unknown']

# Run one search for each iconic taxon
def get_iconic_taxa_counts(function, place_ids=None, taxa=None, delay=None):
    """Count results per iconic taxon using the given search function.

    One request is made per taxon with ``per_page=0`` and the response's
    ``'total_results'`` value is recorded and printed. A pause is inserted
    between consecutive requests, but not after the last one.

    Args:
        function: Callable accepting ``place_id``, ``iconic_taxa``,
            ``verifiable`` and ``per_page`` keyword arguments and returning a
            mapping with a ``'total_results'`` entry
            (e.g. ``inat.get_observations``).
        place_ids: Place ids to search in. Defaults to the keys of the
            notebook-level ``PLACES`` dict.
        taxa: Iconic taxon names to query. Defaults to the notebook-level
            ``iconic_taxa`` list.
        delay: Value passed to ``sleep`` between requests. Defaults to the
            notebook-level ``THROTTLING_DELAY``.

    Returns:
        dict mapping each taxon name to its result count, in query order.
    """
    # Imported locally so this helper does not rely on an import made elsewhere in the notebook.
    from time import sleep

    # Fall back to the notebook globals only when the caller did not override them.
    if place_ids is None:
        place_ids = list(PLACES.keys())
    if taxa is None:
        taxa = iconic_taxa
    if delay is None:
        delay = THROTTLING_DELAY

    taxa = list(taxa)
    iconic_taxa_counts = {}
    for position, taxon_name in enumerate(taxa):
        total_taxon_observations = function(
            place_id=place_ids,
            iconic_taxa=taxon_name,
            verifiable=True,
            per_page=0,
        )['total_results']

        iconic_taxa_counts[taxon_name] = total_taxon_observations
        print(f'Total results for {taxon_name}: {total_taxon_observations}')
        # Throttle between requests; no need to wait after the final one
        if position < len(taxa) - 1:
            sleep(delay)
    return iconic_taxa_counts

def get_iconic_icon(taxon_name):
    """Return the icon URL for a taxon: TAXON_IMAGE_URL with the lower-cased name filled in."""
    lowered = taxon_name.lower()
    return TAXON_IMAGE_URL.format(taxon=lowered)

In [None]:
total_observations_by_iconic_taxon = get_iconic_taxa_counts(inat.get_observations)

# One row per iconic taxon: its name, its observation count, and the URL of its iNaturalist icon
observations_df = pd.DataFrame([
    {'iconic taxon': taxon, 'observations': n_obs, 'img': get_iconic_icon(taxon)}
    for taxon, n_obs in total_observations_by_iconic_taxon.items()
])

# ## display with altair
# alt.Chart(
#     observations_df,
#     title=f'Verifiable observations in {PLACE_NAME} by iconic taxon',
#     width=750,
#     height=500,
# ).mark_image().encode(x=alt.X('iconic taxon:N', sort='-y'), y='observations:Q', url='img')

# Show the table with the most-observed taxa first
observations_df.sort_values(by='observations', ascending=False)

In [None]:
# Yearly histogram of verifiable observations, 2008-01-01 through today
observations_by_year = inat.get_observation_histogram(
    verifiable=True,
    interval='year',
    d1='2008-01-01',
    d2=dt.date.today(),
    place_id=[*PLACES],
)
# Reshape the returned mapping into a two-column frame ('date', 'observations')
observations_by_year = pd.DataFrame(
    [
        {'date': bucket, 'observations': n_obs}
        for bucket, n_obs in observations_by_year.items()
    ]
)
observations_by_year.set_index('date').plot();

In [None]:
# Monthly histogram of verifiable observations, 2016-01-02 through today
observations_by_month = inat.get_observation_histogram(
    verifiable=True,
    interval='month',
    d1='2016-01-02',
    d2=dt.date.today(),
    place_id=[*PLACES],
)
# Long format ('metric', 'date', 'count'), the same columns get_monthly_counts produces
observations_by_month = pd.DataFrame(
    [
        {'metric': 'Observations', 'date': bucket, 'count': n_obs}
        for bucket, n_obs in observations_by_month.items()
    ]
)
observations_by_month.set_index('date').plot();

In [None]:
# Sum the monthly counts by the month number of 'date', then tag the rows with their metric name
monthly_observations = (
    observations_by_month
    .groupby(observations_by_month['date'].dt.month)['count']
    .sum()
    .to_frame()
    .assign(metric='Observations')
)
monthly_observations['count'].plot()

In [None]:
## helper functions from pyinaturalist example
def count_date_range_results(function, start_date, end_date):
    """Return the number of results `function` reports between `start_date` and `end_date`.

    The search is limited to the places in PLACES and to verifiable records.
    The count is also printed, labelled with the abbreviated month name of `start_date`.
    """
    # per_page=0 makes the search return only a count of results, which is quick
    query = dict(
        place_id=list(PLACES.keys()),
        d1=start_date,
        d2=end_date,
        verifiable=True,
        per_page=0,
    )
    response = function(**query)
    print(f'Total results for {start_date.strftime("%b")}: {response["total_results"]}')
    return response['total_results']


def get_monthly_counts(function, label):
    """Build a long-format frame of per-month result counts for one search function.

    Month ranges run from 2016-01-01 up to now; each range is counted with
    count_date_range_results. Every row carries `label` in its 'metric' column.
    """
    first_month = dt.datetime(2016, 1, 1)
    month_ranges = inat.get_interval_ranges(first_month, dt.datetime.today(), 'month')

    # Keyed by each range's start date
    counts_by_month = {}
    for start_date, end_date in month_ranges:
        counts_by_month[start_date] = count_date_range_results(function, start_date, end_date)

    records = [
        {'metric': label, 'date': month_start, 'count': n_results}
        for month_start, n_results in counts_by_month.items()
    ]
    return pd.DataFrame(records)

### this part didn't quite work...

In [None]:
# Each call issues one per_page=0 count request per month range from 2016-01-01 to today
# (see get_monthly_counts), so this cell makes many API requests and prints one line per request.
taxa_by_month = get_monthly_counts(inat.get_observation_species_counts, 'Taxa')
observers_by_month = get_monthly_counts(inat.get_observation_observers, 'Observers')
identifiers_by_month = get_monthly_counts(inat.get_observation_identifiers, 'Identifiers')

In [None]:
# taxa_by_month
# Stack the four long-format frames ('metric', 'date', 'count') and pivot to one column per metric, indexed by date.
# NOTE(review): rows only line up if all four frames use identical 'date' values — confirm the histogram dates match the interval-range start dates.
combined_results = pd.concat([taxa_by_month, observations_by_month, observers_by_month, identifiers_by_month]).pivot(index='date', columns='metric', values='count')
combined_results.plot()

### back on track after simplifying the scope...

In [None]:
# One count request per metric for each month=1..12 filter.
# per_page=0 means only 'total_results' is needed from each response.
observations = []
taxa = []
observers = []
identifiers = []

months = range(1, 13)
for month in months:
    observations.append(inat.get_observations(place_id=list(PLACES.keys()), month=month, verifiable=True, per_page=0)['total_results'])
    taxa.append(inat.get_observation_species_counts(place_id=list(PLACES.keys()), month=month, verifiable=True, per_page=0)['total_results'])
    observers.append(inat.get_observation_observers(place_id=list(PLACES.keys()), month=month, verifiable=True, per_page=0)['total_results'])
    identifiers.append(inat.get_observation_identifiers(place_id=list(PLACES.keys()), month=month, verifiable=True, per_page=0)['total_results'])

# Index rows by the month number that was queried (1-12); the default 0-11 index
# would shift every point on the plot's x-axis by one month.
counts = pd.DataFrame(
    {'observations': observations, 'taxa': taxa, 'observers': observers, 'identifiers': identifiers},
    index=pd.Index(months, name='month'),
)
counts.plot()

### maybe want to separately count observers vs species IDs? two ways of removing bias.

In [None]:
## Question: What if I wanted to contrast observation rate versus identification rate? I'd want to index that on the time of identification not the time of observation. 

# convenience: my recent observations, formatted for photo names

In [6]:
## missing captive/cultivated??

# helpers.get_mine(uname='schizoform', lookback_to=dt.datetime(2024,1,29))
helpers.get_mine(uname='schizoform', 
                 STRT=dt.datetime(2025,7,27,0,0,0),
                 #FNSH=dt.datetime(2024,8,12,0,0,0),
                )

## lookback_to -> lookback_at 




20250727 Lepidoptera (Butterflies and Moths) [inat obs id: 301661744]




20250727 Reynoutria japonica (Reynoutria japonica) [inat obs id: 301661745]




20250727 Amanita (Amanita) [inat obs id: 301661747]




20250727 Castanea dentata (American chestnut) [inat obs id: 301661749]




20250727 Conopholis americana (Conopholis americana) [inat obs id: 301661750]




20250727 Eutrochium purpureum (Eutrochium purpureum) [inat obs id: 301661752]




20250727 Magnolia fraseri (Fraser Magnolia) [inat obs id: 301661754]




20250727 Anura (Anura) [inat obs id: 301664998]




20250727 Anura (Anura) [inat obs id: 301664997]




20250727 Oxydendrum arboreum (Oxydendrum arboreum) [inat obs id: 301665002]




20250727 Pinus pungens (Pinus pungens) [inat obs id: 301665003]




20250727 Argia moesta (Argia moesta) [inat obs id: 301665004]




20250727 Anura (Anura) [inat obs id: 301664999]




20250727 Poaceae (Poaceae) [inat obs id: 301665010]




20250727 Oestroidea (Oestroidea) [inat obs id: 301665012]




20250727 Myrmeleon immaculatus (Immaculate Antlion) [inat obs id: 301665023]




20250727 Lasallia (Lasallia) [inat obs id: 301665013]




20250727 Oxydendrum arboreum (Oxydendrum arboreum) [inat obs id: 301665026]




20250727 Vaccinium stamineum (Vaccinium stamineum) [inat obs id: 301665027]




20250727 Tilia americana (Tilia americana) [inat obs id: 301665031]




20250727 Eriophyes tiliae (Eriophyes tiliae) [inat obs id: 301665032]




20250727 Suillelloideae (Suillelloideae) [inat obs id: 301665033]




20250727 Tylopilus (Tylopilus) [inat obs id: 301665038]




20250727 Retiboletus (Retiboletus) [inat obs id: 301665037]




20250727 Hypomyces chrysospermus (Hypomyces chrysospermus) [inat obs id: 301665040]




20250727 Lactarius psammicola (Lactarius psammicola) [inat obs id: 301665047]




20250727 Boletus separans (Boletus separans) [inat obs id: 301665048]




20250727 Thelephora vialis (Thelephora vialis) [inat obs id: 301665055]




20250727 Baorangia bicolor (Baorangia bicolor) [inat obs id: 301665054]




20250727 Boletaceae (Boletaceae) [inat obs id: 301665057]




20250727 Boletaceae (Boletaceae) [inat obs id: 301665060]




20250727 Ramariopsis kunzei (Ramariopsis kunzei) [inat obs id: 301665062]




20250727 Turbinellus floccosus (Turbinellus floccosus) [inat obs id: 301665065]




20250727 Tipularia discolor (crane-fly orchid) [inat obs id: 301665067]




20250727 Diphasiastrum digitatum (fan clubmoss) [inat obs id: 301665069]




20250727 Commelina communis (Commelina communis) [inat obs id: 301665070]




20250727 Passiflora incarnata (purple passionflower) [inat obs id: 301681273]




20250727 Tipularia discolor (crane-fly orchid) [inat obs id: 301681276]


# bloomers
The goal here is to see what plants will be blooming when in my local region. But along the way...

### Coming soon!

what plants might be blooming next? seeding next? what mushrooms might I see? what (esp. migratory) birds? when are mammals active? 

* DONE alternate entrypoint of specifying lat/long/dist instead of places
* try to narrow time frame to consistently 21-day period?
* normalizations for sort order:
   1. count of all observations at time/place
   2. count of all observations in time and at place separately
   3. count of all observations at time/place by taxa
   4. count of all observations in time and at place separately by taxa
   5. (similar but by phenology?)
   6. (separate totals for 'research grade' and 'informal' counts)
* photographs should match requested phenotype where possible
* add support for caterpillars/butterflies (and similar for benthic macroinverts?)
* split animals by clade and/or generalize interface?

In [None]:
PLACES = {1491:'Powhatan County',
          2920:'Goochland County',
          3032:'Louisa County'}

# Month number(s) touched by the window from 7 days ago to 7 days ahead:
# one month normally, two when the window straddles a month boundary.
# today() is read once so both ends of the window use the same date.
today = dt.date.today()
window_months = sorted({(today - dt.timedelta(days=7)).month, (today + dt.timedelta(days=7)).month})

# A single per_page=0 request serves both the printed total and the displayed response
# (previously the identical request was sent twice).
plant_counts = inat.get_observations(
    place_id=list(PLACES.keys()),
    month=window_months,
    taxon_name='plants',
    verifiable=True,
    per_page=0,
)
total_observations = plant_counts['total_results']
print(f'Total plant observations: {total_observations}')

plant_counts

In [None]:
# Shared settings for the helpers.coming_soon(...) cells below.
# NOTE(review): target_loc looks like (latitude, longitude, distance) — the notes above mention
# "lat/long/dist"; the distance units are not shown here, confirm in helpers.coming_soon.
# target_loc = (37.679849,-77.442868,10) # upper_chick
target_loc = (47.9,-91.6,250) # BWCA Farm Lake
limit = 7  # passed as limit= to helpers.coming_soon
norm = 'overall' # 'time'  (passed as norm=; 'time' is the alternative noted by the author)

In [None]:
# helpers.coming_soon('flowers', places=list(PLACES.keys()))
res = helpers.coming_soon('flowers', loc=target_loc, norm=norm, limit=limit)

In [None]:
res = helpers.coming_soon('fruits', loc=target_loc, norm=norm, limit=limit)

In [None]:
res = helpers.coming_soon('mushrooms', loc=target_loc, norm=norm, limit=limit)

In [None]:
res = helpers.coming_soon('birds', loc=target_loc, norm=norm, limit=limit)

In [None]:
res = helpers.coming_soon('herps', loc=target_loc, norm=norm, limit=limit)

In [None]:
res = helpers.coming_soon('mammals', loc=target_loc, norm=norm, limit=limit)

In [None]:
### NOTE: should exclude amphibia
res = helpers.coming_soon('wugs', loc=target_loc, norm=norm, limit=limit)

In [None]:
## filter images to show desired life stage
## note: graceful handling of null return values

res = helpers.coming_soon('caterpillars', loc=target_loc, norm='overall', limit=3)

In [None]:
## filter images to show desired life stage

res = helpers.coming_soon('butterflies', loc=target_loc, norm='overall', limit=3)

# OLD

## confidence_manimal

general idea is to look at the rate at which I ID vs withdraw ID, slash the rate at which I nevermind-withdrawn ID vs the community taxon. how full of shit am I?

In [None]:
## get a list of all the clades I provided an ID
## idea: give it a user name and an ID -- what percent were subsequently confirmed accurate? confirmed inaccurate?

In [None]:
## step one: get all of a user's observations (e.g. mine)


def get_species_preview(api_key: str,
                        user: str,
                       ) -> pd.DataFrame:
    """
    Fetch one page of a user's observations from the iNaturalist API, most recently observed first.

    Args:
        api_key (str): The iNaturalist API key, sent as a Bearer ``Authorization`` header.
        user (str): The observer to filter on, sent as the ``user_id`` query parameter.
            NOTE(review): the API is expected to accept either a login name or a
            numeric id here — confirm against the API reference.

    Returns:
        pd.DataFrame: One row per entry in the response's ``results`` list, with
        nested fields flattened into dotted column names by ``pd.json_normalize``.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    # Imported here because the notebook's import cell does not import requests.
    import requests

    # Define the base URL for the iNaturalist API
    base_url = "https://api.inaturalist.org/v1/observations"

    # Define the parameters for the API request
    params = {
        "user_id": user,
        "order": "desc",
        "order_by": "observed_on",
    }

    # Make the API request; the timeout keeps a stalled connection from hanging the kernel
    response = requests.get(
        base_url,
        params=params,
        headers={"Authorization": "Bearer " + api_key},
        timeout=30,
    )
    # Fail loudly on an HTTP error instead of a confusing KeyError on "results" below
    response.raise_for_status()

    # Convert the response to a pandas DataFrame
    return pd.json_normalize(response.json()["results"])

