In [1]:
## environment
# conda create -n base2023geonat python numpy pandas scipy matplotlib jupyterlab nodejs plotly dill; conda install -c conda-forge pyinaturalist; conda install -c conda-forge 'pyrate-limiter<3' ## for now required pending fix

## imports
import requests
import pandas as pd
import datetime as dt
from typing import Tuple, Union
from time import sleep
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from io import BytesIO
import pyinaturalist as inat
import dill
import os

# Allow up to 250 rows to be shown when a DataFrame or Series is displayed.
pd.options.display.max_rows=250

In [None]:
# Guard: this assert always fails, so the statements below do not run unless
# the line is temporarily removed to (re)write the key file.
assert False, "run once to set up your iNat access key"

## save API_KEY

# Placeholder value; fill in the real key only while running this cell and
# avoid saving the notebook with the key still in place.
API_KEY = ""

# Serialize the key with dill to a file in the current working directory.
with open('pyinaturalistkey.pkd', 'wb') as f:
    dill.dump(API_KEY, f)

In [2]:
## load api key
# NOTE(review): dill, like pickle, can execute code embedded in the file it
# loads, so only read a key file that you wrote yourself.
with open('pyinaturalistkey.pkd', 'rb') as f:
    API_KEY = dill.load(f)

### learn about identifications by location of identified not location of identifier

In [None]:
# iNaturalist place IDs for the counties of interest, mapped to display names
PLACES = {1491:'Powhatan County',
          2920:'Goochland County',
          3032:'Louisa County'}
# Pass only the numeric place IDs (not the whole id -> name mapping), in line
# with how every other query in this notebook supplies place_id.
local_ids = inat.v1.identifications.get_identifications(place_id=list(PLACES))

In [None]:
# Display the full identifications response fetched in the previous cell.
local_ids

In [None]:
# List the field names present on the first identification record.
local_ids['results'][0].keys()

### check accuracy of identifications

# tri-county stats

In [None]:
# iNaturalist place IDs for the counties of interest, mapped to display names
PLACES = {
    1491: 'Powhatan County',
    2920: 'Goochland County',
    3032: 'Louisa County',
}

# Every query below uses per_page=0 and reads only the 'total_results' field.
observations_response = inat.get_observations(place_id=PLACES.keys(), verifiable=True, per_page=0)
total_observations = observations_response['total_results']
print(f'Total observations: {total_observations}')

taxa_response = inat.get_observation_species_counts(place_id=PLACES.keys(), verifiable=True, per_page=0)
total_taxa = taxa_response['total_results']
print(f'Total taxa observed: {total_taxa}')

# Note: the identifier and observer queries are not restricted to verifiable observations.
identifiers_response = inat.get_observation_identifiers(place_id=PLACES.keys(), per_page=0)
total_identifiers = identifiers_response['total_results']
print(f'Total identifiers: {total_identifiers}')

observers_response = inat.get_observation_observers(place_id=PLACES.keys(), per_page=0)
total_observers = observers_response['total_results']
print(f'Total observers: {total_observers}')

In [None]:
## helper functions from pyinaturalist example

# Seconds to pause between consecutive per-taxon requests (used by get_iconic_taxa_counts)
THROTTLING_DELAY = 1.0  # Time to wait in between subsequent requests
# URL template for the 75px iconic-taxon icons; {taxon} is filled with a lowercased taxon name
TAXON_IMAGE_URL = 'https://raw.githubusercontent.com/inaturalist/inaturalist/main/app/assets/images/iconic_taxa/{taxon}-75px.png'
# Iconic taxon names known to pyinaturalist, with the 'Unknown' entry dropped
iconic_taxa = list(inat.ICONIC_TAXA.values())
iconic_taxa.remove('Unknown')

# Run one search for each iconic taxon
def get_iconic_taxa_counts(function):
    """Query `function` once per iconic taxon and collect the reported totals.

    `function` is called with keyword arguments (place_id, iconic_taxa,
    verifiable, per_page) and must return a mapping with a 'total_results'
    entry. Each total is printed as it arrives, and the function sleeps for
    THROTTLING_DELAY seconds after every taxon except the last one.

    Returns a dict mapping taxon name -> total results.
    """
    totals = {}
    for name in iconic_taxa:
        response = function(place_id=PLACES.keys(), iconic_taxa=name, verifiable=True, per_page=0)
        n_results = response['total_results']
        totals[name] = n_results
        print(f'Total results for {name}: {n_results}')

        # No need to wait once the final taxon has been queried
        is_last = name == iconic_taxa[-1]
        if not is_last:
            sleep(THROTTLING_DELAY)
    return totals

def get_iconic_icon(taxon_name):
    """Return the icon URL for an iconic taxon by filling TAXON_IMAGE_URL with its lowercased name."""
    icon_slug = taxon_name.lower()
    return TAXON_IMAGE_URL.format(taxon=icon_slug)

In [None]:
total_observations_by_iconic_taxon = get_iconic_taxa_counts(inat.get_observations)

# One row per iconic taxon: its name, observation total, and icon URL
icon_rows = []
for iconic_name, n_observations in total_observations_by_iconic_taxon.items():
    icon_rows.append(
        {'iconic taxon': iconic_name, 'observations': n_observations, 'img': get_iconic_icon(iconic_name)}
    )
observations_df = pd.DataFrame(icon_rows)

# ## display with altair
# alt.Chart(
#     observations_df,
#     title=f'Verifiable observations in {PLACE_NAME} by iconic taxon',
#     width=750,
#     height=500,
# ).mark_image().encode(x=alt.X('iconic taxon:N', sort='-y'), y='observations:Q', url='img')

# Show the table with the most-observed taxa first
observations_df.sort_values('observations', ascending=False)

In [None]:
# Histogram of verifiable observations in PLACES, one bucket per year from 2008 to today
yearly_histogram = inat.get_observation_histogram(
    place_id=PLACES.keys(), interval='year', d1='2008-01-01', d2=dt.date.today(), verifiable=True,
)

# Reshape the {date: count} mapping into a two-column frame and plot it against date
yearly_rows = []
for bucket_date, n_observations in yearly_histogram.items():
    yearly_rows.append({'date': bucket_date, 'observations': n_observations})
observations_by_year = pd.DataFrame(yearly_rows)
observations_by_year.set_index('date').plot();

In [None]:
# Histogram of verifiable observations in PLACES, one bucket per month from 2016-01-02 to today
monthly_histogram = inat.get_observation_histogram(
    place_id=PLACES.keys(), interval='month', d1='2016-01-02', d2=dt.date.today(), verifiable=True,
)

# Long format (metric / date / count) so it can be stacked with other monthly metrics
monthly_rows = []
for bucket_date, n_observations in monthly_histogram.items():
    monthly_rows.append({'metric': 'Observations', 'date': bucket_date, 'count': n_observations})
observations_by_month = pd.DataFrame(monthly_rows)
observations_by_month.set_index('date').plot();

In [None]:
# Pool the monthly counts by month-of-year number, summing across years
month_of_year = observations_by_month['date'].dt.month
monthly_observations = (
    observations_by_month
    .groupby(month_of_year)['count']
    .sum()
    .to_frame()
    .assign(metric='Observations')
)
monthly_observations['count'].plot()

In [None]:
## helper functions from pyinaturalist example
def count_date_range_results(function, start_date, end_date):
    """Return the number of verifiable results `function` reports for PLACES between two dates.

    `function` is called with place_id, d1, d2, verifiable and per_page=0, so
    only the count is requested rather than the records themselves. The total
    is also printed, labelled with the abbreviated month name of `start_date`.
    """
    query = dict(
        place_id=PLACES.keys(),
        d1=start_date,
        d2=end_date,
        verifiable=True,
        per_page=0,
    )
    total = function(**query)['total_results']
    print(f'Total results for {start_date.strftime("%b")}: {total}')
    return total


def get_monthly_counts(function, label):
    """Build a long-format table of per-month result counts for one search function.

    Month ranges run from January 2016 up to today. For each range the count
    comes from count_date_range_results. The returned DataFrame has the
    columns 'metric' (always `label`), 'date' (start of the month range) and
    'count'.
    """
    first_month = dt.datetime(2016, 1, 1)
    month_ranges = inat.get_interval_ranges(first_month, dt.datetime.today(), 'month')

    # Keyed by range start, so a repeated start date keeps only its latest count
    counts_by_month = {}
    for month_start, month_end in month_ranges:
        counts_by_month[month_start] = count_date_range_results(function, month_start, month_end)

    records = []
    for month_start, n_results in counts_by_month.items():
        records.append({'metric': label, 'date': month_start, 'count': n_results})
    return pd.DataFrame(records)

### this part didn't quite work...

In [None]:
# Each call walks every month range produced by get_monthly_counts and issues
# one count query per month, so this cell makes many requests.
taxa_by_month = get_monthly_counts(inat.get_observation_species_counts, 'Taxa')
observers_by_month = get_monthly_counts(inat.get_observation_observers, 'Observers')
identifiers_by_month = get_monthly_counts(inat.get_observation_identifiers, 'Identifiers')

In [None]:
# taxa_by_month
# Stack the four long-format frames, then spread them to one column per metric indexed by date
monthly_frames = [taxa_by_month, observations_by_month, observers_by_month, identifiers_by_month]
stacked_metrics = pd.concat(monthly_frames)
combined_results = stacked_metrics.pivot(index='date', columns='metric', values='count')
combined_results.plot()

### back on track after simplifying the scope...

In [None]:
# Totals filtered by month number (1-12), one list per metric
observations = []
taxa = []
observers = []
identifiers = []

months = range(1, 13)
for month in months:
    observations.append(inat.get_observations(place_id=PLACES.keys(), month=month, verifiable=True, per_page=0)['total_results'])
    taxa.append(inat.get_observation_species_counts(place_id=PLACES.keys(), month=month, verifiable=True, per_page=0)['total_results'])
    observers.append(inat.get_observation_observers(place_id=PLACES.keys(), month=month, verifiable=True, per_page=0)['total_results'])
    identifiers.append(inat.get_observation_identifiers(place_id=PLACES.keys(), month=month, verifiable=True, per_page=0)['total_results'])

# Index the rows by month number; with the default 0-11 index every point on
# the plot was labelled one month too early.
counts = pd.DataFrame(
    {'observations':observations, 'taxa':taxa, 'observers':observers, 'identifiers':identifiers},
    index=pd.Index(months, name='month'),
)
counts.plot()

### maybe want to separately count observers vs species IDs? two ways of removing bias.

In [None]:
## Question: What if I wanted to contrast observation rate versus identification rate? I'd want to index that on the time of identification not the time of observation. 

# convenience: my recent observations, formatted for photo names

In [None]:
## option two: direct request and formatting

def get_mine(api_key: str, uname:str, lookback_in_days: int) -> pd.DataFrame:
    """
    Load one user's recent observations as a flat DataFrame.

    Args:
        api_key (str): Currently unused; kept so existing calls keep working.
        uname (str): User whose observations are requested (sent as user_id).
        lookback_in_days (int): Start of the query window, in days before now
            (sent as d1).

    Returns:
        pd.DataFrame: The 'results' list of the response, with nested fields
        flattened into dotted column names by pd.json_normalize.
    """
    start_date = dt.datetime.now() - dt.timedelta(days=lookback_in_days)

    # page='all' asks pyinaturalist to fetch every page of results
    response = inat.get_observations(user_id=[uname], d1=start_date, page='all')

    return pd.json_normalize(response['results'])

In [None]:
df = get_mine(API_KEY, 'schizoform', 5)

# Print a photo-label line for each observation, followed by its first photo
for index, row in df.iterrows():
    print(f"""\n\n{row['observed_on']:%Y%m%d} {row['taxon.name']} ({row['species_guess']}) [inat obs id: {row['id']}]""")

    # Observations can come without photos; skip the image instead of
    # failing on photos[0].
    photos = row['photos']
    if not isinstance(photos, list) or not photos:
        print("No photo attached to this observation")
        continue

    try:
        # Swap the 'square' thumbnail URL for the 'small' rendition
        response = requests.get(photos[0]['url'].replace('square','small'), timeout=30)
        response.raise_for_status()  # Raise exception if the request failed
        img = mpimg.imread(BytesIO(response.content), format='jpg')
        plt.imshow(img)
        plt.xticks([])  # Hide x tick labels
        plt.yticks([])  # Hide y tick labels
        plt.show()
    except requests.exceptions.RequestException as e:
        print(f"Failed to load image: {e}")

# bloomers
The goal here is to see what plants will be blooming when in my local region. But along the way...

### Coming soon!

What plants might be blooming next? Seeding next? What mushrooms might I see? What (esp. migratory) birds? When are mammals active?

In [None]:
# iNaturalist place IDs for the counties of interest, mapped to display names
PLACES = {
    1491: 'Powhatan County',
    2920: 'Goochland County',
    3032: 'Louisa County',
}

# Month numbers of the days one week before and one week after today (one or two values)
one_week = dt.timedelta(days=7)
nearby_months = {(dt.date.today() - one_week).month, (dt.date.today() + one_week).month}

plant_response = inat.get_observations(
    place_id=PLACES.keys(),
    month=nearby_months,
    taxon_name='plants',
    ## clade:plant 47126
    #term_id=12, 
    #term_value_id=13, # flowering 
    #term_value_id=14, # fruiting
    ## make sure I'm getting tags so I can do normalization
    verifiable=True,
    per_page=0,
)
total_observations = plant_response['total_results']
print(f'Total plant observations: {total_observations}')

In [None]:
## flowering
results = inat.get_observation_species_counts(
    place_id=PLACES.keys(), ## might want to do larger/refined search e.g. VA or piedmont
    # month numbers of the days one week before and one week after today
    month=set( [(dt.date.today()+dt.timedelta(days=-7)).month, (dt.date.today()+dt.timedelta(days=7)).month] ),
    term_id=12, 
    term_value_id=13, # flowering 
    verifiable=True,
    #per_page=0,
)

# Normalize results to DataFrame
df_species_counts = pd.json_normalize(results['results'])

# Display species names and their main images
for index, row in df_species_counts.iterrows():
    taxon_name = row['taxon.name']
    # When the column exists, a taxon without a common name holds NaN, so the
    # default of Series.get is never used; test for a missing value instead.
    common_name = row.get('taxon.preferred_common_name')
    if pd.isna(common_name):
        common_name = 'N/A'

    print(f"\n{taxon_name} ({common_name})")

    # Skip the download when the taxon has no default photo URL
    image_url = row.get('taxon.default_photo.medium_url')
    if pd.isna(image_url):
        print("No default photo available")
        continue

    try:
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()  # Raise exception if the request failed
        img = mpimg.imread(BytesIO(response.content), format='jpg')
        plt.imshow(img)
        plt.xticks([])  # Hide x tick labels
        plt.yticks([])  # Hide y tick labels
        plt.show()
    except requests.exceptions.RequestException as e:
        print(f"Failed to load image: {e}")

### It'd be nice to specifically select "flowering" images!

In [None]:
## fruiting
results = inat.get_observation_species_counts(
    place_id=PLACES.keys(), ## might want to do larger/refined search e.g. VA or piedmont
    # month numbers of the days one week before and one week after today
    month=set( [(dt.date.today()+dt.timedelta(days=-7)).month, (dt.date.today()+dt.timedelta(days=7)).month] ), 
    term_id=12, 
    term_value_id=14, # fruiting
    verifiable=True,
)

# Normalize results to DataFrame
df_species_counts = pd.json_normalize(results['results'])

# Display species names and their main images
for index, row in df_species_counts.iterrows():
    taxon_name = row['taxon.name']
    # When the column exists, a taxon without a common name holds NaN, so the
    # default of Series.get is never used; test for a missing value instead.
    common_name = row.get('taxon.preferred_common_name')
    if pd.isna(common_name):
        common_name = 'N/A'

    print(f"\n{taxon_name} ({common_name})")

    # Skip the download when the taxon has no default photo URL
    image_url = row.get('taxon.default_photo.medium_url')
    if pd.isna(image_url):
        print("No default photo available")
        continue

    try:
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()  # Raise exception if the request failed
        img = mpimg.imread(BytesIO(response.content), format='jpg')
        plt.imshow(img)
        plt.xticks([])  # Hide x tick labels
        plt.yticks([])  # Hide y tick labels
        plt.show()
    except requests.exceptions.RequestException as e:
        print(f"Failed to load image: {e}")

### It'd be nice to specifically select "fruiting" images!

In [None]:
## mushrooms
results = inat.get_observation_species_counts(
    place_id=PLACES.keys(), ## might want to do larger/refined search e.g. VA or piedmont
    # month numbers of the days one week before and one week after today
    month=set( [(dt.date.today()+dt.timedelta(days=-7)).month, (dt.date.today()+dt.timedelta(days=7)).month] ), 
    taxon_id=47170, # mushrooms
    verifiable=True,
)

# Normalize results to DataFrame
df_species_counts = pd.json_normalize(results['results'])

# Display species names and their main images
for index, row in df_species_counts.iterrows():
    taxon_name = row['taxon.name']
    # When the column exists, a taxon without a common name holds NaN, so the
    # default of Series.get is never used; test for a missing value instead.
    common_name = row.get('taxon.preferred_common_name')
    if pd.isna(common_name):
        common_name = 'N/A'

    print(f"\n{taxon_name} ({common_name})")

    # Skip the download when the taxon has no default photo URL
    image_url = row.get('taxon.default_photo.medium_url')
    if pd.isna(image_url):
        print("No default photo available")
        continue

    try:
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()  # Raise exception if the request failed
        img = mpimg.imread(BytesIO(response.content), format='jpg')
        plt.imshow(img)
        plt.xticks([])  # Hide x tick labels
        plt.yticks([])  # Hide y tick labels
        plt.show()
    except requests.exceptions.RequestException as e:
        print(f"Failed to load image: {e}")

In [None]:
## animals
results = inat.get_observation_species_counts(
    place_id=PLACES.keys(), ## might want to do larger/refined search e.g. VA or piedmont
    # month numbers of the days one week before and one week after today
    month=set( [(dt.date.today()+dt.timedelta(days=-7)).month, (dt.date.today()+dt.timedelta(days=7)).month] ), 
    taxon_id=1, # animals
    verifiable=True,
)

# Normalize results to DataFrame
df_species_counts = pd.json_normalize(results['results'])

# Display species names and their main images
for index, row in df_species_counts.iterrows():
    taxon_name = row['taxon.name']
    # When the column exists, a taxon without a common name holds NaN, so the
    # default of Series.get is never used; test for a missing value instead.
    common_name = row.get('taxon.preferred_common_name')
    if pd.isna(common_name):
        common_name = 'N/A'

    print(f"\n{taxon_name} ({common_name})")

    # Skip the download when the taxon has no default photo URL
    image_url = row.get('taxon.default_photo.medium_url')
    if pd.isna(image_url):
        print("No default photo available")
        continue

    try:
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()  # Raise exception if the request failed
        img = mpimg.imread(BytesIO(response.content), format='jpg')
        plt.imshow(img)
        plt.xticks([])  # Hide x tick labels
        plt.yticks([])  # Hide y tick labels
        plt.show()
    except requests.exceptions.RequestException as e:
        print(f"Failed to load image: {e}")

# OLD

## general case using API: get species info in given location and time.

I think I've got this basically working, and it's potentially cool: especially if I worked on the display interface. But I'm still not convinced it's accurate! I'm especially surprised I don't have the most local observations (around ME) in this time period. XRef with similar attempt via URL?

This uses requests, which I think means it's NOT using pyinaturalist (the preferred API)

In [None]:
# Deliberate stop: this assert always fails, so running the cell aborts here
# and the function defined below it in the same cell is not (re)defined.
assert False, "try using the pyinaturalist interface instead"

def get_species_preview(api_key: str,
                        location: Tuple[float, float], 
                        distance: Union[int, float]=25, 
                        day_range: Tuple[int,int]=(-7,14),
                        max_years_lookback: int=20,
                       ) -> pd.DataFrame:
    """
    Fetches observations near a location for a window around today's calendar
    date in each of the previous `max_years_lookback` years.

    One request is sent per year to the iNaturalist v1 observations endpoint.
    No taxon or annotation filter is applied, so all kinds of observations are
    returned. No paging parameters are sent, so each year contributes only the
    records contained in that single response.

    Args:
        api_key (str): The iNaturalist API key, sent as a Bearer token.
        location (Tuple[float, float]): Latitude and longitude of the point of interest.
        distance (Union[int, float]): Search radius around `location`, sent as
            `radius` (presumably kilometers -- confirm against the API docs).
        day_range (Tuple[int, int]): Offsets in days, relative to today's
            month/day in each past year, for the start and end of the window.
        max_years_lookback (int): Number of past years to query; the current
            year is not included.

    Returns:
        pd.DataFrame: All yearly results concatenated, nested fields flattened
        to dotted column names. Empty when no year was queried.

    Raises:
        requests.exceptions.HTTPError: If any request returns an error status.
    """
    # Define the base URL for the iNaturalist API
    base_url = "https://api.inaturalist.org/v1/observations"

    # Define the current date
    current = dt.datetime.now()

    # One normalized frame per queried year
    frames = []

    for year in range(current.year - max_years_lookback, current.year):
        # Window around today's month/day, transplanted into the past year
        ref_date = _same_day_in_year(current, year)
        start_date = ref_date + dt.timedelta(days=day_range[0])
        end_date = ref_date + dt.timedelta(days=day_range[1])

        # Define the parameters for the API request
        params = {
            "d1": start_date.strftime("%Y-%m-%d"),
            "d2": end_date.strftime("%Y-%m-%d"),
            "radius": distance,
            "lat": location[0],
            "lng": location[1],
            "order": "desc",
            "order_by": "observed_on",
        }

        # Make the API request; fail on an HTTP error rather than on JSON parsing
        response = requests.get(
            base_url,
            params=params,
            headers={"Authorization": "Bearer " + api_key},
            timeout=30,
        )
        response.raise_for_status()

        frames.append(pd.json_normalize(response.json()["results"]))

    # pd.concat rejects an empty list, e.g. when max_years_lookback is 0
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def _same_day_in_year(day, year):
    """Return midnight on `day`'s month/day in `year`; Feb 29 becomes Feb 28 in non-leap years."""
    try:
        return dt.datetime(year, day.month, day.day)
    except ValueError:
        return dt.datetime(year, day.month, 28)

In [None]:
# The key is loaded into API_KEY near the top of the notebook; `apiKey` is never defined.
df = get_species_preview(api_key=API_KEY, location=(37.670,-77.803), distance=25)
df.shape

In [None]:
pd.options.display.max_rows = 250
# df['annotations'].value_counts()
# Count how often each common name occurs within each iconic taxon group
common_names_by_iconic_taxon = df.groupby('taxon.iconic_taxon_name')['taxon.preferred_common_name']
common_names_by_iconic_taxon.value_counts()

In [None]:
# taxon.preferred_common_name

In [None]:
#df.columns.to_list()
# Columns worth eyeballing; the commented entries are candidates left out for now
preview_columns = [
    'taxon.iconic_taxon_name',
    'taxon.name',
    'taxon.preferred_common_name',
    #'location', 'public_positional_accuracy',
    'observed_on',
    'photos',
    #'uri',
    'user.id',
]
df[preview_columns]

In [None]:
# Show the 'uri' value of the row labelled 0.
df['uri'][0]

In [None]:
## after verifying this works -- which has a ways to go!
## todo: add means of display

In [None]:
# df.columns.to_list()
# df['observed_on'].value_counts()
# df['taxon.name'].value_counts()
# Observation ids of the rows whose taxon name is exactly 'Asimina triloba'
is_asimina_triloba = df['taxon.name'] == 'Asimina triloba'
df.loc[is_asimina_triloba, 'id']
# taxon.wikipedia_url

In [None]:
# Query the observation_fields endpoint with q="flowering" and print the JSON reply.
# (requests is already imported in the notebook's first cell.)

# Define the base URL for the iNaturalist API
base_url = "https://api.inaturalist.org/v1/observation_fields"

# Define the parameters for the API request
params = {
    "q": "flowering",
}

# Make the API request; surface HTTP errors here instead of as a JSON parsing failure
response = requests.get(base_url, params=params, timeout=30)
response.raise_for_status()

# Print the results
print(response.json())

## confidence_manimal

general idea is to look at the rate at which I ID vs withdraw ID, slash the rate at which I nevermind-withdrawn ID vs the community taxon. how full of shit am I?

In [None]:
## get a list of all the clades I provided an ID
## idea: give it a user name and an ID -- what percent were subsequently confirmed accurate? confirmed inaccurate?

In [None]:
## step one: get all of a user's observations (e.g. mine)


def get_species_preview(api_key: str,
                        user: Union[str, int], 
                       ) -> pd.DataFrame:
    """
    Fetches observations made by one iNaturalist user, newest first.

    NOTE(review): this reuses the name of the location-based
    get_species_preview defined earlier in the notebook and replaces it once
    this cell runs. Consider renaming one of the two.

    A single request is sent with no paging parameters, so only the records in
    that one response are returned.

    Args:
        api_key (str): The iNaturalist API key, sent as a Bearer token.
        user (Union[str, int]): The user to look up, sent as `user_id`
            (presumably a login name or numeric id -- confirm against the API docs).

    Returns:
        pd.DataFrame: The 'results' list of the response, with nested fields
        flattened to dotted column names.

    Raises:
        requests.exceptions.HTTPError: If the request returns an error status.
    """
    # Define the base URL for the iNaturalist API
    base_url = "https://api.inaturalist.org/v1/observations"

    # Define the parameters for the API request
    params = {
        "user_id": user,
        "order": "desc",
        "order_by": "observed_on",
    }

    # Make the API request; fail on an HTTP error rather than on JSON parsing
    response = requests.get(
        base_url,
        params=params,
        headers={"Authorization": "Bearer " + api_key},
        timeout=30,
    )
    response.raise_for_status()

    # Return the flattened results instead of appending to a notebook-level variable
    return pd.json_normalize(response.json()["results"])

