# iNaturalist API v1 Get Observations Example
- Link: https://jumear.github.io/stirpy/lab?path=iNatAPIv1_get_observations.ipynb
- GitHub Repo: https://github.com/jumear/stirpy

## Get Data from the iNaturalist API

This example gets either a single page or multiple pages of results from the [API](https://api.inaturalist.org/). The requests are made asynchronously (in parallel, with a small incremental delay between the initiation of each page request), allowing large recordsets to be fetched in the shortest amount of time while respecting iNaturalist's [suggested request limit](https://www.inaturalist.org/pages/developers) (about 1 per second). The example also provides a model for parsing the results, including flattening some of the items returned in the results. For example, a single observation can be associated with multiple identifications, and this example code can parse those multiple identifications into a single string so that all the identifications can be written out on a single line with the same observation row. (Flattening helps in certain use cases where the data is expected to be tabular, such as when exporting to a CSV file.) The example also provides a model for client-side filtering (as an alternative when server-side filtering is not possible). There are also other examples of how to get counts of observations or a series of counts (ex. observations by state).

In [None]:
# load required modules
from urllib.parse import parse_qs # used for parsing URL parameters
import asyncio # used for asynchronous fetching
import math # used for a ceiling method
#from datetime import datetime # used to convert string datetimes into actual datetimes

# use Pyodide's pyfetch function if possible, but fall back to urllib3 outside of Pyodide
# use_pyfetch records which of the two is available so that fetch_data can pick the right one
try:
    from pyodide.http import pyfetch # Pyodide's fetch function (asynchronous)
    use_pyfetch = True
except ImportError: # only a failed import means "not in Pyodide"; any other error should surface
    #!pip install urllib3
    import urllib3 # fall back to urllib3 if pyfetch isn't available. it can be made asynchronous using asyncio.to_thread().
    use_pyfetch = False

In [None]:
# define the parameters needed for your request
# the parameters are written as a URL query string and then parsed into a dict of lists,
# ex. {'verifiable': ['true'], 'spam': ['false']}
req_params_string = 'verifiable=true&spam=false'
req_params = parse_qs(req_params_string)

# base request headers
req_headers_base = {
    'Content-Type': 'application/json',
    'Accept': 'application/json',
}

# to make authorized calls, set jwt to the "api_token" value from https://www.inaturalist.org/users/api_token.
# the JWT is valid for 24 hours. it can be used to do / access anything your iNat account can access. so keep it safe, and don't share it.
# you will also have to set use_authorization=True when making your API request below.
jwt = None

# define endpoints
# max_records and max_per_page are the limits used when paging through all results of an endpoint
endpoint_get_obs = dict(
    method='GET',
    url='https://api.inaturalist.org/v1/observations',
    max_records=10000,
    max_per_page=200,
)
endpoint_get_controlled_terms = dict(
    method='GET',
    url='https://api.inaturalist.org/v1/controlled_terms',
)

In [None]:
# basic function to fetch from the API and convert the response to JSON
async def fetch_data(url, method='GET', use_authorization=False, delay=0):
    """Wait `delay` seconds, request `url`, and return the decoded JSON body.

    Custom headers (the base headers plus Authorization) are sent only when
    use_authorization is true and jwt has been set; otherwise no headers are passed.
    """
    await asyncio.sleep(delay)
    if use_authorization and jwt:
        # copy the base headers so that the shared dict is never modified
        headers = {**req_headers_base, 'Authorization': jwt}
    else:
        headers = {}
    if not use_pyfetch:
        # run the urllib3 request in a worker thread so that it can be awaited
        response = await asyncio.to_thread(urllib3.request, method, url, headers=headers)
        payload = response.json()
    else:
        response = await pyfetch(url, method=method, headers=headers)
        payload = await response.json()
    print(f'fetch complete: {method} {url}')
    return payload

# function to GET total_results (count) from the API
async def get_total_results(endpoint, params=None, use_authorization=False, delay=0):
    """Return the total_results value (as an int) that the endpoint reports for params."""
    source_params = {} if params is None else params
    # leave out any per_page passed in by the caller, and then ask for 0 records,
    # since we need only the count, not the actual records
    count_params = {key: val for key, val in dict(source_params).items() if key != 'per_page'}
    count_params['per_page'] = ['0']
    count_url = url_with_params(endpoint['url'], count_params)
    data = await fetch_data(count_url, use_authorization=use_authorization, delay=delay)
    total_results = int(data['total_results'])
    print(f'total records: {str(total_results)}')
    return total_results

# function to GET a single page of results from the API
# additional parsing and additional filtering before and after the parsing can happen here, too
# can be called directly but generally is intended to be called through get_results
# note that the parse_function is expected to be an async function (in case we need to get additional information from the API during parsing)
# but if the parse_function gets additional data from the API for every page, then the delay used by get_results may need to be tweaked to keep within request limits
# (ideally, cases where additional data will be needed from the API for every page can be handled after all pages have been retrieved)
async def get_results_single_page(endpoint, params=None, use_authorization=False, parse_function=None, pre_parse_filter_function=None, post_parse_filter_function=None, delay=0):
    """Fetch one page from endpoint['url'] and return its (optionally filtered and parsed) results.

    endpoint: dict with at least a 'url' key.
    params: dict of parameter name -> list of values; it is copied, not modified.
    parse_function: optional async function that receives the list of rows and returns the parsed rows.
    pre_parse_filter_function / post_parse_filter_function: optional predicates applied to each row
        before / after parsing; rows for which the predicate is falsy are dropped.
    delay: seconds to wait before the request is made.

    When no parse_function is given, or when a post-parse filter is given, the return value is a list.
    """
    if params is None:
        params = {}
    rp = dict(params) # make a copy
    data = await fetch_data(url_with_params(endpoint['url'], rp), use_authorization=use_authorization, delay=delay)
    results = data.get('results') or [] # also covers a response where results is null
    # the filters build real lists (rather than lazy filter objects) so that the return type is consistent
    # and so that the rows that were filtered out can be released as soon as this page is done
    if pre_parse_filter_function:
        results = [r for r in results if pre_parse_filter_function(r)]
    if parse_function:
        results = await parse_function(results)
    if post_parse_filter_function:
        results = [r for r in results if post_parse_filter_function(r)]
    return results

# function to GET results from the API
# if get_all_pages=True, then get all records, up to the limit that the API endpoint provides.
# query pages in parallel, with each page having an incrementally delayed start.
# (iNaturalist wants you to limit requests to ~1 req/second.)
async def get_results(endpoint, params=None, get_all_pages=False, use_authorization=False, parse_function=None, pre_parse_filter_function=None, post_parse_filter_function=None):
    """Return the combined results of one page (default) or of all pages (get_all_pages=True).

    The parse and filter functions are passed through to get_results_single_page unchanged.
    """
    base_params = {} if params is None else params
    if get_all_pages:
        page_count = math.ceil(endpoint['max_records'] / endpoint['max_per_page'])
        # make a small query first to find how many total records there are.
        # this tells us how many page requests are needed in total.
        # the number of pages never goes above what the API limit (max_records) allows.
        total_results = await get_total_results(endpoint, base_params, use_authorization)
        page_count = min(page_count, math.ceil(total_results / endpoint['max_per_page']))
        print(f'pages to retrieve: {str(page_count)}')
    else:
        page_count = 1

    def params_for_page(page_number):
        page_params = dict(base_params) # make a copy
        if get_all_pages:
            # replace any per_page and page parameters from the base params
            # with per_page = max and the number of the page being requested
            page_params.pop('per_page', None)
            page_params.pop('page', None)
            page_params['per_page'] = [str(endpoint['max_per_page'])]
            page_params['page'] = [str(page_number)]
        return page_params

    async with asyncio.TaskGroup() as tg: # available in Python 3.11+
        tasks = [
            tg.create_task(get_results_single_page(
                endpoint,
                params=params_for_page(i + 1),
                use_authorization=use_authorization,
                parse_function=parse_function,
                pre_parse_filter_function=pre_parse_filter_function,
                post_parse_filter_function=post_parse_filter_function,
                delay=i, # page i starts i seconds after the first page
            ))
            for i in range(page_count)
        ]
    results = []
    for task in tasks:
        results += task.result()
    print(f'total records retrieved: {str(len(results))}')
    return results

# function used by another function get_field_value to get a particular value from a results row
def get_ref_value(rec, ref):
    """Return the value that ref points to inside rec, or None if it cannot be reached.

    rec: a dict (ex. one observation from the API results).
    ref: a reference string. references can be chained with '.' (ex. 'taxon.id'), and any part
        can carry one or more list indexes (ex. 'photos[0].id' or 'identifications[-1].created_at').

    None is returned as soon as a key is missing, a value is None, or an index cannot be applied
    (an empty list, an index past the end of the list, or a value that cannot be indexed).
    """
    value = rec
    for part in ref.split('.'):
        indexes = []
        if '[' in part:
            # 'photos[0]' -> key 'photos' and indexes ['0']
            key, *indexes = part.replace(']', '').split('[')
        else:
            key = part
        value = value.get(key)
        if value is None:
            break
        for index in map(int, indexes):
            try:
                value = value[index]
            except (IndexError, TypeError):
                # ex. photos[1] when there is only one photo: treat it as "no value" instead of failing the row
                value = None
            if value is None:
                break
        if value is None:
            break
    return value

# function used by another function get_field_value to filter a nested list value based on certain filter parameters
def filter_ref_value(rec, value, params):
    """Return the items of value that satisfy every condition in params.

    Each condition is a dict with a 'ref' (looked up in the item) that must equal either the
    value found in rec at 'value_ref' (when 'value_ref' is set) or the literal 'value'.
    """
    def expected(condition):
        value_ref = condition.get('value_ref')
        return get_ref_value(rec, value_ref) if value_ref else condition.get('value')

    return [
        item
        for item in value
        if all(get_ref_value(item, condition.get('ref')) == expected(condition) for condition in params)
    ]

# helper for get_field_value: fill the {0}, {1}, ... placeholders of a template with values looked up in source
def _render_template(source, template, refs):
    rendered = template
    for i, ref in enumerate(refs):
        rendered = rendered.replace(f'{{{i}}}', str(get_ref_value(source, ref)))
    return rendered

# function used by another function parse_results to parse a results row and get / calculate the value for a particular field
def get_field_value(rec, field):
    """Return the value of one output field for the record rec.

    field is a dict with a required 'ref' key (None means "use rec itself") and the optional keys
    'alt' (fallback reference used when ref yields None), 'function' and 'params'.

    Supported functions:
      count          - number of items in the value (0 when the value is None or empty)
      split          - split the value on params['separator'] and take item params['index'],
                       converted to int or float when possible (None when the index does not exist)
      join           - join the items of the value with params['separator'] (None when empty)
      replace        - replace params['old_text'] with params['new_text']
      combine        - fill params['template'] with the values of params['combine_refs']
      filter_combine - like combine, for every item that passes params['filter'], joined with params['separator']
      filter_count   - number of items that pass params['filter'] (distinct by params['distinct_ref'] if set)
      filter_select  - params['select_ref'] of every item that passes params['filter'], joined with params['separator']

    Apart from count, the functions are applied only when the value is not None.
    """
    # core processing
    value = get_ref_value(rec, field['ref']) if field['ref'] else rec
    if value is None and field.get('alt'):
        value = get_ref_value(rec, field['alt'])
    ff = field.get('function')
    if ff == 'count':
        value = len(value or [])
    elif value is not None:
        fp = field.get('params',{})
        if ff == 'split':
            parts = value.split(fp.get('separator'))
            try:
                value = parts[fp.get('index')]
            except IndexError:
                value = None # ex. asking for index 1 of a value that has no separator in it
            if value is not None:
                # return a number when the text is numeric, otherwise leave the text as it is
                try:
                    value = int(value)
                except ValueError:
                    try:
                        value = float(value)
                    except ValueError:
                        pass
        elif ff == 'join':
            value = fp.get('separator').join(map(str, value)) if value else None
        elif ff == 'replace':
            value = value.replace(fp.get('old_text'), fp.get('new_text'))
        elif ff == 'combine':
            value = _render_template(value, fp.get('template',''), fp.get('combine_refs',[]))
        elif ff == 'filter_combine':
            filtered = filter_ref_value(rec, value, fp.get('filter',[]))
            fvalue = [_render_template(r, fp.get('template',''), fp.get('combine_refs',[])) for r in filtered]
            value = fp.get('separator').join(fvalue) if fvalue else None
        elif ff == 'filter_count':
            filtered = filter_ref_value(rec, value, fp.get('filter',[]))
            dr = fp.get('distinct_ref')
            # get a distinct count if distinct_ref is defined
            value = len(filtered) if dr is None else len({get_ref_value(r, dr) for r in filtered})
        elif ff == 'filter_select':
            filtered = filter_ref_value(rec, value, fp.get('filter',[]))
            fvalue = [get_ref_value(r, fp.get('select_ref')) for r in filtered]
            value = fp.get('separator').join(map(str, fvalue)) if fvalue else None
    return value

# function passed by get_obs to get_results to parse a set of observation results
async def parse_obs(results):
    """Flatten raw observation rows into a list of flat dicts, one per observation.

    results: iterable of observation dicts as returned in the 'results' of the API response.
    Returns a list of dicts whose keys are the label (or ref) of each entry in parse_fields.

    Note that the rows passed in are modified during pre-processing: depending on which fields are
    selected, annotations get 'controlled_attribute' / 'controlled_value', observation field values get
    'taxon_or_value', identifications get 'vs_obs', and the observation taxon gets 'ancestors'.
    """
    # each dict in the field definition must have at least a ref (reference) key. (note: if ref is set to None, the observation row will be retrieved as the value.)
    # use an optional label if you want the key to be different from the ref.
    # use an optional alt (alternative reference) if you want a fallback in case no data is found in ref.
    # use optional function + params to do more complicated parsing of the ref.
    # really complicated logic can be handled with a little additional processing using parse_options.
    parse_fields = [
        {'ref': 'id'},
        #{'label': 'url', 'ref': None, 'function': 'combine', 'params': {'combine_refs': ['id'], 'template': 'https://www.inaturalist.org/observations/{0}'}},
        #{'ref': 'uuid'},
        {'ref': 'quality_grade'},
        #{'label': 'user_id', 'ref': 'user.id'},
        {'label': 'user_login', 'ref': 'user.login'},
        {'label': 'user_login_id', 'ref': 'user', 'function': 'combine', 'params': {'combine_refs': ['login','id'], 'template': '{0} ({1})'}},
        #{'label': 'user_name', 'ref': 'user.name'},
        #{'label': 'taxon_ancestors', 'ref': 'taxon.ancestors', 'function': 'filter_combine', 'params': {'combine_refs': ['name','rank','id'], 'template': '{0} ({1}) ({2})', 'separator': ', '}},
        #{'label': 'kingdom', 'ref': 'taxon.ancestors', 'function': 'filter_select', 'params': {'filter': [{'ref': 'rank', 'value': 'kingdom'}], 'select_ref': 'name', 'separator': ', '}},
        #{'label': 'phylum', 'ref': 'taxon.ancestors', 'function': 'filter_select', 'params': {'filter': [{'ref': 'rank', 'value': 'phylum'}], 'select_ref': 'name', 'separator': ', '}},
        #{'label': 'class', 'ref': 'taxon.ancestors', 'function': 'filter_select', 'params': {'filter': [{'ref': 'rank', 'value': 'class'}], 'select_ref': 'name', 'separator': ', '}},
        #{'label': 'order', 'ref': 'taxon.ancestors', 'function': 'filter_select', 'params': {'filter': [{'ref': 'rank', 'value': 'order'}], 'select_ref': 'name', 'separator': ', '}},
        #{'label': 'family', 'ref': 'taxon.ancestors', 'function': 'filter_select', 'params': {'filter': [{'ref': 'rank', 'value': 'family'}], 'select_ref': 'name', 'separator': ', '}},
        #{'label': 'genus', 'ref': 'taxon.ancestors', 'function': 'filter_select', 'params': {'filter': [{'ref': 'rank', 'value': 'genus'}], 'select_ref': 'name', 'separator': ', '}},
        #{'label': 'species', 'ref': 'taxon.ancestors', 'function': 'filter_select', 'params': {'filter': [{'ref': 'rank', 'value': 'species'}], 'select_ref': 'name', 'separator': ', '}},
        {'label': 'taxon_id', 'ref': 'taxon.id'},
        {'label': 'taxon_name', 'ref': 'taxon.name'},
        {'label': 'taxon_preferred_common_name', 'ref': 'taxon.preferred_common_name'},
        {'label': 'taxon_rank', 'ref': 'taxon.rank'},
        #{'label': 'taxon_rank_level', 'ref': 'taxon.rank_level'},
        #{'label': 'taxon_ancestry', 'ref': 'taxon.ancestry'},
        #{'ref': 'observed_on_string'},
        {'ref': 'time_observed_at'},
        {'ref': 'created_at'},
        #{'ref': 'updated_at'},
        {'ref': 'place_guess'},
        #{'ref': 'location'},
        {'label': 'latitude', 'ref': 'location', 'function': 'split', 'params': {'separator': ',', 'index': 0}},
        {'label': 'longitude', 'ref': 'location', 'function': 'split', 'params': {'separator': ',', 'index': 1}},
        {'ref': 'public_positional_accuracy'},
        #{'ref': 'private_place_guess'},
        #{'ref': 'private_location'},
        #{'label': 'private_latitude', 'ref': 'private_location', 'function': 'split', 'params': {'separator': ',', 'index': 0}},
        #{'label': 'private_longitude', 'ref': 'private_location', 'function': 'split', 'params': {'separator': ',', 'index': 1}},
        #{'ref': 'positional_accuracy'},
        {'ref': 'taxon_geoprivacy'},
        {'ref': 'privacy'},
        {'ref': 'description'},
        {'label': 'photos_count', 'ref':'photos', 'function': 'count'},
        #{'label': 'photo_1_id', 'ref': 'photos[0].id'},
        {'label': 'photo_1_url', 'ref': 'photos[0].url', 'function': 'replace', 'params': {'old_text': 'square', 'new_text': 'medium'}}, # size options are thumb, square, small, medium, large, and original
        {'label': 'photo_1_license_code', 'ref': 'photos[0].license_code'},
        {'label': 'sounds_count', 'ref':'sounds', 'function': 'count'},
        {'ref': 'comments_count'},
        #{'label': 'others_current_identifications_count', 'ref': 'identifications_count'},
        {'label': 'current_identifications_count', 'ref': 'identifications', 'function': 'filter_count', 'params': {'filter': [{'ref': 'current', 'value': True}]}},
        #{'label': 'current_identifications_by_observer', 'ref': 'identifications', 'function': 'filter_count', 'params': {'filter': [{'ref': 'current', 'value': True}, {'ref': 'user.id', 'value_ref': 'user.id'}]}},
        #{'label': 'current_identification_by_observer', 'ref': 'identifications', 'function': 'filter_select', 'params': {'filter': [{'ref': 'current', 'value': True}, {'ref': 'user.id', 'value_ref': 'user.id'}], 'select_ref': 'taxon.name', 'separator': ', '}},
        #{'label': 'current_identification_category_by_observer', 'ref': 'identifications', 'function': 'filter_select', 'params': {'filter': [{'ref': 'current', 'value': True}, {'ref': 'user.id', 'value_ref': 'user.id'}], 'select_ref': 'category', 'separator': ', '}},
        #{'ref': 'owners_identification_from_vision'},
        {'label': 'prefers_community_taxon', 'ref': 'preferences.prefers_community_taxon', 'alt': 'user.preferences.prefers_community_taxa'},
        #{'label': 'identifier_ids', 'ref': 'identifications', 'function': 'filter_select', 'params': {'filter': [{'ref': 'current', 'value': True}], 'select_ref': 'user.id', 'separator': ', '}},
        #{'label': 'identifier_logins', 'ref': 'identifications', 'function': 'filter_select', 'params': {'filter': [{'ref': 'current', 'value': True}], 'select_ref': 'user.login', 'separator': ', '}},
        {'label': 'identifications', 'ref': 'identifications', 'function': 'filter_combine', 'params': {'filter': [{'ref': 'current', 'value': True}], 'combine_refs': ['user.login','taxon.name','taxon.id'], 'template': '{0}: {1} ({2})', 'separator': ', '}},
        #{'label': 'identification_date_first', 'ref': 'identifications[0].created_at'},
        #{'label': 'identification_date_last', 'ref': 'identifications[-1].created_at'},
        #{'label': 'identifications_vs_obs', 'ref': 'identifications', 'function': 'filter_select', 'params': {'filter': [{'ref': 'current', 'value': True}], 'select_ref': 'vs_obs', 'separator': ', '}},
        {'label': 'identifications_vs_obs_same', 'ref': 'identifications', 'function': 'filter_count', 'params': {'filter': [{'ref': 'current', 'value': True}, {'ref': 'vs_obs', 'value': 'same'}]}},
        #{'label': 'ident_taxa_vs_obs_same', 'ref': 'identifications', 'function': 'filter_count', 'params': {'filter': [{'ref': 'current', 'value': True}, {'ref': 'vs_obs', 'value': 'same'}], 'distinct_ref': 'taxon.id'}},
        #{'label': 'identifications_vs_obs_ancestor', 'ref': 'identifications', 'function': 'filter_count', 'params': {'filter': [{'ref': 'current', 'value': True}, {'ref': 'vs_obs', 'value': 'ancestor'}]}},
        {'label': 'ident_taxa_vs_obs_ancestor', 'ref': 'identifications', 'function': 'filter_count', 'params': {'filter': [{'ref': 'current', 'value': True}, {'ref': 'vs_obs', 'value': 'ancestor'}], 'distinct_ref': 'taxon.id'}},
        #{'label': 'identifications_vs_obs_descendant', 'ref': 'identifications', 'function': 'filter_count', 'params': {'filter': [{'ref': 'current', 'value': True}, {'ref': 'vs_obs', 'value': 'descendant'}]}},
        {'label': 'ident_taxa_vs_obs_descendant', 'ref': 'identifications', 'function': 'filter_count', 'params': {'filter': [{'ref': 'current', 'value': True}, {'ref': 'vs_obs', 'value': 'descendant'}], 'distinct_ref': 'taxon.id'}},
        #{'label': 'identifications_vs_obs_different', 'ref': 'identifications', 'function': 'filter_count', 'params': {'filter': [{'ref': 'current', 'value': True}, {'ref': 'vs_obs', 'value': 'different'}]}},
        {'label': 'ident_taxa_vs_obs_different', 'ref': 'identifications', 'function': 'filter_count', 'params': {'filter': [{'ref': 'current', 'value': True}, {'ref': 'vs_obs', 'value': 'different'}], 'distinct_ref': 'taxon.id'}},
        {'label': 'reviewed_by_count', 'ref': 'reviewed_by', 'function': 'count'},
        #{'ref': 'reviewed_by', 'function': 'join', 'params': {'separator':', '}},
        #{'ref': 'captive'},
        {'label': 'annotations_count','ref':'annotations', 'function': 'count'},
        #{'label': 'annotations_ids', 'ref': 'annotations', 'function': 'filter_combine', 'params': {'combine_refs': ['controlled_attribute_id','controlled_value_id'], 'template': '{0}:{1}', 'separator': ', '}},
        {'label': 'annotations', 'ref': 'annotations', 'function': 'filter_combine', 'params': {'combine_refs': ['controlled_attribute','controlled_value'], 'template': '{0}: {1}', 'separator': ', '}}, # note: this relies on some pre-processing to create the controlled_attribute and controlled_value fields
        {'label': 'observation_fields_count', 'ref':'ofvs', 'function': 'count'},
        #{'label': 'observation_fields', 'ref': 'ofvs', 'function': 'filter_combine', 'params': {'combine_refs': ['name','field_id','value'], 'template': '{0} ({1}): {2}', 'separator': '; '}},
        {'label': 'observation_fields', 'ref': 'ofvs', 'function': 'filter_combine', 'params': {'combine_refs': ['name','field_id','taxon_or_value'], 'template': '{0} ({1}): {2}', 'separator': '; '}}, # note: this relies on pre-processing to create a field that contains either taxon or value
        {'label': 'tags_count', 'ref':'tags', 'function': 'count'},
        {'ref': 'tags', 'function': 'join', 'params': {'separator': ', '}},
        #{'ref': 'oauth_application_id'},
        #{'ref': 'site_id'},
        {'label': 'gbif_occurence_url', 'ref': 'outlinks', 'function': 'filter_select', 'params': {'filter': [{'ref': 'source', 'value': 'GBIF'}], 'select_ref': 'url', 'separator': ', '}},
    ]
    # define some parse options for more complicated situations
    # (each option switches on a pre-processing step that is needed only when a related field is selected above)
    parse_field_refs = [pf.get('ref') for pf in parse_fields]
    parse_options = []
    annotation_xref = {}
    if 'annotations' in parse_field_refs:
        annotation_xref = await get_annotations() # id -> label cross-reference
        parse_options.append('annotation_descriptions')
    if 'ofvs' in parse_field_refs:
        parse_options.append('observation_field_taxon_or_value')
    if 'identifications' in parse_field_refs:
        parse_options.append('identifications')
    if 'taxon.ancestors' in parse_field_refs:
        parse_options.append('taxon_ancestors')
    # parse based on the parse_fields definition
    presults = []
    for r in results:
        # handle some special pre-processing parse_options at the row level
        if 'annotation_descriptions' in parse_options:
            # translate the annotation ids into their labels
            for a in r.get('annotations',[]):
                a['controlled_attribute'] = annotation_xref[a['controlled_attribute_id']]
                a['controlled_value'] = annotation_xref[a['controlled_value_id']]
        if 'observation_field_taxon_or_value' in parse_options:
            # for taxon-type observation fields, show the taxon name and id instead of the raw value
            for of in r.get('ofvs',[]):
                of['taxon_or_value'] = f'{of["taxon"]["name"]} ({of["taxon"]["id"]})' if of['datatype'] == 'taxon' and of.get('taxon') else of['value']
        if 'identifications' in parse_options:
            # classify each identification's taxon relative to the observation taxon
            #ic = 0
            for i, ident in enumerate(r.get('identifications',[])):
                #ident['seq'] = i+1
                #if ident['current'] == 'true':
                #    ic += 1
                #    ident['seq_current'] = ic
                if not (rt := r.get('taxon')) or not (idt := ident.get('taxon')):
                    ident['vs_obs'] = 'none'
                elif rt['id'] == idt['id']:
                    ident['vs_obs'] = 'same'
                elif (idta := idt.get('ancestry')) is not None and rt['id'] in map(int, idta.split('/')):
                    ident['vs_obs'] = 'descendant'
                elif (rta := rt.get('ancestry')) is not None and idt['id'] in map(int, rta.split('/')):
                    ident['vs_obs'] = 'ancestor'
                else:
                    ident['vs_obs'] = 'different'
        if 'taxon_ancestors' in parse_options:
            ancestors = []
            rank_level_kingdom = 70 # this is the highest-level taxon stored in identification[i].ancestors
            rt = r.get('taxon')
            # read the id and rank_level fresh for every row, so that a value left over from a previous row can never be reused
            taxon_id = rt.get('id') if rt else None
            rank_level = rt.get('rank_level') if rt else None
            if taxon_id is not None and rank_level is not None and rank_level < rank_level_kingdom:
                # the observation taxon carries no ancestor details, so borrow them from an identification
                # whose taxon either is the observation taxon or has the observation taxon among its ancestors
                for ident in r.get('identifications',[]):
                    if (idt := ident.get('taxon')):
                        idta = idt.get('ancestors') or []
                        if idt['id'] == taxon_id:
                            ancestors = list(idta)
                            break
                        for i, a in enumerate(idta):
                            if a['id'] == taxon_id:
                                ancestors = idta[0:i] # add everything above this taxon (will add this taxon later below)
                                break
                        if ancestors:
                            break
            if rt and rank_level is not None and rank_level <= rank_level_kingdom:
                ancestors.append(dict(rt)) # the copy is made before rt gets its ancestors key
                rt['ancestors'] = ancestors
        # core processing
        row = {}
        for i, f in enumerate(parse_fields):
            row[f.get('label') or f.get('ref') or f'col_{i+1}'] = get_field_value(r,f)
        presults.append(row)
    return presults

# function to get and parse observations
async def get_obs(params=None, get_all_pages=False, use_authorization=False, parse_function=parse_obs, pre_parse_filter_function=None, post_parse_filter_function=None):
    """Get observations from the observations endpoint and parse them with parse_function.

    Parsing is skipped when params asks for only_id=true, because only the id exists in those results.
    """
    query = {} if params is None else params
    only_id_requested = query.get('only_id',['false']) == ['true']
    parser = None if only_id_requested else parse_function
    return await get_results(
        endpoint_get_obs,
        query,
        get_all_pages,
        use_authorization,
        parser,
        pre_parse_filter_function,
        post_parse_filter_function,
    )
 
# function to get a series of counts
# base_params are the (fixed) parameters that will be applied when getting the count for each item in the series.
# series_params is a list of (variable) parameters (keys) to add to base_params for each item in the series.
# series is a list of dicts, each of which defines the parameter key/value pairs for each item in the series.
# each item in the series list can contain additional attributes that are not parameters, and it does not have to contain all the keys in the series_params list.
# if add_count_to_series is set to True, the function will add the counts to the original series object; otherwise, it just returns a (deep) copy of series with counts.
async def get_count_series(endpoint, series, series_params, base_params=None, count_label='rec_count', use_authorization=False, add_count_to_series=False):
    """Return series with a count (stored under count_label) added to each item.

    One count request is made per item, with request i delayed by i seconds.
    Returns None (after printing a message) when series or series_params is empty.
    """
    import copy # used only here, to leave the caller's series untouched

    if base_params is None:
        base_params = {}
    if not series or not series_params:
        print('The series parameter must be a list of dicts with keys that include the values in the list passed in for series_params.')
        return None
    # a deep copy is needed here: with a shallow copy of the list, the counts would still be written into the caller's dicts
    results = series if add_count_to_series else copy.deepcopy(series)
    async with asyncio.TaskGroup() as tg: # available in Python 3.11+
        tasks = []
        for i, r in enumerate(results):
            rp = dict(base_params)
            for sp in series_params:
                if (spv := r.get(sp)) is not None:
                    rp.pop(sp, None) # the item's value replaces any value from base_params
                    rp[sp] = [str(spv)]
            tasks.append(tg.create_task(get_total_results(endpoint, rp, use_authorization=use_authorization, delay=i)))
    for r, t in zip(results, tasks):
        r[count_label] = t.result()
    return results

# function to combine the base url with a set of parameters
# there's a urlencode method in urllib.parse, but it's easier to get exactly what I need using this custom code.
def url_with_params(url_base, params=None):
    """Return url_base with params appended as a query string.

    params: dict of parameter name -> list of values (the shape that parse_qs produces).
        multiple values for a parameter are joined with commas (ex. id=1,2).
        a single string is treated as one value, and non-string values are converted with str().

    Names and values are percent-encoded (commas and colons are left as they are), because parse_qs
    hands back decoded text. So the values passed in are expected to be decoded, not already encoded.
    """
    from urllib.parse import quote

    if params is None:
        params = {}
    url = url_base
    for p, v in params.items():
        s = '&' if '?' in url else '?'
        values = [v] if isinstance(v, str) else v
        pv = ','.join(quote(str(item), safe=',:') for item in values)
        url += f'{s}{quote(str(p), safe=":[]")}={pv}'
    return url

# function to get annotation ids and descriptions from the API
# only the ids are included in the GET /v1/observations response. so a cross-reference is needed to translate the ids to plain English.
# results are stored in an attribute on the function called xref so that it won't be necessary to get data from the API more than once
async def get_annotations():
    """Return a dict that maps the id of each controlled term, and of each of its values, to its label."""
    cached = getattr(get_annotations, 'xref', None)
    if cached is not None:
        return cached
    xref = {}
    terms = await fetch_data(endpoint_get_controlled_terms['url'])
    for term in terms['results']:
        xref[term['id']] = term['label']
        xref.update((val['id'], val['label']) for val in term['values'])
    print(f'retrieved annnotation cross-references ({len(xref)} items)')
    get_annotations.xref = xref # remember the cross-reference for later calls
    return xref

# function to string together a list of observation ids into sets of up to a max number of observations per set
# the original intended use case is to create URLs linking to the iNaturalist Explore or Identification page, filtered for specific observations
def obs_ids_to_sets(obs_ids, max_set_size=500, separator=',', prefix=''):
    """Return a list of strings, each made of prefix plus up to max_set_size ids joined by separator.

    Each set is also printed, numbered from 1.
    """
    set_starts = range(0, len(obs_ids), max_set_size)
    obs_id_sets = [
        prefix + separator.join(str(obs_id) for obs_id in obs_ids[start:start + max_set_size])
        for start in set_starts
    ]
    for i, obs_id_string in zip(set_starts, obs_id_sets):
        print(f'Set {int(i/max_set_size+1)}: {obs_id_string}')
    return obs_id_sets

In [None]:
# main execution section

# get observations
# (this relies on the notebook's support for await at the top level of a cell)
# with get_all_pages=False, a single request is made using req_params as they are;
# set get_all_pages=True to request every page, up to the limit defined for the endpoint.
obs = await get_obs(req_params, get_all_pages=False, use_authorization=False)
#obs

# when possible, it's always best to filter on the server side by using filter parameters when making API requests.
# but when a particular filter is not available in the API, it may still be possible to filter on the client side (as opposed to server side)
# here's an example of how to do client-side filtering for observations which have >1 (current) identification using post_parse_filter_function
# use pre_parse_filter_function when you can filter based on the results directly from the API response.
# use post_parse_filter_function when you must rely on the values in a parsed field to do the filtering.
# (you can always filter separately *after* getting observations, of course, but filtering *while* getting obs saves on system memory when getting multiple pages of results from the API.)
#obs = await get_obs(req_params, get_all_pages=False, use_authorization=False, post_parse_filter_function=(lambda x: x['current_identifications_count'] > 1))

# get observation ids from obs
#obs_ids = [o.get('id') for o in obs]
#obs_id_sets = obs_ids_to_sets(obs_ids, prefix='https://www.inaturalist.org/observations/identify?id=')

# get just total results (count)
#obs_count = await get_total_results(endpoint_get_obs, req_params, use_authorization=False)
#obs_count

# get a series of counts
#obs_count_series = [
#    {'label': 'Texas 2020', 'year': 2020, 'place_id': 18},
#    {'label': 'not Texas 2020', 'year': 2020, 'not_in_place': 18},
#    {'label': 'Texas 2021', 'year': 2021, 'place_id': 18},
#    {'label': 'not Texas 2021', 'year': 2021, 'not_in_place': 18},
#]
#await get_count_series(endpoint_get_obs, obs_count_series, ['year','place_id','not_in_place'], base_params=req_params, count_label='obs_count', use_authorization=False, add_count_to_series=True)
#obs_count_series

In [None]:
# if you order by id when you get observations (this is the default behavior if you don't specify an order_by parameter),
# then it should be possible to work around the max 10000 record limit of the API by using the id_above or id_below parameters.
# i purposely am not automating this process completely (because I don't want to make it too easy to accidentally get a ton of data),
# but i'm including this bit of code here to provide an idea of how to do it.
# to use the code below, set get_more_obs = True before running.
# each run of this cell appends one more batch (all pages, up to the endpoint limit) to obs.
get_more_obs = False
#if get_more_obs and obs and len(obs) >= endpoint_get_obs['max_records'] and len(obs) % endpoint_get_obs['max_records'] == 0:
if get_more_obs and obs:
    rp = dict(req_params) # make a copy
    if rp.get('order_by',['id']) == ['id']: # this only works if the records were sorted by id
        if rp.get('order',['desc']) == ['asc']:
            # ascending order: continue above the highest id retrieved so far
            max_id = max([o.get('id') for o in obs])
            print(f'getting additional observations for id_above={max_id}')
            rp.pop('id_above', None) # remove id_above parameter, if it exists
            rp['id_above'] = [str(max_id)] # set this to the max_id so that the records we get will have ids above those of the obs we already have
        else:
            # descending order (the assumed default when no order parameter is given): continue below the lowest id retrieved so far
            min_id = min([o.get('id') for o in obs])
            print(f'getting additional observations for id_below={min_id}')
            rp.pop('id_below', None) # remove id_below parameter, if it exists
            rp['id_below'] = [str(min_id)] # set this to the min_id so that the records we get will have ids below those of the obs we already have
        obs += await get_obs(rp, get_all_pages=True, use_authorization=False)
        print(f'observations accumulated: {len(obs)}')

## Write Data to CSV

This takes the results retrieved above and writes them to a CSV file. The file will appear in the main folder of the file tree (the topmost tab in the left pane of the JupyterLab interface). Files generated in JupyterLite are saved to the browser's storage, so they will need to be downloaded to a more permanent location if they need to be archived.

In [None]:
# load required modules
import csv # used to output CSV files

In [None]:
# function to write data to a CSV file
def data_to_csv(data, csv_filename='export.csv'):
    """Write a list of dicts to csv_filename, one row per dict, with a header row.

    The columns are the keys of all the records, in the order in which they first appear.
    A record that lacks one of the columns gets an empty value in that column.
    The file is written as UTF-8. If there are no records, no file is created.
    """
    rows = list(data or [])
    if not rows:
        print(f'no records to write, so CSV file {csv_filename} was not created.')
        return
    # collect the fields from every record (not just the first) so that no record has a key without a column
    csv_fields = list(dict.fromkeys(key for row in rows for key in row))
    # the encoding is set explicitly so that the output doesn't depend on the platform's default encoding
    with open(csv_filename, 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.DictWriter(csv_file, fieldnames=csv_fields)
        csv_writer.writeheader()
        csv_writer.writerows(rows)
    print(f'created CSV file {csv_filename} with {len(rows)} records.')

In [None]:
# export to CSV
# writes the observations retrieved above (obs) to a file named observations.csv
data_to_csv(obs,'observations.csv')

## Work with Data in a DataFrame

Since many Python analysis / visualization modules and workflows rely on getting data into a `pandas` dataframe, this provides a very barebones example of getting the data into a dataframe. The dataframe should generally handle most of the data type conversions, but there's a little bit more effort to get dates into a datetime typed column in the dataframe.

In [None]:
# load required modules
import pandas as pd

In [None]:
# load data into a DataFrame (df)
# each record in obs becomes a row, and each key becomes a column
df = pd.DataFrame(obs)

In [None]:
# Get basic summary statistics for df
# (as the last expression in the cell, the result is displayed by the notebook)
df.describe()

In [None]:
# Preview the contents of the df
# (as the last expression in the cell, df is displayed by the notebook)
df

In [None]:
# convert datetime columns to datetimes, localized to UTC
# (only the columns that actually exist in df are converted, since some may not have been selected when parsing)
for k in ['time_observed_at','created_at','updated_at']:
    if k in df.columns:
        try:
            # errors='coerce' turns values that can't be parsed into NaT instead of raising an error
            df[k] = pd.to_datetime(df[k], utc=True, errors='coerce')
        except Exception: # best effort: report the column and carry on, but let KeyboardInterrupt / SystemExit through
            print(f'could not convert column {k} to datetime')

# get count (of id) by observed year
df.groupby(df.time_observed_at.dt.year).id.count()

# get count (of id) by created year
#df.groupby(df.created_at.dt.year).id.count()

In [None]:
# filtering example: records where observation_fields are not null
# (the parsed observation_fields value is None when an observation has no observation field values,
# so this keeps only the observations that have at least one)
df.loc[df.observation_fields.notnull()]

# count (of id of) records where acc > 100
# df.loc[df.public_positional_accuracy > 100].id.count()