In [5]:
import requests
# iNaturalist v1 `observations/species_counts` endpoint.
requestBaseURL = "https://api.inaturalist.org/v1/observations/species_counts"

# Query-string fragments sent with every request. Each fragment after the
# first carries its own leading "&", so they are concatenated with no separator.
requestParams = "".join([
    "verifiable=true",
    "&spam=false",
    "&captive=false",
    "&locale=en",
    "&preferred_place_id=1",
    "&lat=42.947762",
    "&lng=-78.782513",
    "&radius=50",
    # Optional filters, currently disabled:
    # "&d1=2021-05-01",
    # "&per_page=100",
])

# Full URL shared by all requests; page number and extra filters are appended later.
requestURL = requestBaseURL + "?" + requestParams

def getResponsePage(pageNum, extraParams=""):
    """
    Fetch one page of the configured request and return the raw response body.

    Parameters:
        pageNum: page number appended to the request as `&page=<pageNum>`.
        extraParams: additional query-string text appended verbatim after the
            page parameter (expected to start with '&' when non-empty).

    Returns:
        The response body as text. The body is returned even when the status
        code is not 200; in that case diagnostics are printed first.
    """
    # Build the URL once so the error report shows exactly what was requested,
    # including the page number and any extra parameters.
    pageURL = f'{requestURL}&page={pageNum}{extraParams}'
    response = requests.get(pageURL)
    if response.status_code != 200:
        print('Error from API request')
        print(pageURL)
        print(response.text)
    return response.text

In [6]:
import json, math

def getFirstNResults(targetCount=None, requestModifier="", silent=False):
    '''
    Get first N results from the API, fetching additional pages as needed.
    Calling with no targetCount (or with None) returns all available results.

    Parameters:
        targetCount: maximum number of results to return. None, or the legacy
            string 'undefined', means "everything the API reports available".
        requestModifier: extra query-string text handed to getResponsePage
            for every page (for example '&year=2021').
        silent: when True, no progress information is printed.

    Returns:
        A list with at most targetCount entries, taken in page order from the
        'results' array of each fetched page.

    Raises:
        json.JSONDecodeError: if a response body is not valid JSON.
        KeyError: if a response lacks 'per_page', 'total_results' or 'results'.
    '''
    # 'undefined' is still honoured so existing callers using the old sentinel keep working.
    fetchAll = targetCount is None or targetCount == 'undefined'

    results = []
    targetResults = 0
    total_pages = 1  # the first page is always fetched; the real count is derived from it
    page = 1
    while page <= total_pages:
        if not silent:
            print(f'Fetching page {page}')
        jsonData = json.loads(getResponsePage(page, requestModifier))
        if page == 1:
            # First page - work out how many results and pages are actually needed.
            per_page = jsonData['per_page']
            total_results = jsonData['total_results']
            if fetchAll:
                targetResults = total_results
            else:
                # Clamp at zero so a negative request cannot turn into a negative slice.
                targetResults = max(0, min(targetCount, total_results))
            # Guard against a zero page size, which would otherwise divide by zero.
            total_pages = math.ceil(targetResults / per_page) if per_page else 0
            if not silent:
                print(f'Requested target result count: {targetCount}')
                print(f'Available result count: {total_results}')
                print(f'Actual target result count: {targetResults}')
                print(f'Results per page: {per_page}')
                print(f'Target page count: {total_pages}')
        results.extend(jsonData['results'])
        page += 1

    # The last page may overshoot the target, so trim once at the end.
    results = results[:targetResults]
    if not silent:
        print(f'Compiled {len(results)} results')
    return results

# Collect up to 10000 results; progress is printed as each page is fetched.
results = getFirstNResults(targetCount=10000)

Fetching page 1
Requested target result count: 10000
Available result count: 5009
Actual target result count: 5009
Results per page: 500
Target page count: 11
Fetching page 2
Fetching page 3
Fetching page 4
Fetching page 5
Fetching page 6
Fetching page 7
Fetching page 8
Fetching page 9
Fetching page 10
Fetching page 11
Compiled 5009 results


In [7]:
import pandas as pd

# Flatten the nested result dicts into a table; nested keys become dotted
# column names such as 'taxon.name'.
df = pd.json_normalize(results)
columns = df.columns.tolist()
columns

['count',
 'taxon.observations_count',
 'taxon.taxon_schemes_count',
 'taxon.is_active',
 'taxon.ancestry',
 'taxon.flag_counts.resolved',
 'taxon.flag_counts.unresolved',
 'taxon.wikipedia_url',
 'taxon.current_synonymous_taxon_ids',
 'taxon.iconic_taxon_id',
 'taxon.rank_level',
 'taxon.taxon_changes_count',
 'taxon.atlas_id',
 'taxon.complete_species_count',
 'taxon.parent_id',
 'taxon.name',
 'taxon.rank',
 'taxon.extinct',
 'taxon.id',
 'taxon.default_photo.id',
 'taxon.default_photo.license_code',
 'taxon.default_photo.attribution',
 'taxon.default_photo.url',
 'taxon.default_photo.original_dimensions',
 'taxon.default_photo.flags',
 'taxon.default_photo.square_url',
 'taxon.default_photo.medium_url',
 'taxon.ancestor_ids',
 'taxon.iconic_taxon_name',
 'taxon.preferred_common_name',
 'taxon.establishment_means.establishment_means',
 'taxon.establishment_means.id',
 'taxon.establishment_means.place.id',
 'taxon.establishment_means.place.name',
 'taxon.establishment_means.place.dis

In [8]:
# Keep only rows that have a taxon name, and only the three columns of
# interest, in one label-based selection.
df = df.loc[
    df["taxon.name"].notnull(),
    ["count", "taxon.name", "taxon.preferred_common_name"],
]
# Optional stricter row filter (disabled): also require a common name.
# df = df[df["taxon.preferred_common_name"].notnull()]

df

Unnamed: 0,count,taxon.name,taxon.preferred_common_name
0,711,Phragmites australis,common reed
1,477,Branta canadensis,Canada Goose
2,465,Turdus migratorius,American Robin
3,442,Sciurus carolinensis,Eastern Gray Squirrel
4,434,Danaus plexippus,Monarch
...,...,...,...
5004,1,Acleris pulverosana,
5005,1,Claopodium rostratum,Anomodon Moss
5006,1,Inocutis rheades,
5007,1,Hemicrepidius melanopthalmus,


In [9]:
def getNResultsForYear(number, year):
    """Return up to `number` results restricted to `year`, without progress output."""
    yearFilter = f'&year={year}'
    return getFirstNResults(number, requestModifier=yearFilter, silent=True)


In [10]:
# Probe the 20 years ending at 2021, newest first, and report how many
# results (capped at 500) each year yields.
lines = []
for y in range(20):
    year = 2021-y
    print(f'Testing {year}')
    # Use a dedicated name: rebinding the global `results` here would replace
    # the data set fetched earlier with the last year's results.
    yearResults = getNResultsForYear(500,year)
    lines.append(f'{year}: {len(yearResults)}')
print()
for line in lines:
    print(line)

Testing 2021
Testing 2020
Testing 2019
Testing 2018
Testing 2017
Testing 2016
Testing 2015
Testing 2014
Testing 2013
Testing 2012
Testing 2011
Testing 2010
Testing 2009
Testing 2008
Testing 2007
Testing 2006
Testing 2005
Testing 2004
Testing 2003
Testing 2002

2021: 500
2020: 500
2019: 500
2018: 500
2017: 500
2016: 500
2015: 453
2014: 350
2013: 197
2012: 159
2011: 84
2010: 54
2009: 65
2008: 82
2007: 67
2006: 120
2005: 10
2004: 53
2003: 5
2002: 13


In [11]:
import time

def runTest(startingYear, lastYear=2021, targetSizes=(100, 200, 300, 400, 500)):
    """
    Build, for each year, result batches of several sizes from one fetch.

    For every year from startingYear through lastYear (inclusive) the largest
    requested batch is fetched once via getNResultsForYear, then sliced down
    to each target size. A size that the year cannot fill is recorded as an
    empty list rather than a short batch.

    Parameters:
        startingYear: first year to test.
        lastYear: final year to test, inclusive (defaults to 2021).
        targetSizes: non-empty iterable of batch sizes (defaults to
            100 through 500 in steps of 100).

    Returns:
        A dict keyed by the year as a string; each value is a dict keyed by
        the batch size as a string, holding that batch's list of results.

    Raises:
        ValueError: if targetSizes is empty.
    """
    # Materialise once: the sizes are loop-invariant and are iterated per year.
    sizes = list(targetSizes)
    largest = max(sizes)

    testResults = {}
    for year in range(startingYear, lastYear + 1):
        print(year, end="\t")
        batch = getNResultsForYear(largest, year)
        yearBatches = {}
        for batchSize in sizes:
            # Only accept a batch when the year has enough results to fill it.
            batchResult = batch[:batchSize] if len(batch) >= batchSize else []
            print(len(batchResult), end="\t")
            yearBatches[f'{batchSize}'] = batchResult
        testResults[f'{year}'] = yearBatches
        print()
    print('Test Complete')
    return testResults

In [14]:
# Run the batch-size test for every year from 2012 onward and keep the batches.
scopeTestResults = runTest(startingYear=2012)

2012	100	0	0	0	0	
2013	100	0	0	0	0	
2014	100	200	300	0	0	
2015	100	200	300	400	0	
2016	100	200	300	400	500	
2017	100	200	300	400	500	
2018	100	200	300	400	500	
2019	100	200	300	400	500	
2020	100	200	300	400	500	
2021	100	200	300	400	500	
Test Complete


In [13]:
# Inspect the first record of the 100-result batch stored for 2021.
scopeTestResults['2021']['100'][0]

{'count': 599,
 'taxon': {'observations_count': 30093,
  'taxon_schemes_count': 7,
  'is_active': True,
  'ancestry': '48460/47126/211194/47125/47163/47162/47434/773413/773415/64276',
  'flag_counts': {'resolved': 1, 'unresolved': 0},
  'wikipedia_url': 'http://en.wikipedia.org/wiki/Phragmites',
  'current_synonymous_taxon_ids': None,
  'iconic_taxon_id': 47126,
  'rank_level': 10,
  'taxon_changes_count': 3,
  'atlas_id': None,
  'complete_species_count': None,
  'parent_id': 64276,
  'name': 'Phragmites australis',
  'rank': 'species',
  'extinct': False,
  'id': 64237,
  'default_photo': {'id': 113593857,
   'license_code': 'cc-by-sa',
   'attribution': '(c) Andreas Rockstein, some rights reserved (CC BY-SA)',
   'url': 'https://live.staticflickr.com/493/30836034403_ed25c020ec_s.jpg',
   'original_dimensions': None,
   'flags': [],
   'square_url': 'https://live.staticflickr.com/493/30836034403_ed25c020ec_s.jpg',
   'medium_url': 'https://live.staticflickr.com/493/30836034403_ed25c0