In [90]:
import requests
# iNaturalist `species_counts` endpoint that every request in this notebook targets.
requestBaseURL = "https://api.inaturalist.org/v1/observations/species_counts"

# Fixed query filters sent with every request; joined with "&" into one query string.
requestParams = "&".join([
    "verifiable=true",
    "spam=false",
    "captive=false",
    "locale=en",
    "preferred_place_id=1",
    "lat=42.947762",
    "lng=-78.782513",
    "radius=50",
    # "d1=2021-05-01",
    # "per_page=100",
])

# Base URL plus the fixed filters; page number and extra filters are appended per request.
requestURL = requestBaseURL + "?" + requestParams

def getResponsePage(pageNum, extraParams="", timeout=30):
    """Fetch one page of the configured query and return the raw response body.

    Parameters:
        pageNum: page number appended to the query string as `page=`.
        extraParams: extra query-string text appended verbatim after the page
            parameter (callers supply their own leading '&').
        timeout: seconds to wait for the server before `requests` raises a
            timeout error; without it a stalled connection blocks forever.

    Returns:
        The response body as text. A non-200 status is reported on stdout but
        the body is still returned, so the caller decides how to handle it.
    """
    pageURL = f'{requestURL}&page={pageNum}{extraParams}'
    response = requests.get(pageURL, timeout=timeout)
    if response.status_code != 200:
        print('Error from API request')
        # Report the URL that was actually requested (including page and extras),
        # not just the shared base query.
        print(pageURL)
        print(response.text)
    return response.text

In [91]:
import json, math

def getFirstNResults(targetCount=None, requestModifier="", silent=False):
    """Get the first N results from the API, fetching additional pages as needed.

    Parameters:
        targetCount: maximum number of results wanted. `None` (the default), or
            the legacy sentinel string 'undefined', means "all available results".
            Negative values are treated as 0.
        requestModifier: extra query-string text passed through to
            getResponsePage for every page.
        silent: when True, suppress the progress/summary printing.

    Returns:
        A list with the `results` entries of the fetched pages, truncated to the
        target count (or to the total the API reports, whichever is smaller).
    """
    # 'undefined' is still accepted so existing calls that used it keep working.
    fetchAll = targetCount is None or targetCount == 'undefined'

    results = []
    targetResults = 0
    total_pages = 1  # always fetch page 1; the real page count is known after it
    page = 1
    while page <= total_pages:
        if not silent:
            print(f'Fetching page {page}')
        jsonData = json.loads(getResponsePage(page, requestModifier))
        if page == 1:
            # First page - work out how many results and pages are needed.
            per_page = jsonData['per_page']
            total_results = jsonData['total_results']
            if fetchAll:
                targetResults = total_results
            else:
                # Clamp at 0 so a negative request cannot slice items off the end.
                targetResults = max(0, min(targetCount, total_results))
            # Guard against a zero page size to avoid dividing by zero.
            total_pages = math.ceil(targetResults / per_page) if per_page else 0
            if not silent:
                print(f'Requested target result count: {targetCount}')
                print(f'Available result count: {total_results}')
                print(f'Actual target result count: {targetResults}')
                print(f'Results per page: {per_page}')
                print(f'Target page count: {total_pages}')
        results.extend(jsonData['results'])
        # The last page may overshoot the target; trim the excess.
        results = results[:targetResults]
        page += 1
    if not silent:
        print(f'Compiled {len(results)} results')
    return results

# Request up to 10000 results for the configured query; progress is printed
# because `silent` is left at its default.
results = getFirstNResults(10000)

Fetching page 1
Requested target result count: 10000
Available result count: 5007
Actual target result count: 5007
Results per page: 500
Target page count: 11
Fetching page 2
Fetching page 3
Fetching page 4
Fetching page 5
Fetching page 6
Fetching page 7
Fetching page 8
Fetching page 9
Fetching page 10
Fetching page 11
Compiled 5007 results


In [92]:
import pandas as pd

# Flatten the result records into a table; nested fields show up as dotted
# column names (e.g. "taxon.name").
df = pd.json_normalize(results)
columns = df.columns.tolist()
# columns

In [93]:
# Filter Columns
df = df.loc[:, ["count","taxon.id","taxon.name","taxon.preferred_common_name"]]

# Filter Rows: keep rows that have a taxon name and a count of at least 100
df = df[df["taxon.name"].notnull() & (df["count"] >= 100)]
# df = df[df["taxon.preferred_common_name"].notnull()]

df

Unnamed: 0,count,taxon.id,taxon.name,taxon.preferred_common_name
0,716,64237,Phragmites australis,common reed
1,482,7089,Branta canadensis,Canada Goose
2,470,12727,Turdus migratorius,American Robin
3,442,46017,Sciurus carolinensis,Eastern Gray Squirrel
4,441,42223,Odocoileus virginianus,White-tailed Deer
...,...,...,...,...
108,104,126513,Larinioides cornutus,Furrow Orbweaver
109,103,13632,Baeolophus bicolor,Tufted Titmouse
110,102,10373,Molothrus ater,Brown-headed Cowbird
111,102,84481,Calopteryx maculata,Ebony Jewelwing


In [5]:
def getNResultsForYear(number, year):
    """Return up to `number` results restricted to `year`, without progress output.

    Thin wrapper over getFirstNResults that adds a `&year=<year>` filter to the
    request and runs it in silent mode.
    """
    yearFilter = f'&year={year}'
    return getFirstNResults(number, requestModifier=yearFilter, silent=True)


In [6]:
# Probe data availability: for each of the 20 years counting back from 2021,
# record how many results (capped at 500) the API returns for that year.
latestYear = 2021
yearsToProbe = 20
resultCap = 500

lines = []
for y in range(yearsToProbe):
    year = latestYear - y
    print(f'Testing {year}')
    # Use a dedicated name here: assigning to `results` would overwrite the
    # result list that the DataFrame cells are built from.
    yearResults = getNResultsForYear(resultCap, year)
    lines.append(f'{year}: {len(yearResults)}')
print()
for line in lines:
    print(line)

Testing 2021
Testing 2020
Testing 2019
Testing 2018
Testing 2017
Testing 2016
Testing 2015
Testing 2014
Testing 2013
Testing 2012
Testing 2011
Testing 2010
Testing 2009
Testing 2008
Testing 2007
Testing 2006
Testing 2005
Testing 2004
Testing 2003
Testing 2002

2021: 500
2020: 500
2019: 500
2018: 500
2017: 500
2016: 500
2015: 453
2014: 350
2013: 197
2012: 159
2011: 84
2010: 54
2009: 65
2008: 82
2007: 67
2006: 120
2005: 10
2004: 53
2003: 5
2002: 13


In [15]:
import time

# Batch sizes (first-N result counts) that runTest slices out for every year.
targetSizes = [100,200,300,400,500]
# First and last year covered by runTest; both ends are included.
startingYear = 2012
endYear = 2021

def runTest():
    """Collect, for every year in the configured range, the first-N results per batch size.

    For each year from `startingYear` through `endYear` (inclusive) one request
    is made for the largest size in `targetSizes`; each smaller batch is a prefix
    of that response. A batch is an empty list when the year returned fewer
    results than the batch size. Prints one tab-separated row per year with the
    length of each batch.

    Returns:
        dict mapping str(year) -> dict mapping str(batchSize) -> list of results.
    """
    testResults = {}
    for year in range(startingYear, endYear + 1):
        print(year, end="\t")
        # One fetch per year at the largest size; smaller batches reuse it.
        batch = getNResultsForYear(max(targetSizes), year)
        batchesBySize = {}
        for batchSize in targetSizes:
            if len(batch) < batchSize:
                # Not enough data for a full batch of this size.
                batchResult = []
            else:
                batchResult = batch[:batchSize]
            print(len(batchResult), end="\t")
            batchesBySize[f'{batchSize}'] = batchResult
        testResults[f'{year}'] = batchesBySize
        print()
    print('Test Complete')
    return testResults

In [16]:
# Run the per-year fetch once and keep the batches; the comparison cells
# below read them from scopeTestResults.
scopeTestResults = runTest()

2012	100	0	0	0	0	
2013	100	0	0	0	0	
2014	100	200	300	0	0	
2015	100	200	300	400	0	
2016	100	200	300	400	500	
2017	100	200	300	400	500	
2018	100	200	300	400	500	
2019	100	200	300	400	500	
2020	100	200	300	400	500	
2021	100	200	300	400	500	
Test Complete


In [48]:
def getFilteredDataFromResultSet(results):
    """Turn a list of result records into a table of count and taxon names.

    Parameters:
        results: list of result dicts as returned by the API; nested `taxon`
            fields become dotted columns after normalization.

    Returns:
        DataFrame with exactly the columns "count", "taxon.name" and
        "taxon.preferred_common_name", keeping only rows with a taxon name.
        An empty input yields an empty frame with those columns.
    """
    wantedColumns = ["count","taxon.name","taxon.preferred_common_name"]
    resultsDF = pd.json_normalize(results)
    # Filter Columns. reindex (rather than plain [] selection) tolerates an empty
    # result list or a batch where no record has one of the fields: the missing
    # column is created filled with NaN instead of raising KeyError.
    resultsDF = resultsDF.reindex(columns=wantedColumns)

    # Filter Rows
    resultsDF = resultsDF[resultsDF["taxon.name"].notnull()]

    return resultsDF

def runYearlyComparison(previousYearResults, currentYearResults):
    """Return the rows for taxa present in both years' result sets.

    Both result lists are reduced with getFilteredDataFromResultSet and then
    inner-joined on "taxon.name", so the returned frame has one row per name
    match between the two years.
    """
    previousYearDF = getFilteredDataFromResultSet(previousYearResults)
    currentYearDF = getFilteredDataFromResultSet(currentYearResults)
    return previousYearDF.merge(currentYearDF, how='inner', on=['taxon.name'])
    

In [61]:
# scopeTestResults['2021']['100'][0]

# For each batch size and year, store the fraction of the batch whose taxon
# names also appear in the previous year's batch of the same size.
comparisonResults = {}

# Header row: one column per tested year
for y in scopeTestResults.keys():
    print(f'\t{y}',end="")
print()

for batchSize in targetSizes:
    comparisonResults[f'{batchSize}'] = {}
    print(batchSize,end="\t")
    for year in scopeTestResults.keys():
        if int(year) <= startingYear:
            # The first year has no predecessor to compare against.
            print('-',end="\t")
            continue
        previousYearResults = scopeTestResults[f'{int(year)-1}'][f'{batchSize}']
        currentYearResults = scopeTestResults[year][f'{batchSize}']
        if len(previousYearResults) != batchSize or len(currentYearResults) != batchSize:
            # Only compare when both years supplied a full batch of this size.
            print('-',end="\t")
            continue
        comparisonResults[f'{batchSize}'][f'{year}'] = len(runYearlyComparison(previousYearResults, currentYearResults).index)/batchSize
        print(round(comparisonResults[f'{batchSize}'][f'{year}'],2),end="\t")
    print()
                

	2012	2013	2014	2015	2016	2017	2018	2019	2020	2021
100	-	0.24	0.29	0.44	0.54	0.59	0.47	0.67	0.68	0.52	
200	-	-	-	0.48	0.52	0.56	0.56	0.69	0.73	0.52	
300	-	-	-	0.42	0.48	0.54	0.62	0.68	0.77	0.54	
400	-	-	-	-	0.46	0.55	0.63	0.7	0.74	0.53	
500	-	-	-	-	-	0.54	0.63	0.71	0.73	0.56	


In [67]:
# Print the same ratios transposed: one row per year, one column per batch size.
for b in targetSizes:
    print(f'\t{b}',end="")
print()
for y in scopeTestResults.keys():
    print(y,end="\t")
    for b in targetSizes:
        if y in comparisonResults[f'{b}']:
            res = round(comparisonResults[f'{b}'][f'{y}'],2)
        else:
            # No comparison was stored for this year/size combination.
            res = '-'
        print(res,end="\t")
    print()

	100	200	300	400	500
2012	-	-	-	-	-	
2013	0.24	-	-	-	-	
2014	0.29	-	-	-	-	
2015	0.44	0.48	0.42	-	-	
2016	0.54	0.52	0.48	0.46	-	
2017	0.59	0.56	0.54	0.55	0.54	
2018	0.47	0.56	0.62	0.63	0.63	
2019	0.67	0.69	0.68	0.7	0.71	
2020	0.68	0.73	0.77	0.74	0.73	
2021	0.52	0.52	0.54	0.53	0.56	
