In [77]:
import requests
# iNaturalist endpoint that returns per-species observation counts.
requestBaseURL = "https://api.inaturalist.org/v1/observations/species_counts"

# Query-string filters, one fragment per entry so individual filters are easy
# to toggle. The fragments are concatenated verbatim (each one after the first
# carries its own leading "&").
requestParams = "".join([
    "verifiable=true",
    "&spam=false",
    "&captive=false",
    "&locale=en",
    "&preferred_place_id=1",
    # Search area: a radius of 50 around the lat/lng point below
    # (units are whatever the API defines for `radius` - presumably km; confirm).
    "&lat=42.947762",
    "&lng=-78.782513",
    "&radius=50",
    # Optional filters, currently disabled:
    #   "&d1=2021-05-01",
    #   "&per_page=100",
])

# Full request URL without the page number; the page is appended per request.
requestURL = requestBaseURL + "?" + requestParams

def getResponsePage(pageNum, timeout=30):
    '''
    Fetch a single page of the species-count query and return the raw body.

    pageNum: page number appended to requestURL as the `page` query parameter.
    timeout: seconds passed to requests.get as its timeout. Without one,
             requests waits indefinitely on a stalled connection.

    Returns the response body as text (response.text).

    Raises requests.HTTPError when the server answers with a 4xx/5xx status,
    after printing the failing URL and the response body. Raises the usual
    requests exceptions (e.g. Timeout, ConnectionError) on transport failures.
    '''
    pageURL = f'{requestURL}&page={pageNum}'
    response = requests.get(pageURL, timeout=timeout)
    if response.status_code != 200:
        print('Error from API request')
        # Print the URL that was actually requested, page number included.
        print(pageURL)
        print(response.text)
        # Fail here rather than handing an error payload back as if it were
        # data. Non-200 statuses below 400 do not raise and fall through to
        # the return, as before.
        response.raise_for_status()
    return response.text

In [78]:
import json, math

def getFirstNResults(targetCount='undefined'):
    '''
    Get first N results from the API, fetching additional pages as needed.
    Calling with no targetCount returns all available results.

    targetCount: maximum number of results to return. The default sentinel
                 'undefined' (or None) means "no limit". Values larger than
                 the available total are capped to the total; negative values
                 are treated as 0.

    Returns a list holding the entries of each fetched page's 'results' array,
    in page order, truncated to the effective target count.

    Raises KeyError if the first page lacks 'per_page' / 'total_results' or
    any page lacks 'results', and json.JSONDecodeError if a page body is not
    valid JSON.
    '''
    fetchAll = targetCount is None or targetCount == 'undefined'
    results = []
    page = 1
    # Placeholders so the first page is always fetched; both are replaced with
    # real values once the first response reports the paging metadata.
    total_pages = 1
    targetResults = 0
    while page <= total_pages:
        print(f'Fetching page {page}')
        jsonData = json.loads(getResponsePage(page))
        if page == 1:
            # First run - figure out target pages
            per_page = jsonData['per_page']
            total_results = jsonData['total_results']
            if fetchAll:
                targetResults = total_results
            else:
                # Clamp at 0: a negative count would otherwise turn the slice
                # below into "drop the last N items" and return wrong data.
                targetResults = max(0, min(targetCount, total_results))
            # Guard against a zero page size instead of dividing by zero.
            total_pages = math.ceil(targetResults / per_page) if per_page > 0 else 0
            print(f'Requested target result count: {targetCount}')
            print(f'Available result count: {total_results}')
            print(f'Actual target result count: {targetResults}')
            print(f'Results per page: {per_page}')
            print(f'Target page count: {total_pages}')
        results.extend(jsonData['results'])
        # The last page may overshoot the target; trim the excess.
        results = results[:targetResults]
        page += 1
    print(f'Compiled {len(results)} results')
    return results

# Ask for up to 10000 results; the function caps this at the number the API
# reports as available.
results = getFirstNResults(targetCount=10000)

Fetching page 1
Requested target result count: 10000
Available result count: 436
Actual target result count: 436
Results per page: 500
Target page count: 1
Compiled 436 results


In [79]:
import pandas as pd

# Flatten the nested result records into a table; nested keys become
# dot-separated column names (e.g. 'taxon.name').
df = pd.json_normalize(results)

# Show the available column names to decide which ones to keep.
columns = df.columns.tolist()
columns

['count',
 'taxon.observations_count',
 'taxon.taxon_schemes_count',
 'taxon.is_active',
 'taxon.ancestry',
 'taxon.flag_counts.resolved',
 'taxon.flag_counts.unresolved',
 'taxon.wikipedia_url',
 'taxon.current_synonymous_taxon_ids',
 'taxon.iconic_taxon_id',
 'taxon.rank_level',
 'taxon.taxon_changes_count',
 'taxon.atlas_id',
 'taxon.complete_species_count',
 'taxon.parent_id',
 'taxon.name',
 'taxon.rank',
 'taxon.extinct',
 'taxon.id',
 'taxon.default_photo.id',
 'taxon.default_photo.license_code',
 'taxon.default_photo.attribution',
 'taxon.default_photo.url',
 'taxon.default_photo.original_dimensions',
 'taxon.default_photo.flags',
 'taxon.default_photo.square_url',
 'taxon.default_photo.medium_url',
 'taxon.ancestor_ids',
 'taxon.iconic_taxon_name',
 'taxon.preferred_common_name',
 'taxon.establishment_means.establishment_means',
 'taxon.establishment_means.id',
 'taxon.establishment_means.place.id',
 'taxon.establishment_means.place.name',
 'taxon.establishment_means.place.dis

In [80]:
# Filter rows and columns in one selection:
#   rows    - only records that have a taxon name
#   columns - observation count, taxon name, preferred common name
# To also drop taxa without a common name, additionally require
# df["taxon.preferred_common_name"].notna() in the row mask.
df = df.loc[
    df["taxon.name"].notna(),
    ["count", "taxon.name", "taxon.preferred_common_name"],
]

df

Unnamed: 0,count,taxon.name,taxon.preferred_common_name
0,183,Phragmites australis,common reed
1,176,Taraxacum officinale,common dandelion
2,32,Turdus migratorius,American Robin
3,28,Rhus typhina,staghorn sumac
4,19,Glechoma hederacea,ground-ivy
...,...,...,...
431,1,Waldsteinia fragarioides,Appalachian barren-strawberry
432,1,Gerrini,
433,1,Parmelioideae,
434,1,Ptychostomum capillare,Capillary Thread-moss


In [81]:
# Write the filtered table to counts.csv in the working directory, leaving out
# the DataFrame index column.
df.to_csv(path_or_buf=r'counts.csv', index=False)