In [139]:
import requests
import json
import os
import pandas as pd
import numpy as np
import math
import time
from pprint import pprint

<h3>Start by getting your RIDB developer key and storing it as an enviroment variable</h3>

In [19]:
# first lets connect to the campsites to find all facilities where a person can camp
ridbendpoint = "https://ridb.recreation.gov/api/v1/campsites"
ridbkey = os.environ.get('RIDBKEY')
os.environ.get('RIDBKEY')

# now lets set up the specific parameters for the request
ridbparams = {'limit':50,'offset':0,'apikey': ridbkey}

# and make then load the request
response = requests.get(ridbendpoint, params=ridbparams)
campsites = json.loads(response.content)

### Then lets explore the data that comes back

In [25]:
print(campsites.keys(),'\n')


# lets print out the meta data to see what we find
print("--- Metadata ---")
pprint(campsites['METADATA'])

# Great we see there are ~102K campsites
print("\nwe will need roughly {0:.0f} loops to pull all the data".format(102000/50))

# lets print out a first dict to see what we get for each campsite
pprint(campsites['RECDATA'][0])

dict_keys(['RECDATA', 'METADATA']) 

--- Metadata ---
{'RESULTS': {'CURRENT_COUNT': 50, 'TOTAL_COUNT': 101941},
 'SEARCH_PARAMETERS': {'LIMIT': 50, 'OFFSET': 0, 'QUERY': ''}}

we will need roughly 2040 loops to pull all the data
{'ATTRIBUTES': [{'AttributeName': 'BEACH ACCESS',
                 'AttributeValue': 'Beach Access'},
                {'AttributeName': 'CAMPFIRE RINGS',
                 'AttributeValue': 'Campfire Rings'},
                {'AttributeName': 'Picnic Table',
                 'AttributeValue': 'Picnic Table'},
                {'AttributeName': 'Checkout Time',
                 'AttributeValue': '02:00 PM'},
                {'AttributeName': 'Condition Rating',
                 'AttributeValue': 'Standard'},
                {'AttributeName': 'Min Num of People', 'AttributeValue': '1'},
                {'AttributeName': 'Proximity to Water',
                 'AttributeValue': 'Lakefront'},
                {'AttributeName': 'Site Access', 'AttributeValue': 'Boat In'

In [30]:
pprint(campsites['METADATA'])

{'RESULTS': {'CURRENT_COUNT': 50, 'TOTAL_COUNT': 101941},
 'SEARCH_PARAMETERS': {'LIMIT': 50, 'OFFSET': 0, 'QUERY': ''}}


In [63]:
# thats a lot of loops - lets quickly estimate how long it would take to pull this data


# we created a timer and memory profile wrapper function awhile ago. Lets use the python decorator to execute this
from timerfunc import profile

@profile
def test_campsite_api():
    
    output_lst = []
    
    #pdb.set_trace()
    
    for i in range(50):

        # we can use the offset requests to loop through the api
        ridbparams = {'limit':50,'offset': i*50,'apikey': ridbkey}

        # and make then load the request
        response = requests.get(ridbendpoint, params=ridbparams)
        campsites = json.loads(response.content)

        # lets see how many sites we got back
        num_sites = int(campsites['METADATA']['RESULTS']['CURRENT_COUNT'])

        # we will need to load a few things to some lists but lets just do a few quick ones
        for l in range(num_sites):
            output_lst.append([campsites['RECDATA'][l]['CampsiteID'], 
                               campsites['RECDATA'][l]['ATTRIBUTES'], 
                               campsites['RECDATA'][l]['FacilityID']
                              ]
                             )


    return output_lst

In [64]:
test = test_campsite_api()

Time   58.24
Memory 20.86328125


so it takes rougly 1.2 seconds per API call. Given we need to do 2K of these that isnt a great answer. 

Lets play more on API website to see if they just have a large json or CSV with all of this data. 

Guess what they do, lets just pull in this data from the CSVs.

also lets not totally ignore the previous work we have done, we can see that the RIDB campsite API has signifincaly more information than the high level overview in the csv

In [102]:
from glob import glob
glob("../Data/RIDB_attributes/*")

['../Data/RIDB_attributes/EntityActivities_API_v1.csv',
 '../Data/RIDB_attributes/PermitEntranceZones_API_v1.csv',
 '../Data/RIDB_attributes/Media_API_v1.csv',
 '../Data/RIDB_attributes/FacilityAddresses_API_v1.csv',
 '../Data/RIDB_attributes/Facilities_API_v1.csv',
 '../Data/RIDB_attributes/Activities_API_v1.csv',
 '../Data/RIDB_attributes/Organizations_API_v1.csv',
 '../Data/RIDB_attributes/PermitEntrances_API_v1.csv',
 '../Data/RIDB_attributes/OrgEntities_API_v1.csv',
 '../Data/RIDB_attributes/Tours_API_v1.csv',
 '../Data/RIDB_attributes/CampsiteAttributes_API_v1.csv',
 '../Data/RIDB_attributes/Events_API_v1.csv',
 '../Data/RIDB_attributes/RecAreaFacilities_API_v1.csv',
 '../Data/RIDB_attributes/PermitEntranceAttributes_API_v1.csv',
 '../Data/RIDB_attributes/Links_API_v1.csv',
 '../Data/RIDB_attributes/TourAttributes_API_v1.csv',
 '../Data/RIDB_attributes/Campsites_API_v1.csv',
 '../Data/RIDB_attributes/RecAreaAddresses_API_v1.csv',
 '../Data/RIDB_attributes/MemberTours_API_v1.csv',

In [111]:
df_rec_address = pd.read_csv( '../Data/RIDB_attributes/RecAreaAddresses_API_v1.csv')
df_rec_address.dtypes

RecAreaAddressID          int64
RecAreaID                 int64
RecAreaAddressType       object
RecAreaStreetAddress1    object
RecAreaStreetAddress2    object
RecAreaStreetAddress3    object
City                     object
PostalCode               object
AddressStateCode         object
AddressCountryCode       object
LastUpdatedDate          object
dtype: object

['haleakalā', 'hawaiʻi volcanoes']

In [237]:
# now we match the two lists of names together
working = df_rec[df_rec['RecAreaName'].str.lower().str.contains('national park').fillna(False)]
mtch = combine_lsts(working.RecAreaName.values, nps_names_lst)
working['nps_name'] = working['RecAreaName'].map(mtch)

print(working.shape[0])

# awesome we only have 67 parks to go through. Lets save them to a csv and manually clean them up.
working.to_csv("temp.csv")



67


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [235]:
# after some manual cleaning we will identify the ids of the rec areas we want to study
np_rec_areas = pd.read_csv("../Data/OtherSource/manually_identified_recids_nationalparks.csv", usecols=[0,1,2,3], nrows=61)

In [226]:
# Great so now we have all our rec areas we will now have to look into the reservation data we have