In [1]:
import pandas as pd
import numpy as np
from glob import glob
import json
import time

In [2]:
# Load every recreation area exported from the RIDB API.
df_rec_all = pd.read_csv('../Data/RIDB_attributes/RecAreas_API_v1.csv')

# Build the "is this a National Park?" mask once and reuse it below.
# `na=False` treats rows with a missing RecAreaName as non-matches directly,
# so no follow-up `.fillna(False)` on an object-dtype result is needed.
is_national_park = df_rec_all['RecAreaName'].str.contains('National Park', na=False)

# Sanity-check the number of matches against the expected number of parks.
count_np = int(is_national_park.sum())
print(f"We find {count_np} rec areas, even though we know there are 62")

# Keep only the rows whose name matched.
df_rec_np = df_rec_all[is_national_park]

# Preview the filtered frame.
df_rec_np.head()


We find 66 rec areas, even though we know there are 62


Unnamed: 0,RecAreaID,OrgRecAreaID,ParentOrgID,RecAreaName,RecAreaDescription,RecAreaUseFeeDescription,RecAreaDirections,RecAreaPhone,RecAreaEmail,RecAreaReservationURL,RecAreaMapURL,RecAreaLongitude,RecAreaLatitude,StayLimit,Keywords,Reservable,Enabled,LastUpdatedDate
629,13525,GRTE,128,Grand Teton National Park,Rising above a scene rich with extraordinary w...,,Grand Teton National Park is located in northw...,(307) 739-3399,grte_info@nps.gov,,,-110.705467,43.818536,,"Arts and Culture,Auto and ATV,Biking,Boating,C...",False,True,2020-03-19
1078,13951,JEFF,128,Gateway Arch National Park,The Gateway Arch reflects St. Louis' role in t...,,For directions click on the provided link,(314) 655-1600,jeff_superintendent@nps.gov,,,-90.189251,38.625807,,Gateway Arch National Park,False,True,2020-03-19
1117,140042,,128,Channel Islands National Park,Channel Islands National Park,,,,,,,0.0,0.0,,,True,True,2020-03-19
2765,2554,ACAD,128,Acadia National Park,Acadia National Park protects the natural beau...,,"From Boston take I-95 north to Augusta, Maine,...",(207) 288-3338,acadia_information@nps.gov,,,-68.300633,44.307775,,"Arts and Culture,Biking,Boating,Camping,Climbi...",False,True,2020-03-19
2785,2573,ARCH,128,Arches National Park,Visit Arches to discover a landscape of contra...,,Arches National Park is located in southeast U...,(435) 719-2299,archinfo@nps.gov,,,-109.586367,38.722618,,"Arts and Culture,Auto and ATV,Biking,Camping,C...",False,True,2020-03-19


In [3]:
# Load the table that links each rec area to its facilities.
df_fac_rec = pd.read_csv( '../Data/RIDB_attributes/RecAreaFacilities_API_v1.csv')

print(df_fac_rec.head(), "\n")

id_columns = ['RecAreaID', "FacilityID"]
print(df_fac_rec[id_columns].dtypes)

# Both ID columns are read as int64 here; the original note says the other
# frame stores them as strings, so cast to str to make the merge keys comparable.
for id_col in id_columns:
    df_fac_rec[id_col] = df_fac_rec[id_col].astype(str)

# Left join: every national-park rec area is kept, with or without a facility match.
rec_facilities = df_rec_np.merge(df_fac_rec, how='left', on=['RecAreaID'])

   RecAreaID  FacilityID
0      16669    10000305
1       2782    10001053
2       2782    10001055
3        287    10001419
4        286    10001432 

RecAreaID     int64
FacilityID    int64
dtype: object


In [4]:
# Load the campsite-level attribute table.
df_camp = pd.read_csv("../Data/RIDB_attributes/Campsites_API_v1.csv")

camp_preview_cols = ['FacilityID','CampsiteName', 'TypeOfUse']
print(df_camp[camp_preview_cols].dtypes)

# FacilityID is read as an integer; cast it to str so it can be merged
# against the string FacilityID produced for the facility link table.
df_camp = df_camp.astype({'FacilityID': str})

FacilityID       int64
CampsiteName    object
TypeOfUse       object
dtype: object


In [5]:
# Attach campsites to the park/facility rows; this is the frame used for analysis.
# Left join keeps park/facility rows that have no matching campsite.
np_supply = rec_facilities.merge(df_camp, how='left', on='FacilityID')
np_supply.columns.values

array(['RecAreaID', 'OrgRecAreaID', 'ParentOrgID', 'RecAreaName',
       'RecAreaDescription', 'RecAreaUseFeeDescription',
       'RecAreaDirections', 'RecAreaPhone', 'RecAreaEmail',
       'RecAreaReservationURL', 'RecAreaMapURL', 'RecAreaLongitude',
       'RecAreaLatitude', 'StayLimit', 'Keywords', 'Reservable',
       'Enabled', 'LastUpdatedDate_x', 'FacilityID', 'CampsiteID',
       'CampsiteName', 'CampsiteType', 'TypeOfUse', 'Loop',
       'CampsiteAccessible', 'CampsiteLongitude', 'CampsiteLatitude',
       'CreatedDate', 'LastUpdatedDate_y'], dtype=object)

In [6]:
# Count campsites per National Park.
# `dropna=False` is required: groupby discards any row whose grouping key is
# NaN by default, and some parks have no OrgRecAreaID, so they would silently
# vanish from the count instead of appearing with their own row.
# 'count' tallies non-null CampsiteID values, so parks without campsites get 0.
np_count_sites = (
    np_supply
    .groupby(by=['RecAreaName', "RecAreaID", "OrgRecAreaID"], dropna=False)
    .agg({'CampsiteID': 'count'})
    .sort_values(by=['CampsiteID'], ascending=False)
    .rename(columns={'CampsiteID': 'count_campsites'})
)

# NOTE(review): a later cell writes df_parks to this same "temp.csv" path and
# would overwrite this file if run.
np_count_sites.to_csv("temp.csv")

In [7]:
# exploring data

In [8]:
import requests
import os
import json

In [9]:
scope = ["parkCode", 'id', "name", 'fullName', 'longitude','latitude', 'designation',"url"]

def parse_dict(tgt_dict, scope: list):
    """Flatten the records of a response payload into a DataFrame.

    Parameters
    ----------
    tgt_dict : dict
        Payload whose ``'data'`` entry is a list of per-record mappings.
    scope : list
        Keys to extract from every record; they also become the column
        names, in the same order.

    Returns
    -------
    pandas.DataFrame
        One row per record in ``tgt_dict['data']``, one column per key in
        ``scope``. An empty ``'data'`` list yields an empty frame that still
        carries the ``scope`` columns.

    Raises
    ------
    KeyError
        If ``'data'`` is absent or a record lacks one of the requested keys.
    """
    # One inner list per record, holding only the requested fields in scope order.
    rows = [[record[field] for field in scope] for record in tgt_dict['data']]

    return pd.DataFrame(rows, columns=scope)

In [10]:
def request_loop(size=10, limit=50):
    """Download consecutive pages from the NPS ``/parks`` endpoint and stack them.

    Parameters
    ----------
    size : int, default 10
        Number of pages to request.
    limit : int, default 50
        Page size sent as the ``limit`` query parameter; page ``i`` starts at
        offset ``i * limit``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-page frames built by ``parse_dict`` with the
        notebook-level ``scope`` columns. Per-page indexes are kept as-is, so
        index labels repeat across pages. With ``size=0`` an empty frame
        carrying the ``scope`` columns is returned.

    Raises
    ------
    requests.HTTPError
        If any page comes back with an HTTP error status.
    """
    endpoint = "https://developer.nps.gov/api/v1/parks"
    # The API key is read from the environment so it never lives in the notebook.
    key = os.getenv("NPSKEY")

    df_lst_parks = []

    for i in range(size):

        print("Getting {0} loop of {1}".format(i+1, size))

        parameters = {"limit": limit, "start": (i * limit), "api_key": key}
        # Without a timeout a stalled connection would hang the notebook indefinitely.
        response = requests.get(endpoint, params=parameters, timeout=30)
        # Surface HTTP failures (bad key, rate limit, ...) here rather than as
        # a confusing KeyError on 'data' further down.
        response.raise_for_status()

        df_lst_parks.append(parse_dict(response.json(), scope))

    # pd.concat raises on an empty list, so handle "no pages requested" explicitly.
    if not df_lst_parks:
        return pd.DataFrame(columns=scope)

    return pd.concat(df_lst_parks)

In [20]:
# Request 10 pages from the parks endpoint and collect them into one frame.
df_parks = request_loop(size=10)

Getting 1 loop of 10
Getting 2 loop of 10
Getting 3 loop of 10
Getting 4 loop of 10
Getting 5 loop of 10
Getting 6 loop of 10
Getting 7 loop of 10
Getting 8 loop of 10
Getting 9 loop of 10
Getting 10 loop of 10


In [21]:
# The concatenated pages carry repeating per-page index labels; replace them
# with a fresh 0..n-1 index and discard the old labels.
df_parks = df_parks.reset_index(drop=True)

In [22]:
def in_wiki_lst(txt):
    """Return True when any name in ``wiki_names_lst`` occurs inside ``txt``.

    The comparison is a case-insensitive substring check against the
    notebook-level ``wiki_names_lst``, which must be defined before this
    function is called.
    """
    return any(park.lower() in txt.lower() for park in wiki_names_lst)

In [23]:
# Try matching the Wikipedia list of National Park names against the park names.
nps_names = pd.read_csv("../Data/OtherSource/wikipedia_nps_names.csv")
# Trim non-breaking spaces (\xa0) from both ends of every name.
wiki_names_lst = [name.strip('\xa0') for name in nps_names['NationalParks_clean'].values]


In [25]:
# Flag each park whose fullName contains one of the Wikipedia National Park names.
df_parks['InWikiList'] = df_parks.fullName.apply(in_wiki_lst)

In [None]:
# Dump the flagged parks for manual inspection.
# NOTE(review): this is the same "temp.csv" path the campsite-count cell writes
# to, so running this cell overwrites that file.
df_parks.to_csv("temp.csv")

In [45]:
# Keep only the parks flagged as matching the Wikipedia National Park list.
df_parks_np = df_parks.loc[df_parks['InWikiList']]

In [48]:
# Plain list of park codes, used as the parkCode query value for the campground requests.
np_park_codes = df_parks_np['parkCode'].tolist()

In [55]:
# lets see if we can find the campsites

def campground_counts(endpoint, key, park_codes):
    """Query ``endpoint`` once per park code and return the last decoded payload.

    Parameters
    ----------
    endpoint : str
        URL to request.
    key : str
        API key, sent as the ``api_key`` query parameter.
    park_codes : iterable
        Park codes; each one is sent as the ``parkCode`` query parameter of
        its own request (first 50 results, starting at offset 0).

    Returns
    -------
    dict
        Decoded JSON body of the response for the LAST park code only.
        NOTE(review): despite the name, no counts are aggregated yet; later
        cells index into this payload directly, so the return shape is kept.

    Raises
    ------
    ValueError
        If ``park_codes`` yields no items, so no request was made.
    requests.HTTPError
        If a response comes back with an HTTP error status.
    """
    ans = None

    for park in park_codes:

        parameters = {"parkCode": park, "limit": 50, "start": 0, "api_key": key}

        # Without a timeout a stalled connection would hang the notebook indefinitely.
        response = requests.get(endpoint, params=parameters, timeout=30)
        # Fail on the HTTP error itself instead of on a malformed payload later.
        response.raise_for_status()

        ans = response.json()

    # Previously an empty park_codes left `ans` unbound and crashed with an
    # UnboundLocalError at the return; report the real problem instead.
    if ans is None:
        raise ValueError("park_codes is empty; no campground request was made")

    return ans

In [90]:
# Probe the campgrounds endpoint with a two-element slice of the park codes.
# campground_counts returns only the payload for the last code it requested.
dct_ans = campground_counts(endpoint = "https://developer.nps.gov/api/v1/campgrounds", 
                      key = os.getenv("NPSKEY"), park_codes= np_park_codes[60:62])

In [91]:
# List the fields available on the first campground record of the payload.
dct_ans['data'][0].keys()



#scope = ["parkCode", 'id', "name", 'fullName', 'longitude','latitude', 'designation',"url"]

dict_keys(['contacts', 'reservationUrl', 'regulationsurl', 'numberOfSitesReservable', 'campsites', 'longitude', 'numberOfSitesFirstComeFirstServe', 'directionsUrl', 'regulationsOverview', 'operatingHours', 'description', 'images', 'parkCode', 'amenities', 'id', 'weatheroverview', 'audioDescription', 'reservationInfo', 'accessibility', 'directionsoverview', 'name', 'latLong', 'addresses', 'latitude'])

In [96]:
# 'totalsites' value of the first campground record, converted to an int.
int(dct_ans['data'][0]['campsites']['totalsites'])

3

In [97]:
# Top-level 'total' field of the payload; it is displayed as a string, not an int.
dct_ans['total']

'36'