In [19]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json

In [20]:
# cache
# create cache
CACHE_FILENAME = "NP_data.json"

def open_cache():
    try:
        cache_file = open(CACHE_FILENAME, 'r')
        cache_contents = cache_file.read()
        cache_dict = json.loads(cache_contents)
        cache_file.close()
    except:
        cache_dict = {}
    return cache_dict

def save_cache(cache_dict):
    dumped_json_cache = json.dumps(cache_dict)
    fw = open(CACHE_FILENAME,"w")
    fw.write(dumped_json_cache)
    fw.close()

np_cache = open_cache()

In [166]:
# choose a state
# choose an option (activities, camping, etc.)
# get a list of qualified parks (UNITCODE -> park name)
# choose a park
# get the park's description / picture / etc.

# OR
# choose a UNESCO type
# get a list of parks and their state
# pick one, and display its attributes

In [21]:
api_key = '8fLoe0iHK1uwELcOTRO6FxqkoK2sxuwV2yueUAD0'

In [22]:
# read the csv file and get a list of all National Park UNIT_CODE
national_park = pd.read_csv('National_Parks.csv')

In [23]:
park_code = [code for code in national_park['UNIT_CODE']]

In [24]:
# create a tree structure (nested dictionary) to store data
by_state = {}
for state in national_park['STATE']:
    if state not in by_state.keys():
        by_state[state] = []

def categorize_by_state(park):
    for state in by_state.keys():
        if park['STATE'] == state:
            by_state[state].append(park['UNIT_CODE'])

attri_dict = {}
def add_dict(park):
    '''park is a dict'''
    for val in park.values():
        for code in val:
            attri_dict[code] = {
                'Full Name': '',
                'Description': '',
                'LatLong': '',
                'Address': '',
                'Contact': '',
                'Entrance Fee': '',
                'Operating Hours': '',
                'Activities': '',
                'Campgrounds': ''
            }
    return attri_dict

national_park.apply(categorize_by_state, axis=1)
park_data = add_dict(by_state)

In [25]:
# url_ex = 'https://developer.nps.gov/api/v1/alerts?parkCode=acad,dena&api_key=8fLoe0iHK1uwELcOTRO6FxqkoK2sxuwV2yueUAD0'
activities_url = 'https://developer.nps.gov/api/v1/activities?'
parks_url = 'https://developer.nps.gov/api/v1/parks?'
campgrounds_url = 'https://developer.nps.gov/api/v1/campgrounds?'

In [40]:
# function to add attributes
def get_park_attri(url, park_code, api_key, data):
    '''url, list of park codes, dict'''
    for code in park_code:
        source = f"{url}parkCode={code}&api_key={api_key}"
        response = requests.get(source)
        result = response.json()
        if code in np_cache.keys():
            return np_cache[code]
        else:
            np_cache[code] = {}
            try:
                np_cache[code]['Full Name'] = result['data'][0]['fullName']
            except:
                pass
            try:
                np_cache[code]['Description'] = result['data'][0]['description']
            except:
                pass
            try:
                np_cache[code]['LatLong'] = result['data'][0]['latLong']
            except:
                pass
            try:
                np_cache[code]['Address'] = result['data'][0]['addresses'][0]
            except:
                pass
            try:
                np_cache[code]['Contact'] = result['data'][0]['contacts']['phoneNumbers'][0]['phoneNumber']
            except:
                pass
            try:
                np_cache[code]['Entrance Fee'] = result['data'][0]['entranceFees']
            except:
                pass
            try:
                np_cache[code]['Operating Hours'] = result['data'][0]['operatingHours'][0]['standardHours']
            except:
                pass
            data[code] = np_cache[code]
            return data

def get_activity_attr(url, park_code, api_key, data):
    '''url, list of park codes, dict'''
    for code in park_code:
        source = f"{url}parkCode={code}&api_key={api_key}"
        response = requests.get(source)
        result = response.json()
        if code in np_cache.keys():
            return np_cache[code]
        else:
            np_cache[code] = {}
            try:
                activities = result['data']
                np_cache[code]['Activities'] = [act['name'] for act in activities]
            except:
                pass
            save_cache(np_cache)
            data[code] = np_cache[code]
            return data

def get_camp_attr(url, park_code, api_key, data):
    '''url, list of park codes, dict'''
    for code in park_code:
        source = f"{url}parkCode={code}&api_key={api_key}"
        response = requests.get(source)
        result = response.json()
        camp = result['data']
        if code in np_cache.keys():
            return np_cache[code]
        else:
            np_cache[code] = {}
            try:
                np_cache[code]['Campgrounds'] = [c['name'] for c in camp]
            except:
                pass
            data[code] = np_cache[code]
            return data

In [41]:
# Call the three functions to add attributes
get_camp_attr(campgrounds_url, park_code, api_key, park_data)
get_park_attri(parks_url, park_code, api_key, park_data)
get_activity_attr(activities_url, park_code, api_key, park_data)

{'Full Name': 'Amache National Historic Site',
 'Description': 'Amache, also known as the Granada Relocation Center, near Granada, Colorado was one of ten incarceration sites established by the War Relocation Authority during World War II to unjustly incarcerate Japanese Americans. Over 10,000 people, most American citizens, were incarcerated at Amache from 1942-1945. Amache - a place to reflect, recommit, and further the pursuit of freedom and justice.',
 'LatLong': 'lat:38.04962, long:-102.32903',
 'Address': {'postalCode': '81041',
  'city': 'Granada',
  'stateCode': 'CO',
  'line1': 'County Rd 23 5/10',
  'type': 'Physical',
  'line3': '',
  'line2': ''},
 'Contact': '',
 'Entrance Fee': [{'cost': '0.00',
   'description': 'There are no entrance fees.',
   'title': 'Entrance Fees'}],
 'Operating Hours': {'wednesday': 'Sunrise to Sunset',
  'monday': 'Sunrise to Sunset',
  'thursday': 'Sunrise to Sunset',
  'sunday': 'Sunrise to Sunset',
  'tuesday': 'Sunrise to Sunset',
  'friday':

In [42]:
park_data

{'AMCH': {'Full Name': 'Amache National Historic Site',
  'Description': 'Amache, also known as the Granada Relocation Center, near Granada, Colorado was one of ten incarceration sites established by the War Relocation Authority during World War II to unjustly incarcerate Japanese Americans. Over 10,000 people, most American citizens, were incarcerated at Amache from 1942-1945. Amache - a place to reflect, recommit, and further the pursuit of freedom and justice.',
  'LatLong': 'lat:38.04962, long:-102.32903',
  'Address': {'postalCode': '81041',
   'city': 'Granada',
   'stateCode': 'CO',
   'line1': 'County Rd 23 5/10',
   'type': 'Physical',
   'line3': '',
   'line2': ''},
  'Contact': '',
  'Entrance Fee': [{'cost': '0.00',
    'description': 'There are no entrance fees.',
    'title': 'Entrance Fees'}],
  'Operating Hours': {'wednesday': 'Sunrise to Sunset',
   'monday': 'Sunrise to Sunset',
   'thursday': 'Sunrise to Sunset',
   'sunday': 'Sunrise to Sunset',
   'tuesday': 'Sunr

In [29]:
# categorized by types
wiki_url = "https://en.wikipedia.org/wiki/List_of_national_parks_of_the_United_States"
response = requests.get(wiki_url)
soup = BeautifulSoup(response.content, "html.parser")

# Find the table containing the national park data
park_table = soup.find("table", {"class": "wikitable"})

# UNESCO designated Biosphere Reserves (BR)
br = []
# UNESCO designated World Heritage Sites (WHS)
whs = []
# parks designated in both UNESCO programs
both = []
def categorize_type(cells, color, type):
    if len(cells) > 0 and cells[0].get("bgcolor") == color:
        park_name = cells[0].find("a").text
        location = cells[2].find("a").text
        type.append((park_name, location))

# Loop through each row in the table and extract the park data
for row in park_table.find_all("tr"):
    # Find all the cells in the row
    cells = row.find_all("td")
    # Check if the first cell has bgcolor="#cedff2" (BR)
    categorize_type(cells, "#cedff2", br)
    # Check if the first cell has bgcolor="#cfecd2" (WHS)
    categorize_type(cells, "#cfecd2", whs)
    # Check if the first cell has bgcolor="#ddcef2" (both)
    categorize_type(cells, "#ddcef2", both)

print(br)
print(whs)
print(both)

br_full = br + both
whs_full = whs + both

[('Big Bend', 'Texas'), ('Channel Islands', 'California'), ('Congaree', 'South Carolina'), ('Death Valley', 'California'), ('Denali', 'Alaska'), ('Dry Tortugas', 'Florida'), ('Grand Teton', 'Wyoming'), ('Haleakalā', 'Hawaii'), ('Isle Royale', 'Michigan'), ('Joshua Tree', 'California'), ('Kings Canyon', 'California'), ('Rocky Mountain', 'Colorado'), ('Sequoia', 'California')]
[('Carlsbad Caverns', 'New Mexico'), ('Grand Canyon', 'Arizona'), ('Mesa Verde', 'Colorado'), ('Redwood', 'California'), ('Wrangell–St.\xa0Elias', 'Alaska'), ('Yosemite', 'California')]
[('Everglades', 'Florida'), ('Glacier', 'Montana'), ('Glacier Bay', 'Alaska'), ('Great Smoky Mountains', 'North Carolina'), ('Hawaiʻi Volcanoes', 'Hawaii'), ('Mammoth Cave', 'Kentucky'), ('Olympic', 'Washington'), ('Yellowstone', 'Wyoming')]
