In [2]:

# import all dependencies
import json
import os
import urllib.parse
import urllib.request

import pandas as pd
from dotenv import load_dotenv
from icecream import ic

# Load variables from a .env file into the process environment, then read the
# NPS API key from it. os.getenv returns None when NPS_API_KEY is not set, so
# API_KEY may be None here.
load_dotenv()
API_KEY = os.getenv('NPS_API_KEY')


In [3]:
# Making a request to the NPS API
def make_request(limit, start):
    """Fetch one page of park records from the NPS ``/parks`` endpoint.

    Args:
        limit: Value sent as the API's ``limit`` query parameter (maximum
            number of records to return).
        start: Value sent as the API's ``start`` query parameter (offset of
            the first record to return).

    Returns:
        list: A new list holding the items of the response's ``"data"`` field.

    Raises:
        urllib.error.URLError: If the request cannot be completed
            (``HTTPError`` for error status codes).
        KeyError: If the decoded response has no ``"data"`` field.
    """
    base_url = "https://developer.nps.gov/api/v1/parks"
    # The key is read from the module-level API_KEY loaded from the environment.
    params = {"api_key": API_KEY, "limit": limit, "start": start}
    req = urllib.request.Request(base_url + "?" + urllib.parse.urlencode(params))

    # The context manager closes the HTTP response (and its socket) even when
    # reading or decoding fails.
    with urllib.request.urlopen(req) as response:
        data = json.loads(response.read().decode('utf-8'))

    return list(data["data"])

In [4]:
# Grab the raw park records (a list of dicts) so they can be cleaned up before
# being loaded into a DataFrame. Requests up to 471 records starting at offset 0.
# NOTE(review): 471 presumably matched the total park count when this was written - confirm.
raw_data = make_request(471, 0)

In [5]:
# Take a look at all the columns: iterating the first park's dict yields its keys.
for x in raw_data[0]:
    print(x)

id
url
fullName
parkCode
description
latitude
longitude
latLong
activities
topics
states
contacts
entranceFees
entrancePasses
fees
directionsInfo
directionsUrl
operatingHours
addresses
images
weatherInfo
name
designation
multimedia
relevanceScore


In [6]:
#Cleaning entrance fees
def clean_fees():
    """Replace every park's ``entranceFees`` entry with a boolean flag.

    Walks the module-level ``raw_data`` list and overwrites ``entranceFees``
    in place: ``True`` when the current value is truthy, ``False`` otherwise.
    """
    for record in raw_data:
        record['entranceFees'] = bool(record['entranceFees'])

In [7]:
# Apply the fee cleanup (mutates raw_data in place), then spot-check the
# first park's flag.
clean_fees()
raw_data[0]['entranceFees']

False

In [8]:
# Cleaning up operating hours
def clean_hours():
    """Collapse each park's ``operatingHours`` into a Sunday-first day mapping.

    For every park in the module-level ``raw_data`` list, takes the
    ``standardHours`` of the first ``operatingHours`` entry and stores a dict
    keyed by weekday name, ordered Sunday through Saturday, back under
    ``operatingHours``. Parks where that lookup hits a missing key or an
    empty list are left unchanged.
    """
    week = ('sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday')
    for record in raw_data:
        try:
            standard = record['operatingHours'][0]['standardHours']
            ordered = dict((day, standard[day]) for day in week)
        except (KeyError, IndexError):
            # Nothing usable for this park: keep whatever it already has.
            continue
        record['operatingHours'] = ordered

In [9]:
# Apply the hours cleanup (mutates raw_data in place), then spot-check the
# park at index 200.
clean_hours()
raw_data[200]['operatingHours']


{'sunday': 'All Day',
 'monday': 'All Day',
 'tuesday': 'All Day',
 'wednesday': 'All Day',
 'thursday': 'All Day',
 'friday': 'All Day',
 'saturday': 'All Day'}

In [10]:
def clean_latLong():
    """Merge ``latitude`` and ``longitude`` into one ``latLong`` string.

    For every park in the module-level ``raw_data`` list that has both
    fields, overwrites ``latLong`` with ``"<latitude>,<longitude>"`` and
    removes the two separate fields. Parks missing either field are left
    unchanged.
    """
    for record in raw_data:
        if 'latitude' not in record or 'longitude' not in record:
            continue
        record['latLong'] = record['latitude'] + "," + record['longitude']
        del record['latitude']
        del record['longitude']

In [11]:
# Apply the lat/long cleanup (mutates raw_data in place), then spot-check the
# first park's combined "latitude,longitude" string.
clean_latLong()
raw_data[0]['latLong']

'37.5858662,-85.67330523'

In [12]:
# Clean phone number
def clean_phone():
    """Flatten each park's contact info into a single ``phoneNumber`` entry.

    For every park in the module-level ``raw_data`` list that still has a
    ``contacts`` dict, stores the first listed phone number under
    ``phoneNumber`` and removes ``contacts``. When no first number can be
    extracted, the ``phoneNumbers`` value itself is stored as a fallback (an
    empty list if the field is absent).

    Parks without a ``contacts`` entry are left untouched, so re-running the
    cell does not raise and does not overwrite an extracted number.
    """
    for park in raw_data:
        contacts = park.pop('contacts', None)
        if not isinstance(contacts, dict):
            # Already cleaned on an earlier run, or nothing to extract from:
            # keep any existing phoneNumber as it is.
            continue

        phone_numbers = contacts.get('phoneNumbers', [])
        try:
            park['phoneNumber'] = phone_numbers[0]['phoneNumber']
        except (IndexError, KeyError, TypeError):
            # Empty list or an entry without a 'phoneNumber' field.
            park['phoneNumber'] = phone_numbers

In [13]:
# Let's test it: apply the phone cleanup (mutates raw_data in place).
clean_phone()

In [14]:
# Clean up activities
def clean_activities():
    """Reduce each park's ``activities`` to a plain list of activity names.

    For every park in the module-level ``raw_data`` list, replaces the
    ``activities`` list of dicts with the list of their ``name`` values.
    Parks whose ``activities`` field is missing, or whose entries are not
    mappings with a ``name`` (for example already-flattened strings when the
    cell is re-run), are left unchanged.
    """
    for park in raw_data:
        try:
            park['activities'] = [activity['name'] for activity in park['activities']]
        except (KeyError, TypeError):
            # Only the expected "field missing / wrong shape" errors are
            # skipped; anything else (e.g. KeyboardInterrupt) propagates.
            continue

In [15]:
# Clean up topics
def clean_topics():
    """Reduce each park's ``topics`` to a plain list of topic names.

    For every park in the module-level ``raw_data`` list, replaces the
    ``topics`` list of dicts with the list of their ``name`` values. Parks
    whose ``topics`` field is missing, or whose entries are not mappings with
    a ``name`` (for example already-flattened strings when the cell is
    re-run), are left unchanged.
    """
    for park in raw_data:
        try:
            park['topics'] = [topic['name'] for topic in park['topics']]
        except (KeyError, TypeError):
            # Only the expected "field missing / wrong shape" errors are
            # skipped; anything else (e.g. KeyboardInterrupt) propagates.
            continue

In [16]:
# Clean up addresses
def clean_addresses():
    """Collapse each park's first address into a single ``address`` string.

    For every park in the module-level ``raw_data`` list that still has an
    ``addresses`` entry, builds ``"<line1>, <city>, <stateCode> <postalCode>"``
    from the first address, stores it under ``address`` and removes
    ``addresses``. When there is no usable first address (empty list or
    missing fields), ``address`` is set to ``None``.

    Parks without an ``addresses`` entry are left untouched, so re-running
    the cell does not raise and does not overwrite an existing ``address``.
    """
    for park in raw_data:
        if 'addresses' not in park:
            # Already cleaned on an earlier run: keep the existing 'address'.
            continue

        addresses = park.pop('addresses')
        try:
            first = addresses[0]
            park['address'] = (
                first['line1'] + ", " + first['city'] + ", "
                + first['stateCode'] + " " + first['postalCode']
            )
        except (IndexError, KeyError, TypeError):
            # Empty address list, or a first entry lacking one of the fields.
            park['address'] = None

In [17]:
# Collapse each park's first address into a single 'address' string
# (mutates raw_data in place).
clean_addresses()

In [18]:
# Flatten each park's topics to a list of names (mutates raw_data in place),
# then show the first park's result.
clean_topics()
print(raw_data[0]['topics'])

['Birthplace', 'Presidents', 'Animals', 'Birds', 'Caves, Caverns and Karst', 'Geology', 'Groundwater', 'Freshwater Springs', 'Night Sky']


In [19]:
# Flatten each park's activities to a list of names (mutates raw_data in
# place), then show the first park's result.
clean_activities()
print(raw_data[0]['activities'])

['Astronomy', 'Stargazing', 'Food', 'Picnicking', 'Guided Tours', 'Self-Guided Tours - Walking', 'Hands-On', 'Junior Ranger Program', 'Wildlife Watching', 'Birdwatching', 'Park Film', 'Museum Exhibits', 'Shopping', 'Bookstore and Park Store', 'Gift Shop and Souvenirs']


In [20]:
# List the first park's keys again to see which fields the cleanup cells
# removed and added.
for x in raw_data[0].keys():
    print(x)

id
url
fullName
parkCode
description
latLong
activities
topics
states
entranceFees
entrancePasses
fees
directionsInfo
directionsUrl
operatingHours
images
weatherInfo
name
designation
multimedia
relevanceScore
phoneNumber
address


In [34]:
# Print every field value of the park at index 26 as a full-record spot check.
for x in raw_data[26].values():
    print(x)

E84B1E08-CCE8-4FFF-8DB6-B92F40953225
https://www.nps.gov/arho/index.htm
Arlington House, The Robert E. Lee Memorial
arho
Arlington House is the nation’s memorial to Robert E. Lee. It honors him for specific reasons, including his role in promoting peace and reunion after the Civil War. In a larger sense it exists as a place of study and contemplation of the meaning of some of the most difficult aspects of American history: military service; sacrifice; citizenship; duty; loyalty; slavery and freedom.
38.8822021484,-77.0734786987
['Guided Tours', 'Self-Guided Tours - Walking', 'Museum Exhibits']
['African American Heritage', 'American Revolution', 'Burial, Cemetery and Gravesite', 'National Cemetery', 'Enslavement', 'Military', 'Monuments and Memorials', 'Reconstruction', 'Social Movements', 'Civil Rights', 'Wars and Conflicts', 'War of 1812', 'Civil War', "Women's History"]
VA
False
[]
[]
Public Transportation: The Memorial is accessible by the Blue Line of the Metro subway system. The 

In [22]:
# NOTE: pandas is already imported as pd in the first cell; this repeat is redundant.
import pandas as pd

# Load the cleaned park dicts into a DataFrame.
df = pd.DataFrame(raw_data)
# 'states' holds comma-separated state codes for multi-state parks, so
# unique() reports each distinct combination rather than individual states.
df['states'].unique()

array(['KY', 'ME', 'MA', 'DC', 'NY', 'NE', 'HI', 'AK', 'CA', 'TX', 'PA',
       'CO', 'MP', 'GA', 'TN', 'MD', 'WI',
       'CT,GA,MA,MD,ME,NC,NH,NJ,NY,PA,TN,VA,VT,WV', 'VA', 'UT', 'AR',
       'MD,VA', 'NM', 'SD', 'FL', 'MT', 'KY,TN', 'MT,WY', 'AL', 'RI,MA',
       'NC,VA', 'WV', 'MS', 'KS', 'VI', 'MO,AR,OK,TX,NM,AZ,CA',
       'CA,CO,ID,KS,MO,NE,NV,OR,UT,WY', 'LA', 'AZ', 'NC',
       'VA,MD,DE,DC,PA,NY', 'SC', 'OH', 'DC,MD,WV',
       'DC,DE,MD,NY,PA,VA,WV', 'GA,TN', 'OK', 'ID', 'DC,MD,VA', 'CT',
       'OR', 'KY,TN,VA', 'CA,NV', 'NJ,PA', 'WY', 'CO,UT', 'WA', 'IA',
       'NM,TX', 'TX,LA', 'NJ,NY', 'IL,MS', 'DE,PA', 'AR,OK', 'MT,ND',
       'OR,WA', 'MO', 'NY,NJ', 'IN', 'AZ,UT', 'MN', 'NV', 'NJ', 'NC,TN',
       'FL,MS', 'WV,VA,MD', 'WA,OR,ID,MT', 'MI', 'AZ,CA', 'ND', 'AZ,NV',
       'IA,ID,IL,IN,KS,KY,MO,MT,NE,ND,OH,OR,PA,SD,WA,WV', 'IL', 'PA,NJ',
       'NM,WA,TN', 'VT', 'ID,WA', 'SD,NE', 'IL,IA,NE,UT,WY', 'AL,MS,TN',
       'AS', 'MA,CT', 'ID,MT,OR,WA', 'MI,MN,ND,NY,OH,PA,VT,WI',
 

In [23]:
# Persist the cleaned park records as JSON. The path is relative to the
# current working directory, and mode 'w' overwrites any existing file.
filename = "park_info.json"
with open(filename, 'w') as f:
    json.dump(raw_data, f)