# U.S. National Park Service Parks: Data Import and Inspection

## Import Libraries

In [8]:
# --- Standard library ---
import time        # timing training/prediction
import warnings

# --- Third-party ---
import requests    # HTTP client, used for the JSON API
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt   # plotting
import seaborn as sns             # plotting

# --- Notebook-wide configuration ---
# Show every column when a DataFrame is rendered
pd.set_option('display.max_columns', None)

# Suppress all warnings from this point on
warnings.filterwarnings('ignore')

## Import Raw Data from NPS API (external script)





In [None]:
# (Moved to fetch_nps_data.py)
# The triple-quoted block below is an inert string literal kept only as a
# reference copy of the fetch logic; nothing inside it runs in this notebook.
# NOTE(review): an earlier revision of this text embedded a live NPS API key.
# It now reads the key from the NPS_API_KEY environment variable instead; the
# previously exposed key should be revoked/rotated.
"""
# Available fields for each park

id
url
*fullName
parkCode
name
*description
*designation
*latitude
*longitude
latLong (combined "lat:..., long:...")
*activities (array of objects with id and name)
topics
*states
contacts (e.g., phone, email)
entranceFees (array)
entrancePasses
fees
directionsInfo
directionsUrl
operatingHours
addresses
images (photos array)
weatherInfo


import os

# Read the key from the environment; never commit the literal value.
API_KEY = os.environ["NPS_API_KEY"]
url = "https://developer.nps.gov/api/v1/parks"

def fetch_all_parks(api_key):
    all_parks = []
    start = 0
    limit = 50

    while True:
        params = {
            "limit": limit,
            "start": start,
            "api_key": api_key
        }

        response = requests.get(url, params=params)
        response.raise_for_status()  # raise error for bad response
        data = response.json().get("data", [])

        if not data:
            break  # no more data

        all_parks.extend(data)
        start += limit  # go to next page

    return all_parks

# Fetch data
parks_raw = fetch_all_parks(API_KEY)

# Convert to DataFrame
records = []
for park in parks_raw:
    activity_list = park.get('activities', [])
    activity_names = [a.get('name', '') for a in activity_list]

    records.append({
        'name': park.get('fullName', ''),
        'latitude': park.get('latitude', ''),
        'longitude': park.get('longitude', ''),
        'designation': park.get('designation', ''),
        'states': park.get('states', ''),
        'description': park.get('description', ''),
        'activities': ', '.join(activity_names)
    })

parks = pd.DataFrame(records)

# Save as CSV
parks.to_csv("../data/nps_parks_with_activities.csv", index=False)
"""

## Import Generated CSV to Dataframe

In [None]:
# Load the generated parks CSV into the `parks` DataFrame
parks = pd.read_csv(filepath_or_buffer='../data/nps_parks_with_activities.csv')

(474, 7)
                                                 name   latitude   longitude  \
0   Abraham Lincoln Birthplace National Historical...  37.585866  -85.673305   
1                                Acadia National Park  44.409286  -68.247501   
2                      Adams National Historical Park  42.255396  -71.011604   
3                 African American Civil War Memorial  38.916600  -77.026000   
4             African Burial Ground National Monument  40.714527  -74.004474   
5                 Agate Fossil Beds National Monument  42.421704 -103.753886   
6                 Ala Kahakai National Historic Trail  19.144668 -155.890734   
7                                  Alagnak Wild River  59.051802 -156.112002   
8                                 Alaska Public Lands  61.218800 -149.894536   
9                                     Alcatraz Island  37.826762 -122.423021   
10  Aleutian Islands World War II National Histori...  53.914300 -166.519700   
11          Alibates Flint Quar

## Inspect Data

In [36]:
# Structural overview of the `parks` DataFrame loaded in the previous cell.
# `display` is not imported in this notebook; it relies on IPython making it
# available in the interactive namespace.

# Print dataframe head
print('First five rows of dataframe')
display(parks.head())
print()

# Print dataframe sample
# A fixed random_state keeps the sampled rows identical across re-runs.
print('Random five rows of dataframe')
display(parks.sample(5, random_state=42))
print()

# Check for missing values
print('Check for Missing Values')
print(parks.isna().sum())
print()

# Check data types
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() emitted a stray "None" line.
print('Check Data Types')
parks.info()
print()

# Check values for each column
# Rendered with display() like the other frames in this cell.
print('Describe Dataframe')
display(parks.describe(include='all'))
print()

# Check for duplicates
print('Count of Duplicated Rows')
print(parks.duplicated().sum())

First five rows of dataframe


Unnamed: 0,name,latitude,longitude,designation,states,description,activities
0,Abraham Lincoln Birthplace National Historical...,37.585866,-85.673305,National Historical Park,KY,For over a century people from around the worl...,"Astronomy, Stargazing, Food, Picnicking, Guide..."
1,Acadia National Park,44.409286,-68.247501,National Park,ME,Acadia National Park protects the natural beau...,"Arts and Culture, Cultural Demonstrations, Ast..."
2,Adams National Historical Park,42.255396,-71.011604,National Historical Park,MA,From the sweet little farm at the foot of Penn...,"Guided Tours, Self-Guided Tours - Walking, Liv..."
3,African American Civil War Memorial,38.9166,-77.026,,DC,"Over 200,000 African-American soldiers and sai...","Guided Tours, Self-Guided Tours - Walking"
4,African Burial Ground National Monument,40.714527,-74.004474,National Monument,NY,The African Burial Ground is the oldest and la...,"Arts and Culture, Guided Tours, Junior Ranger ..."



Random five rows of dataframe


Unnamed: 0,name,latitude,longitude,designation,states,description,activities
206,Guadalupe Mountains National Park,31.923045,-104.885527,National Park,TX,"Come experience mountains and canyons, desert ...","Auto and ATV, Auto Off-Roading, Camping, Backc..."
280,Maggie L Walker National Historic Site,37.547815,-77.43747,National Historic Site,VA,Maggie Lena Walker devoted her life to civil r...,"Guided Tours, Self-Guided Tours - Walking, Jun..."
91,Chesapeake & Ohio Canal National Historical Park,39.000284,-77.24814,National Historical Park,"DC,MD,WV",Preserving America's early transportation hist...,"Biking, Road Biking, Boating, Boat Tour, Campi..."
167,Fossil Butte National Monument,41.856352,-110.762475,National Monument,WY,In the ridges of southwest Wyoming are some of...,"Hiking, Junior Ranger Program, Wildlife Watchi..."
183,George Washington Memorial Parkway,38.9628,-77.1495,Memorial Parkway,"DC,MD,VA",The George Washington Memorial Parkway was des...,"Arts and Culture, Live Music, Biking, Road Bik..."



Check for Missing Values
name            0
latitude        1
longitude       1
designation    35
states          0
description     0
activities     10
dtype: int64

Check Data Types
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 474 entries, 0 to 473
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name         474 non-null    object 
 1   latitude     473 non-null    float64
 2   longitude    473 non-null    float64
 3   designation  439 non-null    object 
 4   states       474 non-null    object 
 5   description  474 non-null    object 
 6   activities   464 non-null    object 
dtypes: float64(2), object(5)
memory usage: 26.1+ KB
None

Describe Dataframe
                                                     name    latitude  \
count                                                 474  473.000000   
unique                                                474         NaN   
top     Abraham Lincoln Birthplace Nat