#### Define Imports

In [47]:
import requests
import pandas as pd 
import random

api_key = '5TjgNMFCh7h44T09HbQnbGhU8as11D0FDdjfJhgV'
api_base_url = 'https://developer.nps.gov/api/v1/'

from helper_functions import *
model, vectorizer = state_code_model.trained_model()

#### Define Functions

In [48]:
def get_parks(params):
    """
    Use to find a list of all park names, codes, states, addresses and descriptions from the NPS parks endpoint.
    Can also be used to find specific park information.
    
    api_key: Personal API key to use in request
    """
    parks = []
    response = requests.get(f"{api_base_url}parks", params=params)
    data = response.json()
    
    for park in data['data']:
        parks.append({
            'fullName': park['fullName'],
            'parkCode': park['parkCode'],
            'state': park['states'],
            'addresses': park['addresses'],
            'description': park['description']
        })
    
    return parks

In [49]:
def get_parks_in_state(params):
    """
    Retrieve a list of parks in a specified state.
    
    state_code: The code of the state (e.g., 'CA' for California)
    api_key: Personal API key to use in request
    """
    parks_in_state = []
    
    response = requests.get(f"{api_base_url}parks", params=params)
    data = response.json()
    
    for park in data['data']:
        if params['stateCode'] in park['states'].split(','):
            parks_in_state.append(park['fullName'])
    
    return parks_in_state

In [50]:
def get_activities_list(params):
    """
    Use to get a list of activities available at a specific park based on park code
    
    api_key: Personal API key to use in request
    """

    # Define the endpoint for the activities query
    activities_endpoint = f"{api_base_url}activities/parks"
    
    # get activities information
    response = requests.get(activities_endpoint, params=params)
    activities_data = response.json()
    
    # Extract the activities
    activities = []
    for activity in activities_data['data']:
        if any(park['parkCode'] == params['parkCode'] for park in activity['parks']):
            activities.append(activity['name'])
    
    # Remove duplicates
    unique_activities = list(set(activities))
    
    return unique_activities

In [51]:
def get_alerts(params):
    """
    Retrieve a list of parks in a specified state.
    
    state_code: The code of the state (e.g., 'CA' for California)
    api_key: Personal API key to use in request
    """
    alerts = []
    
    response = requests.get(f"{api_base_url}alerts", params=params)
    data = response.json()
    for alert in data['data']:
        alerts.append(alert)
    
    return alerts

##### Synthetic Data Functions

In [52]:
def get_info(api_key,entities,entity_type,endpoint,intent,queries,entityCode,response_call=None):
    """
    Creates synthetic data in the necessary format for a specified API call.

    api_key: Personal API key to use in request.
    entities: List of items to loop through such as State, Parks, Amentities.
    entity_type: the column name to use in association with the entities being created. For example: Parks and park_name or dist_states and state.
    endpoint: the NPS API endpoint to call such as /activities or /parks.
    intent: General label for queries in a particular group. 
            For example, the questions "Tell me about {park}" and "I want to know more about {park}" could both be categorized with the label "GetParkInfo".
    queries: A list of queries you would like to associate with a given set of API calls.
    entityCode: The entity code to be used in the API call. e.g. stateCode & parkCode
        *This will likely need to be updated to have more dynamic functionality.
    response_call: Pass through the function name that should be used to intiate the API call. The results will be recorded in the response column of the dataset in list format.
    """
    dataset = []
    for entity in entities:
        for query in queries:
            # Create API parameters 
            params = {'api_key': api_key,
                    entityCode: entity
                }
            
            if entityCode == "parkCode":
                # Park name needs to be converted to park code
                params = {'api_key': api_key,
                        entityCode: park_lookup[entity]
                }
            if entityCode == "stateCode":
                params = {'api_key': api_key,
                        entityCode: state_code_model.map_state_code(entity, model, vectorizer)
                }
                
            # Set response function to use
            if response_call == "get_activities_list":
                response = get_activities_list(params)
            elif response_call == "get_parks":
                response = get_parks(params)
            elif response_call == "get_parks_in_state":
                response = get_parks_in_state(params)
            elif response_call == "get_alerts":
                 response = get_alerts(params)
            else:
                response = ""

            dataset.append({
                "query": query.format(entity=entity),
                "intent": intent,
                "entities": {entity_type: entity},
                "api_call": {
                    "endpoint": endpoint,
                    entityCode: params[entityCode]
                },
                "response":response
            })

    synthetic_queries = pd.json_normalize(dataset)
    return synthetic_queries


In [56]:
def generate_synthetic_dataset(api_key):
    """
    api_key: Personal API key to use in request
    """
    # Create synthetic data for general park information.
    queries = ["Tell me about {entity}"]
    GetParkInfo = get_info(api_key,entities = parks, entity_type = "park_name", endpoint = "/parks", intent = "GetParkInfo", queries = queries, entityCode = "parkCode")#,response_call = 'get_parks'

    # 'activities/parks'
    queries = ["What activities can I do in {entity}"]
    GetParkActivities = get_info(api_key,entities = parks, entity_type = "park_name", endpoint = "/activities", intent = "GetParkActivities", queries = queries, entityCode = "parkCode")#, response_call = "get_activities_list"

    queries = ["Which parks are in {entity}?"]
    FindParksInState = get_info(api_key,entities = dist_states, entity_type = "state", endpoint = "/parks", intent = "FindParksInState", queries = queries, entityCode = "stateCode")#,response_call = "get_parks_in_state"

    queries = ["Which parks are in {entity}?"]
    GetAlertsInPark = get_info(api_key,entities = parks, entity_type = "park_name", endpoint = "/alerts", intent = "GetAlertsInPark", queries = queries, entityCode = "parkCode")#, response_call = "get_alerts"

    synthetic_queries = pd.concat([GetParkInfo, GetParkActivities, FindParksInState, GetAlertsInPark], axis=0, ignore_index=True)
    return synthetic_queries

#### Define Variables

In [54]:

categories = ['activities', 'activities/parks', 'alerts', 'amenities','amenities/parksvisitorcenters',
              'amenities/parksplaces', 'articles', 'campgrounds', 'events', 'feespasses', 
              'lessonplans', 'multimedia/audio', 'multimedia/galleries', 'newsreleases',
              'parkinglots', 'parks', 'places', 'people', 'thingstodo', 
              'topics', 'topics/parks', 'tours', 'visitorcenters', ]


# list of parks
parks_df = pd.DataFrame(get_parks({'api_key': api_key}))
parks = parks_df['fullName'].tolist()
park_codes = parks_df['parkCode'].tolist()
park_lookup = dict(zip(parks, park_codes))

# list of states
# cor_states = parks_df['state'].tolist()
# all_states = []
# for item in cor_states:
#     separated_states = item.split(',')
#     all_states.extend(separated_states)
# dist_states = list(set(all_states))

dist_states = ["Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", 
                "Connecticut", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho", 
                "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", 
                "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", 
                "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", 
                "New Hampshire", "New Jersey", "New Mexico", "New York", 
                "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", 
                "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota", 
                "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", 
                "West Virginia", "Wisconsin", "Wyoming"]

In [29]:
# TODO:
# Create queries with abbreviated park names (Tell me about Acadia)

In [57]:
synthetic_queries_df = generate_synthetic_dataset(api_key)
synthetic_queries_df

Unnamed: 0,query,intent,response,entities.park_name,api_call.endpoint,api_call.parkCode,entities.state,api_call.stateCode
0,Tell me about Abraham Lincoln Birthplace Natio...,GetParkInfo,,Abraham Lincoln Birthplace National Historical...,/parks,abli,,
1,Tell me about Acadia National Park,GetParkInfo,,Acadia National Park,/parks,acad,,
2,Tell me about Adams National Historical Park,GetParkInfo,,Adams National Historical Park,/parks,adam,,
3,Tell me about African American Civil War Memorial,GetParkInfo,,African American Civil War Memorial,/parks,afam,,
4,Tell me about African Burial Ground National M...,GetParkInfo,,African Burial Ground National Monument,/parks,afbg,,
...,...,...,...,...,...,...,...,...
195,Which parks are in Blackwell School National H...,GetAlertsInPark,,Blackwell School National Historic Site,/alerts,blsc,,
196,Which parks are in Blue Ridge Parkway?,GetAlertsInPark,,Blue Ridge Parkway,/alerts,blri,,
197,Which parks are in Bluestone National Scenic R...,GetAlertsInPark,,Bluestone National Scenic River,/alerts,blue,,
198,Which parks are in Booker T Washington Nationa...,GetAlertsInPark,,Booker T Washington National Monument,/alerts,bowa,,


In [58]:
synthetic_queries_df[synthetic_queries_df['intent'] == "FindParksInState"] 

Unnamed: 0,query,intent,response,entities.park_name,api_call.endpoint,api_call.parkCode,entities.state,api_call.stateCode
100,Which parks are in Alabama?,FindParksInState,,,/parks,,Alabama,AL
101,Which parks are in Alaska?,FindParksInState,,,/parks,,Alaska,AK
102,Which parks are in Arizona?,FindParksInState,,,/parks,,Arizona,AZ
103,Which parks are in Arkansas?,FindParksInState,,,/parks,,Arkansas,AR
104,Which parks are in California?,FindParksInState,,,/parks,,California,CA
105,Which parks are in Colorado?,FindParksInState,,,/parks,,Colorado,CO
106,Which parks are in Connecticut?,FindParksInState,,,/parks,,Connecticut,CT
107,Which parks are in Delaware?,FindParksInState,,,/parks,,Delaware,DE
108,Which parks are in Florida?,FindParksInState,,,/parks,,Florida,FL
109,Which parks are in Georgia?,FindParksInState,,,/parks,,Georgia,GA


#### Development

In [31]:
park_abbreviations = {
    "Acadia": "Acadia National Park",
    "Arches": "Arches National Park",
    "Badlands": "Badlands National Park",
    "Big Bend": "Big Bend National Park",
    "Biscayne": "Biscayne National Park",
    "Black Canyon": "Black Canyon of the Gunnison National Park",
    "Bryce Canyon": "Bryce Canyon National Park",
    "Canyonlands": "Canyonlands National Park",
    "Capitol Reef": "Capitol Reef National Park",
    "Carlsbad Caverns": "Carlsbad Caverns National Park",
    "Carlsbad": "Carlsbad Caverns National Park",
    "Channel Islands": "Channel Islands National Park",
    "Congaree": "Congaree National Park",
    "Crater Lake": "Crater Lake National Park",
    "Cuyahoga Valley": "Cuyahoga Valley National Park",
    "Death Valley": "Death Valley National Park",
    "Denali": "Denali National Park & Preserve",
    "Dry Tortugas": "Dry Tortugas National Park",
    "Everglades": "Everglades National Park",
    "Gates of the Arctic": "Gates of the Arctic National Park & Preserve",
    "Glacier": "Glacier National Park",
    "Glacier Bay": "Glacier Bay National Park & Preserve",
    "Grand Canyon": "Grand Canyon National Park",
    "Grand Teton": "Grand Teton National Park",
    "Great Basin": "Great Basin National Park",
    "Great Sand Dunes": "Great Sand Dunes National Park & Preserve",
    "Great Smoky Mountains": "Great Smoky Mountains National Park",
    "Smoky Mountains": "Great Smoky Mountains National Park",
    "Guadalupe Mountains": "Guadalupe Mountains National Park",
    "Haleakalā": "Haleakalā National Park",
    "Hawaii Volcanoes": "Hawai'i Volcanoes National Park",
    "Hot Springs": "Hot Springs National Park",
    "Indiana Dunes": "Indiana Dunes National Park",
    "Isle Royale": "Isle Royale National Park",
    "Joshua Tree": "Joshua Tree National Park",
    "Katmai": "Katmai National Park & Preserve",
    "Kenai Fjords": "Kenai Fjords National Park",
    "Kings Canyon": "Kings Canyon National Park",
    "Kobuk Valley": "Kobuk Valley National Park",
    "Lake Clark": "Lake Clark National Park & Preserve",
    "Lassen Volcanic": "Lassen Volcanic National Park",
    "Mammoth Cave": "Mammoth Cave National Park",
    "Mammoth Caves": "Mammoth Cave National Park",
    "Mesa Verde": "Mesa Verde National Park",
    "Mount Rainier": "Mount Rainier National Park",
    "North Cascades": "North Cascades National Park",
    "Olympic": "Olympic National Park",
    "Petrified Forest": "Petrified Forest National Park",
    "Pinnacles": "Pinnacles National Park",
    "Redwood": "Redwood National and State Parks",
    "Rocky Mountain": "Rocky Mountain National Park",
    "Saguaro": "Saguaro National Park",
    "Sequoia": "Sequoia National Park",
    "Shenandoah": "Shenandoah National Park",
    "Theodore Roosevelt": "Theodore Roosevelt National Park",
    "Virgin Islands": "Virgin Islands National Park",
    "Voyageurs": "Voyageurs National Park",
    "Wind Cave": "Wind Cave National Park",
    "Wrangell-St. Elias": "Wrangell-St. Elias National Park & Preserve",
    "Yellowstone": "Yellowstone National Park",
    "Yosemite": "Yosemite National Park",
    "Zion": "Zion National Park"
}