#### Define Imports

In [143]:
import requests
import pandas as pd 
import random

api_key = '5TjgNMFCh7h44T09HbQnbGhU8as11D0FDdjfJhgV'
api_base_url = 'https://developer.nps.gov/api/v1/'

#### Define Functions

In [121]:
def get_parks(params):
    """
    Use to find a list of all park names, codes, states, addresses and descriptions from the NPS parks endpoint.
    Can also be used to find specific park information.
    
    api_key: Personal API key to use in request
    """
    parks = []
    response = requests.get(f"{api_base_url}parks", params=params)
    data = response.json()
    
    for park in data['data']:
        parks.append({
            'fullName': park['fullName'],
            'parkCode': park['parkCode'],
            'state': park['states'],
            'addresses': park['addresses'],
            'description': park['description']
        })
    
    return parks

In [151]:
def get_parks_in_state(params):
    """
    Retrieve a list of parks in a specified state.
    
    state_code: The code of the state (e.g., 'CA' for California)
    api_key: Personal API key to use in request
    """
    parks_in_state = []
    
    response = requests.get(f"{api_base_url}parks", params=params)
    data = response.json()
    
    for park in data['data']:
        if params['stateCode'] in park['states'].split(','):
            parks_in_state.append(park['fullName'])
    
    return parks_in_state

In [180]:
def get_activities_list(params):
    """
    Use to get a list of activities available at a specific park based on park code
    
    api_key: Personal API key to use in request
    """

    # Define the endpoint for the activities query
    activities_endpoint = f"{api_base_url}activities/parks"
    
    # get activities information
    response = requests.get(activities_endpoint, params=params)
    activities_data = response.json()
    
    # Extract the activities
    activities = []
    for activity in activities_data['data']:
        if any(park['parkCode'] == params['parkCode'] for park in activity['parks']):
            activities.append(activity['name'])
    
    # Remove duplicates
    unique_activities = list(set(activities))
    
    return unique_activities

##### Synthetic Data Functions

In [176]:
def get_info(api_key,entities,entity_type,endpoint,intent,queries,entityCode,response_call=None):
    """
    Creates synthetic data in the necessary format for a specified API call.

    api_key: Personal API key to use in request.
    entities: List of items to loop through such as State, Parks, Amentities.
    entity_type: the column name to use in association with the entities being created. For example: Parks and park_name or dist_states and state.
    endpoint: the NPS API endpoint to call such as /activities or /parks.
    intent: General label for queries in a particular group. 
            For example, the questions "Tell me about {park}" and "I want to know more about {park}" could both be categorized with the label "GetParkInfo".
    queries: A list of queries you would like to associate with a given set of API calls.
    entityCode: The entity code to be used in the API call. e.g. stateCode & parkCode
        *This will likely need to be updated to have more dynamic functionality.
    response_call: Pass through the function name that should be used to intiate the API call. The results will be recorded in the response column of the dataset in list format.
    """
    dataset = []
    for entity in entities:
        for query in queries:
            # Create API parameters 
            params = {'api_key': api_key,
                    entityCode: entity
                }
            
            if entityCode == "parkCode":
                # Park name needs to be converted to park code
                params = {'api_key': api_key,
                        entityCode: park_lookup[entity]
                }

            # Set response function to use
            if response_call == "get_activities_list":
                response = get_activities_list(params)
            elif response_call == "get_parks":
                response = get_parks(params)
            elif response_call == "get_parks_in_state":
                response = get_parks_in_state(params)
            else:
                response = ""

            dataset.append({
                "query": query.format(entity=entity),
                "intent": intent,
                "entities": {entity_type: entity},
                "api_call": {
                    "endpoint": endpoint,
                    entityCode: params[entityCode]
                },
                "response":response
            })

    synthetic_queries = pd.json_normalize(dataset)
    return synthetic_queries


In [184]:
def generate_synthetic_dataset(api_key):
    """

    api_key: Personal API key to use in request
    """
    # Create synthetic data for general park information.
    queries = ["Tell me about {entity}"]
    GetParkInfo = get_info(api_key,entities = parks, entity_type = "park_name", endpoint = "/parks", intent = "GetParkInfo", queries = queries, entityCode = "parkCode",response_call = 'get_parks')#,response_call = 'get_parks'

    queries = ["What activities can I do in {entity}"]
    GetParkActivities = get_info(api_key,entities = parks, entity_type = "park_name", endpoint = "/activities", intent = "GetParkActivities", queries = queries, entityCode = "parkCode", response_call = "get_activities_list")#, response_call = "get_activities_list"

    queries = ["Which parks are in {entity}?"]
    FindParksInState = get_info(api_key,entities = dist_states, entity_type = "state", endpoint = "/parks", intent = "FindParksInState", queries = queries, entityCode = "stateCode",response_call = "get_parks_in_state")#,response_call = "get_parks_in_state"

    synthetic_queries = pd.concat([GetParkInfo, GetParkActivities, FindParksInState], axis=0, ignore_index=True)
    return synthetic_queries

#### Define Variables

In [185]:

categories = ['activities', 'activities/parks', 'alerts', 'amenities','amenities/parksvisitorcenters',
              'amenities/parksplaces', 'articles', 'campgrounds', 'events', 'feespasses', 
              'lessonplans', 'multimedia/audio', 'multimedia/galleries', 'newsreleases',
              'parkinglots', 'parks', 'places', 'people', 'thingstodo', 
              'topics', 'topics/parks', 'tours', 'visitorcenters', ]


# list of parks
parks_df = pd.DataFrame(get_parks({'api_key': api_key}))
parks = parks_df['fullName'].tolist()
park_codes = parks_df['parkCode'].tolist()
park_lookup = dict(zip(parks, park_codes))

# list of states
cor_states = parks_df['state'].tolist()
all_states = []
for item in cor_states:
    separated_states = item.split(',')
    all_states.extend(separated_states)
dist_states = list(set(all_states))

In [186]:
synthetic_queries_df = generate_synthetic_dataset(api_key)
synthetic_queries_df

Unnamed: 0,query,intent,response,entities.park_name,api_call.endpoint,api_call.parkCode,entities.state,api_call.stateCode
0,Tell me about Abraham Lincoln Birthplace Natio...,GetParkInfo,[{'fullName': 'Abraham Lincoln Birthplace Nati...,Abraham Lincoln Birthplace National Historical...,/parks,abli,,
1,Tell me about Acadia National Park,GetParkInfo,"[{'fullName': 'Acadia National Park', 'parkCod...",Acadia National Park,/parks,acad,,
2,Tell me about Adams National Historical Park,GetParkInfo,[{'fullName': 'Adams National Historical Park'...,Adams National Historical Park,/parks,adam,,
3,Tell me about African American Civil War Memorial,GetParkInfo,[{'fullName': 'African American Civil War Memo...,African American Civil War Memorial,/parks,afam,,
4,Tell me about African Burial Ground National M...,GetParkInfo,[{'fullName': 'African Burial Ground National ...,African Burial Ground National Monument,/parks,afbg,,
...,...,...,...,...,...,...,...,...
128,Which parks are in FL?,FindParksInState,"[Big Cypress National Preserve, Biscayne Natio...",,/parks,,FL,FL
129,Which parks are in MD?,FindParksInState,"[Antietam National Battlefield, Appalachian Na...",,/parks,,MD,MD
130,Which parks are in GA?,FindParksInState,"[Andersonville National Historic Site, Appalac...",,/parks,,GA,GA
131,Which parks are in WI?,FindParksInState,"[Apostle Islands National Lakeshore, Ice Age N...",,/parks,,WI,WI


#### Development

In [85]:
def GetParkInfo(api_key):
    """
    Create synthetic data for general park information.
    Users can specify multiple queries by adding a question to the GetParkInfo list
    
    api_key: Personal API key to use in request
    """
    dataset = []
    for park in parks:
        GetParkInfo = [f"Tell me about {park}"]
        for query in GetParkInfo:
            dataset.append({
                "query": query,
                "intent": "GetParkInfo",
                "entities": {"park_name": park},
                "api_call": {
                    "endpoint": "/parks",
                    "params": {"parkCode": park_lookup[park]}
                },
                "response":""
            })
    synthetic_queries = pd.json_normalize(dataset)
    return synthetic_queries

In [91]:
def GetParkActivities(api_key):
    """

    
    api_key: Personal API key to use in request
    """
    dataset = []
    for park in parks:
        GetParkActivities = [f"What activities can I do in {park}?"]
        for query in GetParkActivities:
            dataset.append({
                "query": query,
                "intent": "GetParkActivities",
                "entities": {"park_name": park},
                "api_call": {
                    "endpoint": "/activities",
                    "params": {"parkCode": park_lookup[park]} 
                },
            "response":get_activities_list(park_lookup[park], api_key)
            })
    synthetic_queries = pd.json_normalize(dataset)
    return synthetic_queries

In [86]:
def FindParksInState(api_key):
    dataset = []
    for state in dist_states:
        FindParksInState = [f"Which parks are in {state}?"]
        for query in FindParksInState:
            dataset.append({
                "query": query,
                "intent": "FindParksInState",
                "entities": {"state": state},
                "api_call": {
                    "endpoint": "/parks",
                    "params": {"stateCode": state[:2].upper()}
                },
                "response":""
            })
    synthetic_queries = pd.json_normalize(dataset)
    return synthetic_queries