#### Define Imports

In [1]:
import requests
import pandas as pd 
import random
import json
import sys

from helper_functions import *
# Obtain the park-code model and its vectorizer once; create_synthetic_queries reads both as globals.
# NOTE(review): `park_code_model` is never imported by name — presumably it is exposed by the
# `from helper_functions import *` line above; confirm.
park_model, park_vectorizer = park_code_model.trained_model()

sys.path.insert(0,'../')
from environment import env

#### Define Variables

In [2]:
# Project configuration returned by env.env(); only the 'nps_api_key' entry is read in this notebook section.
config = env.env()
nps_api_key = config['nps_api_key']
# Base URL that get_parks prefixes to the endpoint name.
api_base_url = 'https://developer.nps.gov/api/v1/'
# Number of park names sampled when building the synthetic dataset.
park_sample = 25

#### Define Functions

In [3]:
def get_parks(params):
    """
    Fetch every park matching ``params`` from the NPS ``parks`` endpoint, following pagination.

    Use to find a list of all park names, codes, states, addresses and descriptions.
    Can also be used to find specific park information by passing filter parameters.
    *Chat GPT was used to create the pagination process for the API

    params: The param dict to pass through the API call. It is copied for every
            page, so the caller's dict is never modified; ``limit`` and ``start``
            are set by this function.

    Returns a list of dicts with the keys ``fullName``, ``parkCode``, ``state``,
    ``addresses`` and ``description`` (``addresses`` defaults to ``[]`` when the
    API omits it).

    Raises ``requests.HTTPError`` when the API answers with an error status.
    """
    parks = []
    limit = 50  # Number of results per page, maximum allowed by NPS API
    start = 0   # Initial starting point for pagination
    base_params = dict(params or {})

    while True:
        # Build a fresh query per page instead of writing into the caller's dict.
        page_params = {**base_params, 'limit': limit, 'start': start}

        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(f"{api_base_url}parks", params=page_params, timeout=30)
        # Fail loudly on 4xx/5xx (e.g. a rejected API key) instead of with a KeyError below.
        response.raise_for_status()
        data = response.json()

        parks.extend(
            {
                'fullName': park['fullName'],
                'parkCode': park['parkCode'],
                'state': park['states'],
                'addresses': park.get('addresses', []),
                'description': park['description']
            }
            for park in data['data']
        )

        # Move to the next page
        start += limit

        # Break the loop if all parks have been retrieved
        if start >= int(data['total']):
            break

    return parks

In [4]:
def create_synthetic_queries(api_key, entities, endpoint, intent, queries):
    """
    Creates synthetic data in the necessary format for a specified API call.

    Every query template is filled in with every entity; the resulting text is
    mapped to a park code with ``park_code_model.map_park_code`` using the
    notebook-level ``park_model`` and ``park_vectorizer``.

    api_key: Personal API key. Accepted for backward compatibility only: this
             function performs no HTTP request, so the key is not used.
    entities: List of items to loop through such as State, Parks, Amenities.
    endpoint: the NPS API endpoint to call such as /activities or /parks.
    intent: General label for queries in a particular group.
            For example, the questions "Tell me about {park}" and "I want to know more about {park}" could both be categorized with the label "GetParkInfo".
    queries: A list of query templates containing an ``{entity}`` placeholder.

    Returns a DataFrame built with ``pd.json_normalize``, one row per
    (entity, query) pair, with the columns ``query``, ``intent``,
    ``api_call.endpoint`` and ``api_call.parkCode``.
    """
    dataset = []
    for entity in entities:
        for query in queries:
            # Format once and reuse the text for both the model input and the stored query.
            query_text = query.format(entity=entity)
            park_code = park_code_model.map_park_code(query_text, park_model, park_vectorizer)

            dataset.append({
                "query": query_text,
                "intent": intent,
                "api_call": {
                    "endpoint": endpoint,
                    "parkCode": park_code
                }
            })

    return pd.json_normalize(dataset)


#### Create Synthetic Data

In [5]:
# Pull the complete park list from the NPS API once, then derive the plain
# name and code lists from the resulting frame.
parks_df = pd.DataFrame(get_parks({'api_key': nps_api_key}))
parks = parks_df['fullName'].tolist()
park_codes = parks_df['parkCode'].tolist()
park_roots = nps_parks_root.nps_parks_root()

# Lookups keyed by full park name.
# NOTE(review): pairing by position assumes park_roots is in the same order and of the
# same length as the API result; zip stops at the shorter input — confirm.
park_lookup = {full_name: code for full_name, code in zip(parks, park_codes)}
root_dict = {full_name: root for full_name, root in zip(parks, park_roots)}

In [6]:
# parks_combined is the concatenation of two lists: the estimated short names users
# might type (e.g. "Acadia") followed by the full park names (e.g. "Acadia National Park").
# A dedicated, seeded generator makes the sample identical on every Restart & Run All
# without touching the global `random` state.
SAMPLE_SEED = 42
# random.sample raises ValueError when asked for more items than the population holds.
sample_size = min(park_sample, len(parks))
parks_sampled = random.Random(SAMPLE_SEED).sample(parks, sample_size)
roots = [root_dict[park] for park in parks_sampled]
parks_combined = roots + parks_sampled

In [7]:
# One entry per intent: (intent label, NPS endpoint, question templates).
# Every template contains an {entity} placeholder that create_synthetic_queries fills in.
intent_specs = [
    ("ParkInfo", "parks", [
        "Tell me about {entity}",
        "What is the full name of {entity}?",
        "What is the address of {entity}?",
        "Which state is {entity} located in?",
        "Give me a description of {entity}",
        "Where is {entity} located?",
        "Can you provide the full name of {entity}?",
        "What is the location of {entity}?",
        "Tell me the address of {entity}",
        "In which state can I find {entity}?",
        "Describe {entity} to me",
    ]),
    ("ParkActivities", "activities", [
        "What activities can I do at {entity}?",
        "Tell me about the activities available at {entity}",
        "What can I do at {entity}?",
        "List the activities at {entity}",
        "What recreational activities are offered at {entity}?",
        "What outdoor activities can I enjoy at {entity}?",
        "What kind of activities are there at {entity}?",
        "What are the top activities at {entity}?",
        "What fun things can I do at {entity}?",
        "What activities are recommended at {entity}?",
    ]),
    ("ParkAlerts", "alerts", [
        "What alerts are active at {entity} currently?",
        "Are there any current alerts at {entity}?",
        "Tell me the active alerts at {entity}",
        "What are the current alerts for {entity}?",
        "Which alerts are active in {entity} right now?",
        "List the active alerts at {entity}",
        "Are there any alerts at {entity}?",
        "What are the present alerts for {entity}?",
        "Can you provide the active alerts for {entity}?",
        "What are the ongoing alerts at {entity}?",
    ]),
    ("ParkAmenities", "amenities", [
        "What amenities exist at {entity}?",
        "Tell me about the amenities at {entity}",
        "What facilities are available at {entity}?",
        "List the amenities at {entity}",
        "What kind of amenities can I find at {entity}?",
        "What services and facilities does {entity} offer?",
        "What amenities should I expect at {entity}?",
        "What conveniences are available at {entity}?",
        "What amenities does {entity} have?",
        "What facilities are provided at {entity}?",
    ]),
    ("ParkEvents", "events", [
        "What events are happening at {entity}?",
        "Tell me about the events at {entity}",
        "What upcoming events are scheduled at {entity}?",
        "Are there any events at {entity}?",
        "What kind of events are held at {entity}?",
        "What events can I attend at {entity}?",
        "Are there any special events at {entity}?",
        "What events are planned at {entity}?",
        "What events are currently happening at {entity}?",
        "What activities and events are there at {entity}?",
    ]),
    ("ParkFees", "feespasses", [
        "How much does it cost to get into {entity}?",
        "What is the entry fee for {entity}?",
        "Tell me about the entrance fees for {entity}",
        "What are the admission fees for {entity}?",
        "Are there any fees to visit {entity}?",
        "What is the cost of admission to {entity}?",
        "How expensive is it to visit {entity}?",
        "What are the ticket prices for {entity}?",
        "Do I need to pay to enter {entity}?",
        "Are there any charges to access {entity}?",
    ]),
    ("ParkParkingLots", "parkinglots", [
        "Where can I park at {entity}?",
        "Tell me about parking options at {entity}",
        "What are the parking facilities like at {entity}?",
        "Where should I park when visiting {entity}?",
        "What parking areas are available at {entity}?",
        "Is there parking available at {entity}?",
        "How is parking managed at {entity}?",
        "Can I find parking near {entity}?",
        "Are there designated parking lots at {entity}?",
        "What are the parking arrangements at {entity}?",
    ]),
    ("ParkThingsToDo", "thingstodo", [
        "What things can I do at {entity}?",
        "Tell me about attractions at {entity}",
        "What are the attractions at {entity}?",
        "What are the main attractions of {entity}?",
        "What are the highlights of {entity}?",
        "What can I see and do at {entity}?",
        "What are the recreational opportunities at {entity}?",
        "What are the popular things to do at {entity}?",
        "What experiences are available at {entity}?",
        # Fixed: the template previously read "What is there to do {entity}?" (missing "at").
        "What is there to do at {entity}?",
    ]),
    ("ParkTours", "tours", [
        "What tours can I take at {entity}?",
        "Tell me about guided tours at {entity}",
        "Are there any guided tours available at {entity}?",
        "What guided experiences are offered at {entity}?",
        "Can I join any tours at {entity}?",
        "What kind of guided tours are available at {entity}?",
        "Are there ranger-led tours at {entity}?",
        "What are the tour options at {entity}?",
        "How can I book a tour at {entity}?",
        "Are there any special tours or programs at {entity}?",
    ]),
    ("ParkVisitorCenters", "visitorcenters", [
        "Where are the visitor centers located at {entity}?",
        "Tell me about visitor centers at {entity}",
        "What visitor centers can I find at {entity}?",
        "Are there any visitor centers at {entity}?",
        "Where can I find information centers at {entity}?",
        "What are the visitor facilities like at {entity}?",
        "Can you guide me to the visitor centers at {entity}?",
        "How many visitor centers are there at {entity}?",
        "What services do the visitor centers offer at {entity}?",
        "Are the visitor centers at {entity} open to the public?",
    ]),
]

# Build one frame per intent, in the order listed above. `queries` is deliberately the
# loop variable so that, as before, it ends up bound to the last template list.
intent_frames = {}
for intent, endpoint, queries in intent_specs:
    intent_frames[intent] = create_synthetic_queries(
        nps_api_key, entities=parks_combined, endpoint=endpoint, intent=intent, queries=queries
    )

# Keep the per-intent names that later cells refer to.
ParkInfo = intent_frames["ParkInfo"]
ParkActivities = intent_frames["ParkActivities"]
ParkAlerts = intent_frames["ParkAlerts"]
ParkAmenities = intent_frames["ParkAmenities"]
ParkEvents = intent_frames["ParkEvents"]
ParkFees = intent_frames["ParkFees"]
ParkParkingLots = intent_frames["ParkParkingLots"]
ParkThingsToDo = intent_frames["ParkThingsToDo"]
ParkTours = intent_frames["ParkTours"]
ParkVisitorCenters = intent_frames["ParkVisitorCenters"]

In [8]:
# Stack the per-intent frames row-wise into the final dataset with a fresh 0..n-1 index.
# NOTE(review): ParkParkingLots, ParkThingsToDo, ParkTours and ParkVisitorCenters are
# generated earlier in the notebook but are not part of this list — confirm whether
# leaving them out of the exported dataset is intentional.
frames_to_export = [
    ParkInfo,
    ParkActivities,
    ParkAlerts,
    ParkAmenities,
    ParkEvents,
    ParkFees,
]
synthetic_queries_df = pd.concat(frames_to_export, axis=0, ignore_index=True)

#### Write Outputs

In [122]:
# Write the synthetic dataset and the two lookup tables as CSV files into the
# current working directory, without the DataFrame index column.
synthetic_queries_df.to_csv('synthetic_queries.csv', index=False)

# Full park name -> park code, as returned by the parks endpoint.
park_code_table = parks_df[['fullName','parkCode']]
park_code_table.to_csv('park_to_parkcode.csv', index=False)

# Full park name -> root name, one row per root_dict entry.
park_root_table = pd.DataFrame(list(root_dict.items()), columns=['fullName', 'parkRoot'])
park_root_table.to_csv('park_to_root.csv', index=False)