In [1]:
import pandas as pd
import requests
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [2]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

api_base_url = 'https://developer.nps.gov/api/v1/'

park_csv_path = '../Data_API/park_to_parkcode.csv'

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\nicoesse\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\nicoesse\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\nicoesse\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [17]:
class NLTKModelFunctions:
    def __init__(self, config, park_csv_path):
        self.config = config
        self.park_codes = self.load_park_codes(park_csv_path)

    def load_park_codes(self, park_csv_path):
        """
        Loads park codes from a CSV file into a dictionary.
        
        park_csv_path (str): Path to the CSV file containing park names and their codes.
        """
        park_df = pd.read_csv(park_csv_path)
        park_codes = {}
        lemmatizer = WordNetLemmatizer()
        stop_words = set(stopwords.words('english'))
        
        for index, row in park_df.iterrows():
            tokens = word_tokenize(row['fullName'].lower())
            tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalpha() and token not in stop_words]
            normalized_park_name = ' '.join(tokens)
            park_codes[normalized_park_name] = row['parkCode']
        
        return park_codes

    def preprocess_text(self, text):
        tokens = word_tokenize(text.lower())
        stop_words = set(stopwords.words('english'))
        tokens = [token for token in tokens if token.isalpha() and token not in stop_words]
        lemmatizer = WordNetLemmatizer()
        tokens = [lemmatizer.lemmatize(token) for token in tokens]
        return ' '.join(tokens)

    def predict_intent(self, query):
        preprocessed_query = self.preprocess_text(query)
        if 'description' in preprocessed_query:
            return 'description'
        elif 'address' in preprocessed_query:
            return 'address'
        elif 'state' in preprocessed_query:
            return 'state'
        elif 'alerts' in preprocessed_query:
            return 'alerts'
        elif 'amenities' in preprocessed_query:
            return 'amenities'
        elif 'events' in preprocessed_query:
            return 'events'
        elif 'fees' in preprocessed_query or 'passes' in preprocessed_query:
            return 'feespass'
        else:
            return 'other'

    def get_park_code(self, query):
        preprocessed_query = self.preprocess_text(query)
        print(f"Preprocessed query: {preprocessed_query}")  # Debug statement
        lemmatizer = WordNetLemmatizer()
        stop_words = set(stopwords.words('english'))
        for park_name, park_code in self.park_codes.items():
            tokens = word_tokenize(park_name)
            tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalpha() and token not in stop_words]
            normalized_park_name = ' '.join(tokens)
            if normalized_park_name in preprocessed_query:
                return park_code
        return None  # Return None if no park code is found

    def get_params(self, query):
        intent = self.predict_intent(query)
        park_code = self.get_park_code(query)

        if park_code is None:
            raise ValueError("Park not found in the query. Please provide a valid park name.")

        if intent == 'description':
            endpoint = 'parks'
        elif intent == 'address':
            endpoint = 'addresses'
        elif intent == 'state':
            endpoint = 'parks'
        elif intent == 'alerts':
            endpoint = 'alerts'
        elif intent == 'amenities':
            endpoint = 'amenities'
        elif intent == 'events':
            endpoint = 'events'
        elif intent == 'feespass':
            endpoint = 'feespasses'
        else:
            endpoint = 'parks'

        return endpoint, park_code, intent

    def api_call(self, query):
        try:
            endpoint, park_code, intent = self.get_params(query)
        except ValueError as e:
            return str(e), None, None, None

        responses = []
        limit = 1
        start = 0
        
        while True:
            params = {
                'api_key': self.config['nps_api_key'],
                'parkCode': park_code,
                'limit': limit,
                'start': start,
            }
            
            if endpoint == 'fees':
                endpoint = 'feespasses'

            request = requests.get(f"{api_base_url}{endpoint}", params=params)
            request_data = request.json()

            if endpoint == 'parks':
                responses.extend([
                    {
                        'fullName': park['fullName'],
                        'parkCode': park['parkCode'],
                        'state': park['states'],
                        'addresses': park.get('addresses', []),
                        'description': park['description']
                    } for park in request_data['data']
                ])
            else:
                responses.extend(request_data['data'])

            start += limit

            if int(start) >= int(request_data['total']):
                break

        if endpoint == 'parks':
            temp_df = pd.DataFrame(responses[0])
            addresses_df = pd.json_normalize(temp_df['addresses'])
            output = pd.concat([temp_df.drop(columns=['addresses']), addresses_df], axis=1)
        else:
            output = pd.DataFrame(responses)

        return endpoint, park_code, intent, output

In [18]:
config = {
    'nps_api_key': 'your_api'
}

In [None]:
nltk_model_functions = NLTKModelFunctions(config, park_csv_path)

In [23]:

query = 'What state is Yosemite in?'
endpoint, park_code, intent, output = nltk_model_functions.api_call(query)
print(intent)

Preprocessed query: state yosemite
None
