We will now create a travel app in Dash. We first load the required libraries.

In [1]:
!pip install dash



In [2]:
# Import required libraries
import pandas as pd
import numpy as np
import re
import string
import datetime
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize
from nltk.tokenize import word_tokenize

# Download stopwords from nltk
nltk.download('stopwords')

# Download the WordNet resource
nltk.download('wordnet')

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import ComplementNB

# Import necessary Dash components
from dash import dcc, html, Dash
from dash.dependencies import Input, Output

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\mskeh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\mskeh\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


We then load the dataset, preprocess its text column, and split the data into training and test sets.

In [3]:
# Load the "things to do" dataset into a DataFrame.
# NOTE: the path is absolute and specific to one Windows machine; adjust it
# before running the notebook anywhere else.
travel_df = pd.read_csv('C:/Users/mskeh/Documents/GitHub/Thinkful/Capstone Projects/Final_Capstone_NLP_Search_Recommendation/Data/all_things_to_do.csv')

In [4]:
def clean_text_column(df, column_name):
    """
    Clean the specified text column of the DataFrame in place.

    Each value has apostrophes removed, every run of digits / non-word
    characters collapsed into a single space and the literal "nbsp" stripped.
    The result is lower-cased and tokenized with NLTK; tokens that are English
    stop words, punctuation, or at most 2 characters long are dropped, and the
    remaining tokens are lemmatized as verbs and re-joined with single spaces.

    Missing values are treated as empty text and other non-string values are
    converted with str(), so a column containing NaN no longer raises.

    Requires the NLTK stopwords and WordNet corpora, plus whatever tokenizer
    data word_tokenize needs, to be installed.

    Parameters:
    - df (pd.DataFrame): DataFrame containing the text column.
    - column_name (str): Name of the text column to be cleaned.

    Returns:
    None. The column is overwritten with the cleaned strings. If the column
    does not exist, a message is printed and the DataFrame is left untouched.
    """
    # Ensure the specified column exists in the DataFrame
    if column_name not in df.columns:
        print(f"Column '{column_name}' not found in the DataFrame.")
        return

    # Build the lookup structures once instead of once per token
    stop_words_ = set(stopwords.words('english'))
    punctuation_ = set(string.punctuation)
    wn = WordNetLemmatizer()
    digits_or_non_word = re.compile(r"(\d|\W)+")

    def black_txt(token):
        # Keep tokens that are not stop words, not punctuation, and longer than 2 characters
        return token not in stop_words_ and token not in punctuation_ and len(token) > 2

    def clean_txt(text):
        # Remove apostrophes, digits, non-word characters, and the 'nbsp' residue
        text = text.replace("'", "")
        text = digits_or_non_word.sub(" ", text)
        text = text.replace("nbsp", "")

        # Tokenize, filter on the original token, then lemmatize what is kept
        clean_text = [wn.lemmatize(word, pos="v") for word in word_tokenize(text.lower()) if black_txt(word)]

        return " ".join(clean_text)

    # Normalise missing / non-string cells first so the regex never sees a float NaN
    as_text = df[column_name].fillna("").astype(str)
    df[column_name] = as_text.apply(clean_txt)

# Clean the 'Text' column of the "Things to Do" dataset in place
clean_text_column(travel_df, 'Text')

In [5]:
# Features are the cleaned text; the target is the location label
X = travel_df['Text']
y = travel_df['Location']
# Hold out 20% of the rows; the fixed random_state makes the split reproducible.
# X_test / y_test are not used in the cells shown here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Bag-of-words counts over word unigrams and bigrams, fitted on the training text only
vectorizer = CountVectorizer(analyzer='word', decode_error='ignore', ngram_range=(1,2))
X_train_baseline = vectorizer.fit_transform(X_train)

# Complement Naive Bayes classifier; `vectorizer` and `model` are read later by get_prediction()
model = ComplementNB()
model.fit(X_train_baseline, y_train)

Let's construct our Dash app.

In [None]:
# NOTE(review): `default_mode` looks like the class-level setting of the old
# JupyterDash package; here it is only assigned as an attribute on dash.Dash.
# Confirm the installed Dash version actually reads it.
Dash.default_mode = "jupyterlab"

def get_prediction(text):
    """
    Predict the most likely location for a free-text activity description.

    The query is passed through clean_text_column() before vectorization so
    that it receives the same preprocessing as the text the model was fitted
    on; vectorizing the raw query would leave stop words and unlemmatized
    forms that the training vocabulary never contained.

    Parameters:
    - text (str): The user's description of their ideal activity.

    Returns:
    The class label from model.classes_ with the highest predicted
    probability, or None if anything goes wrong (the error is printed).
    """
    try:
        # clean_text_column works on a DataFrame column, so wrap the query in a one-row frame
        query = pd.DataFrame({'Text': [text]})
        clean_text_column(query, 'Text')

        probas = model.predict_proba(vectorizer.transform(query['Text']))
        # probas has a single row, so the flat argmax is the class index
        return model.classes_[probas.argmax()]
    except Exception as e:
        # Deliberately broad: this sits directly behind the UI callback, which
        # shows a generic error message when None is returned.
        print(f"Error during prediction: {e}")
        return None

def create_app_layout():
    """
    Assemble the page shown by the Dash app.

    The page consists of a title and subtitle, a text input ('my-input') for
    the user's dream activity, a heading ('my-output') that displays the
    recommendation, and a short description of the project.

    Returns:
    - layout: Dash HTML component
        The root html.Div containing the whole page.
    """
    accent = '#3366cc'

    # Style fragments reused below; each component receives its own copy
    centered = {'textAlign': 'center', 'color': accent}
    side_margins = {'margin-left': '10%', 'margin-right': '10%'}
    indented = {**side_margins, 'color': accent}

    title = html.H1(
        children='Wanderlust Wizard - Your Travel Advisor Companion',
        style={'textAlign': 'center', 'margin-top': '5%', 'color': accent},
    )
    subtitle = html.H4(
        children='Craft your dream journey by sharing your ideal vacation activities.',
        style=dict(centered),
    )

    activity_input = dcc.Input(
        id='my-input',
        value='',
        type='text',
        placeholder='e.g., Sailing under the Northern Lights',
        style={'width': '65%', 'color': accent},
    )
    prompt_row = html.Div(
        [
            "What's your dream vacation activity? Tell us, and let's create your perfect adventure:  ",
            activity_input,
        ],
        style=dict(side_margins),
    )

    result_heading = html.H5(
        children='Your Personalized Travel Recommendation Awaits:',
        style=dict(centered),
    )
    result_value = html.H4(id='my-output', style=dict(centered))

    about_heading = html.H5(
        children='Embark on a Journey of Discovery',
        style=dict(indented),
    )
    about_text = html.Div(
        children="Join us on a journey fueled by data from 50,000 attractions across 30 locations on Trip Advisor. Our cutting-edge Complement Naive Bayes algorithm ensures tailor-made suggestions for your dream adventure, making every exploration memorable.",
        style=dict(indented),
    )

    page = [
        title,
        subtitle,
        html.Br(),
        prompt_row,
        html.Hr(),
        result_heading,
        result_value,
        html.Br(),
        html.Hr(),
        about_heading,
        about_text,
    ]
    # Light grey page background
    return html.Div(children=page, style={'background-color': '#f2f2f2'})

def run_travel_app():
    """
    Build and launch the Dash app for the travel recommendation system.

    Creates the app with an external stylesheet, installs the layout from
    create_app_layout(), registers the callback that turns the text typed
    into 'my-input' into a recommendation shown in 'my-output', and starts
    the server when executed as the main module (a notebook kernel counts).
    """
    # Create the app once, with its stylesheet
    external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
    app = Dash(__name__, external_stylesheets=external_stylesheets)

    # Set the app layout
    app.layout = create_app_layout()

    # Set the app callback
    @app.callback(
        Output(component_id='my-output', component_property='children'),
        Input(component_id='my-input', component_property='value')
    )
    def update_output_div(input_value):
        # Treat empty and whitespace-only input alike: nothing to predict from
        if not input_value or not input_value.strip():
            return "Please enter some text to get a recommendation"

        top_pred = get_prediction(input_value)
        if top_pred is None:
            return "Error during prediction"
        return f"{top_pred}"

    # Run the app
    if __name__ == '__main__':
        # dash.Dash takes the notebook display mode as `jupyter_mode`; a bare
        # `mode=` keyword would be forwarded to the Flask server and rejected.
        app.run(debug=True, jupyter_mode='external', host='localhost')

# Build and launch the Dash app defined above
run_travel_app()