In [31]:
# Import required libraries
import os
from random import randint

import flask
import dash
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output, State

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

import pandas as pd
import numpy as np

from jupyter_dash import JupyterDash
import ipywidgets

from joblib import load

import pickle
import regex as re
import string
from nltk.corpus import stopwords


from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

In [50]:
# Disabled: explicit Flask server with a secret key (kept for reference).
# server = flask.Flask(__name__)
# server.secret_key = os.environ.get('secret_key', str(randint(0, 1000000)))

# Disabled: Bootstrap theme for the app.
# external_stylesheets=[dbc.themes.YETI]

# Dash application object; the layout and the callback defined further down
# in this cell are attached to it.
app = JupyterDash(__name__)

# Put your Dash code here

# Load the dataset; index_col=0 uses the first CSV column as the row index.
data = pd.read_csv('./assets/cities_df', index_col=0)
X = data['Attraction']
y = data['City']
# Fixed random_state so the 80/20 split is the same on every run. Only X_train
# is used later in this cell (to fit the TF-IDF vectorizer).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

def preprocess_df(df, column, preview=True, source_column='Attraction'):
    """
    Add a cleaned-text column to a DataFrame of raw attraction text.

    Every value of ``df[source_column]`` is lower-cased, stripped of ASCII
    punctuation, and has each token that contains a digit removed. Whitespace
    is not normalised, so a removed token can leave a double or leading space.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the raw text. It is modified in place: ``column`` is
        added (or overwritten).
    column : str
        Name of the column that receives the cleaned text.
    preview : bool, optional
        Unused; accepted only so existing calls keep working.
    source_column : str, optional
        Name of the column holding the raw text. Defaults to 'Attraction'.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``column`` populated.
    """
    # Raw strings: regex escapes such as \w and \d are not valid Python string
    # escapes, and non-raw literals containing them trigger a warning.
    punctuation_pattern = r'[%s]' % re.escape(string.punctuation)
    digit_token_pattern = r'\w*\d\w*'

    def _clean(text):
        # Lower-case, drop punctuation, then drop tokens containing a digit.
        lowered = text.lower()
        without_punctuation = re.sub(punctuation_pattern, '', lowered)
        return re.sub(digit_token_pattern, '', without_punctuation)

    # Single pass over the column instead of three separate .apply calls.
    df[column] = df[source_column].apply(_clean)

    return df

# Clean the training text so the TF-IDF vectorizer below can be fitted on it.
X_train_cleaned = preprocess_df(pd.DataFrame(X_train, columns=['Attraction']),
                                'cleaned')


# Stop words: the NLTK English list, every punctuation character, and the
# destination names / transfer-related words listed below.
new_stopwords = stopwords.words('english') + list(string.punctuation)
new_stopwords += ['bali', 'barcelona', 'crete', 'dubai', 'istanbul', 'london',
                  'majorca', 'phuket', 'paris', 'rome', 'sicily', 'mallorca',
                  'goa', 'private', 'airport', 'transfer']

vectorizer = TfidfVectorizer(analyzer='word',
                             stop_words=new_stopwords,
                             decode_error='ignore')

# Fit the vectorizer on the cleaned training text. The fitted `vectorizer` is
# what get_prediction uses to transform user input; X_train_tfidf itself is not
# referenced again in this cell.
X_train_tfidf = vectorizer.fit_transform(X_train_cleaned['cleaned'])

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files from a trusted source.
# NOTE(review): presumably this model was trained on features from an
# identically configured vectorizer -- confirm against the training code.
model = load('./assets/non_lemmatized_model')

def preprocess_text(text):
    """
    Clean a single raw text string.

    Applies the same three steps as ``preprocess_df``: lower-case, strip ASCII
    punctuation, and remove every token that contains a digit. Whitespace is
    not normalised.

    Parameters
    ----------
    text : str
        Raw text.

    Returns
    -------
    list of str
        A one-element list holding the cleaned text, so it can be passed
        directly to ``vectorizer.transform`` as a collection of documents.
    """
    lowered = text.lower()
    # Raw strings: regex escapes such as \w and \d are not valid Python string
    # escapes, and non-raw literals containing them trigger a warning.
    without_punctuation = re.sub(r'[%s]' % re.escape(string.punctuation), '', lowered)
    without_digit_tokens = re.sub(r'\w*\d\w*', '', without_punctuation)

    return [without_digit_tokens]

def get_prediction(raw_text):
    """
    Predict the two most likely destinations for a free-text description.

    The text is cleaned with ``preprocess_text``, vectorized with the
    module-level ``vectorizer`` and scored with the module-level ``model``.

    Parameters
    ----------
    raw_text : str
        Text typed by the user.

    Returns
    -------
    tuple or None
        ``(first_pred, second_pred)``: the class labels with the highest and
        second-highest predicted probability. ``None`` if anything fails;
        the failure is logged rather than silently discarded.
    """
    import logging  # local import so this cell does not depend on an import-cell edit

    try:
        preprocessed_text = preprocess_text(raw_text)
        # One document in, one row of class probabilities out.
        probas = model.predict_proba(vectorizer.transform(preprocessed_text))[0]
        classes = model.classes_

        best = probas.argmax()
        # Walk the classes from most to least probable and take the first one
        # that is not the top pick. This works for any number of classes
        # (the previous hard-coded position 10 was only the runner-up when
        # there were exactly 12) and never repeats the top pick on a tie.
        ranking = np.argsort(probas)[::-1]
        runner_up = next(idx for idx in ranking if idx != best)

        return classes[best], classes[runner_up]
    except Exception:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate; keep the original "None on failure" contract.
        logging.getLogger(__name__).exception(
            'get_prediction failed for input %r', raw_text)
        return None
    

        
# The app layout: a title, an intro line, a free-text input ('my-input') and
# an output area ('my-output') that the callback below fills in.
app.layout = html.Div(children=[
    html.H1(children='Where should I travel?'),

    html.Div(children='When traveling becomes a normal pastime again, where should you go? What do you want to do while on vacation?'),

    html.Br(),

    html.Div(["What do you want to do on vacation? ",
              # Start with an empty value: a single space (the previous
              # default) counts as content, so the placeholder never showed.
              dcc.Input(id='my-input', placeholder='ex. I want to go to the beach',
                        value='', type='text')]),
    html.Br(),

    html.Div(id='my-output'),
])
    

@app.callback(
    Output(component_id='my-output', component_property='children'),
    Input(component_id='my-input', component_property='value')
)
def update_output_div(input_value):
    """
    Render the destination suggestion for the text typed in 'my-input'.

    Parameters
    ----------
    input_value : str or None
        Current value of the input box.

    Returns
    -------
    str
        The suggestion sentence; an empty string while the box is blank; or a
        short apology when no prediction could be produced.
    """
    # Nothing meaningful typed yet: show nothing instead of a prediction
    # computed from an empty document.
    if input_value is None or not input_value.strip():
        return ''

    # get_prediction returns None on failure; unpacking that directly would
    # raise TypeError inside the callback.
    prediction = get_prediction(input_value)
    if prediction is None:
        return "Sorry, I couldn't come up with a suggestion for that. Try describing your trip differently."

    first_pred, second_pred = prediction
    return f'You should visit {first_pred} ...or maybe {second_pred}'


# Run the app and display the result inline in the notebook output, with the
# server bound to localhost.
# Earlier variant without an explicit host, kept for reference:
# app.run_server(mode='inline')
if __name__ == '__main__':
    app.run_server(mode='inline', host='localhost')

In [49]:
    # Show the class labels the loaded model can predict, in the order that
    # the columns of predict_proba follow.
    print(model.classes_)


['Bali, Indonesia' 'Barcelona, Spain' 'Crete, Greece'
 'Dubai, United Arab Emirates' 'Goa, India' 'Istanbul, Turkey'
 'London, United Kingdom' 'Majorca, Balearic Islands' 'Paris, France'
 'Phuket, Thailand' 'Rome, Italy' 'Sicily, Italy']


In [54]:

# This cell repeats the earlier Dash cell; the visible difference is the model
# file loaded further down ('./assets/final_pipeline').

# Disabled: Bootstrap theme for the app.
# external_stylesheets=[dbc.themes.YETI]

# Dash application object; the layout and the callback defined further down
# in this cell are attached to it.
app = JupyterDash(__name__)

# Put your Dash code here

# Load the dataset; index_col=0 uses the first CSV column as the row index.
data = pd.read_csv('./assets/cities_df', index_col=0)
X = data['Attraction']
y = data['City']
# Fixed random_state so the 80/20 split is the same on every run. Only X_train
# is used later in this cell (to fit the TF-IDF vectorizer).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

def preprocess_df(df, column, preview=True, source_column='Attraction'):
    """
    Add a cleaned-text column to a DataFrame of raw attraction text.

    Every value of ``df[source_column]`` is lower-cased, stripped of ASCII
    punctuation, and has each token that contains a digit removed. Whitespace
    is not normalised, so a removed token can leave a double or leading space.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the raw text. It is modified in place: ``column`` is
        added (or overwritten).
    column : str
        Name of the column that receives the cleaned text.
    preview : bool, optional
        Unused; accepted only so existing calls keep working.
    source_column : str, optional
        Name of the column holding the raw text. Defaults to 'Attraction'.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``column`` populated.
    """
    # Raw strings: regex escapes such as \w and \d are not valid Python string
    # escapes, and non-raw literals containing them trigger a warning.
    punctuation_pattern = r'[%s]' % re.escape(string.punctuation)
    digit_token_pattern = r'\w*\d\w*'

    def _clean(text):
        # Lower-case, drop punctuation, then drop tokens containing a digit.
        lowered = text.lower()
        without_punctuation = re.sub(punctuation_pattern, '', lowered)
        return re.sub(digit_token_pattern, '', without_punctuation)

    # Single pass over the column instead of three separate .apply calls.
    df[column] = df[source_column].apply(_clean)

    return df

# Clean the training text so the TF-IDF vectorizer below can be fitted on it.
X_train_cleaned = preprocess_df(pd.DataFrame(X_train, columns=['Attraction']),
                                'cleaned')


# Stop words: the NLTK English list, every punctuation character, and the
# destination names / transfer-related words listed below.
new_stopwords = stopwords.words('english') + list(string.punctuation)
new_stopwords += ['bali', 'barcelona', 'crete', 'dubai', 'istanbul', 'london',
                  'majorca', 'phuket', 'paris', 'rome', 'sicily', 'mallorca',
                  'goa', 'private', 'airport', 'transfer']

vectorizer = TfidfVectorizer(analyzer='word',
                             stop_words=new_stopwords,
                             decode_error='ignore')

# Fit the vectorizer on the cleaned training text. The fitted `vectorizer` is
# what get_prediction uses to transform user input; X_train_tfidf itself is not
# referenced again in this cell.
X_train_tfidf = vectorizer.fit_transform(X_train_cleaned['cleaned'])

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files from a trusted source.
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: Can't get attribute 'PreprocessText' on <module '__main__'>".
# That most likely comes from this load: the pickled object appears to
# reference a class named PreprocessText, which would have to be defined or
# imported in this notebook before loading -- confirm against the training code.
model = load('./assets/final_pipeline')

def preprocess_text(text):
    """
    Clean a single raw text string.

    Applies the same three steps as ``preprocess_df``: lower-case, strip ASCII
    punctuation, and remove every token that contains a digit. Whitespace is
    not normalised.

    Parameters
    ----------
    text : str
        Raw text.

    Returns
    -------
    list of str
        A one-element list holding the cleaned text, so it can be passed
        directly to ``vectorizer.transform`` as a collection of documents.
    """
    lowered = text.lower()
    # Raw strings: regex escapes such as \w and \d are not valid Python string
    # escapes, and non-raw literals containing them trigger a warning.
    without_punctuation = re.sub(r'[%s]' % re.escape(string.punctuation), '', lowered)
    without_digit_tokens = re.sub(r'\w*\d\w*', '', without_punctuation)

    return [without_digit_tokens]

def get_prediction(raw_text):
    """
    Predict the two most likely destinations for a free-text description.

    The text is cleaned with ``preprocess_text``, vectorized with the
    module-level ``vectorizer`` and scored with the module-level ``model``.

    Parameters
    ----------
    raw_text : str
        Text typed by the user.

    Returns
    -------
    tuple or None
        ``(first_pred, second_pred)``: the class labels with the highest and
        second-highest predicted probability. ``None`` if anything fails;
        the failure is logged rather than silently discarded.
    """
    import logging  # local import so this cell does not depend on an import-cell edit

    try:
        preprocessed_text = preprocess_text(raw_text)
        # NOTE(review): in this cell `model` is loaded from
        # './assets/final_pipeline'. If that object already vectorizes text
        # itself, feeding it vectorizer output would be wrong -- confirm.
        # One document in, one row of class probabilities out.
        probas = model.predict_proba(vectorizer.transform(preprocessed_text))[0]
        classes = model.classes_

        best = probas.argmax()
        # Walk the classes from most to least probable and take the first one
        # that is not the top pick. This works for any number of classes
        # (the previous hard-coded position 10 was only the runner-up when
        # there were exactly 12) and never repeats the top pick on a tie.
        ranking = np.argsort(probas)[::-1]
        runner_up = next(idx for idx in ranking if idx != best)

        return classes[best], classes[runner_up]
    except Exception:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate; keep the original "None on failure" contract.
        logging.getLogger(__name__).exception(
            'get_prediction failed for input %r', raw_text)
        return None
    

        
# The app layout: a title, an intro line, a free-text input ('my-input') and
# an output area ('my-output') that the callback below fills in.
app.layout = html.Div(children=[
    html.H1(children='Where should I travel?'),

    html.Div(children='When traveling becomes a normal pastime again, where should you go? What do you want to do while on vacation?'),

    html.Br(),

    html.Div(["What do you want to do on vacation? ",
              # Start with an empty value: a single space (the previous
              # default) counts as content, so the placeholder never showed.
              dcc.Input(id='my-input', placeholder='ex. I want to go to the beach',
                        value='', type='text')]),
    html.Br(),

    html.Div(id='my-output'),
])
    

@app.callback(
    Output(component_id='my-output', component_property='children'),
    Input(component_id='my-input', component_property='value')
)
def update_output_div(input_value):
    """
    Render the destination suggestion for the text typed in 'my-input'.

    Parameters
    ----------
    input_value : str or None
        Current value of the input box.

    Returns
    -------
    str
        The suggestion sentence; an empty string while the box is blank; or a
        short apology when no prediction could be produced.
    """
    # Nothing meaningful typed yet: show nothing instead of a prediction
    # computed from an empty document.
    if input_value is None or not input_value.strip():
        return ''

    # get_prediction returns None on failure; unpacking that directly would
    # raise TypeError inside the callback.
    prediction = get_prediction(input_value)
    if prediction is None:
        return "Sorry, I couldn't come up with a suggestion for that. Try describing your trip differently."

    first_pred, second_pred = prediction
    return f'You should visit {first_pred} ...or maybe {second_pred}'


# Run the app and display the result inline in the notebook output, with the
# server bound to localhost.
# Earlier variant without an explicit host, kept for reference:
# app.run_server(mode='inline')
if __name__ == '__main__':
    app.run_server(mode='inline', host='localhost')

AttributeError: Can't get attribute 'PreprocessText' on <module '__main__'>