In [1]:
#import local functions

In [1]:
import sys
repo_dir = '/Users/michaelkranz/Documents/restaurant-app/'
sys.path.append(repo_dir)

#for now, using locally defined tokenize text --> will modularize later
#from src.util import tokenize_text

In [2]:
#sqlalchemy and pandas for data 
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
import pandas as pd
#spacy for tokenization
from spacy.lang.en import English # Create the nlp object
import spacy
#gensim for similarity
from gensim.corpora.dictionary import Dictionary
from gensim.models.tfidfmodel import TfidfModel
from gensim.similarities.docsim import MatrixSimilarity,Similarity
#itertools for getting similarity edges
#networkx for organizing similarities
#plotly for visualization

In [3]:
from jupyter_dash import JupyterDash

import dash
from dash.dependencies import Input,Output
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go

import networkx as nx

In [4]:
nlp = spacy.load('en_core_web_sm')

def tokenize_text(text_str,nlp_obj=nlp):
    '''
    use spacy to separate text into words
    (ie tokenization)
    and return the lemmatization 
    (ie feet for footing and foot)
    for only nouns and adjectives
    
    TODO: refine methodology
    '''
    spacy_doc = nlp_obj(text_str)
    
    tokenized_doc = [
        token.lemma_
        for token in spacy_doc
        if token.pos_ in ("NOUN","ADJ","PROPN")
        ]
    
    return tokenized_doc
    #return spacy_doc

In [5]:
#needed for Jupyter lab to detect proxy config
#JupyterDash.infer_jupyter_proxy_config()

In [6]:
tokens_df = pd.read_pickle((f"{repo_dir}/data/champaign_tokenized_df.pkl"))

In [7]:
info_df = pd.read_pickle((f"{repo_dir}/data/champaign_restaurant_info.pkl")).set_index('business_id')

In [8]:
tokens_df.head(1)

Unnamed: 0,categories,reviews
pQeaRpvuhoEqudo3uymHIQ,"[ethnic food, food trucks, specialty food, ...","[empanadas, Empanadas, House, authentic, argen..."


In [9]:
info_df.head(1)

Unnamed: 0_level_0,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
pQeaRpvuhoEqudo3uymHIQ,The Empanadas House,404 E Green St,Champaign,IL,61820,40.110446,-88.233073,4.5,5,1,"{'HasTV': 'True', 'Alcohol': 'none', 'NoiseLev...","Ethnic Food, Food Trucks, Specialty Food, Impo...","{'Friday': '11:30-14:30', 'Monday': '11:30-14:..."


In [10]:
# tokenize query
query_str = 'i want Kentucky-Fried Chicken spicy chiken recipe so i can be satisfied with fast food'
query_tokens = tokenize_text(query_str)
category_tokens = ['fast food','Pizza','Barbeque']
#add to both the reviews and category columns of df
query_df = pd.DataFrame({"reviews":[query_tokens],
                      "categories":[category_tokens]},
                        index=['query'])


In [11]:
tokens_and_query_df = (
    tokens_df
    .append(query_df)
    .reset_index() #gets indices for use with corpus
    .rename(columns={"index":"business_id"})
)

In [12]:
reviews_dictionary = Dictionary(tokens_and_query_df.reviews)
reviews_corpus = [reviews_dictionary.doc2bow(doc) 
                  for doc in tokens_and_query_df.reviews]
#tfidf with document being each restaurant and corpus being all restaurants
reviews_tfidf_model = TfidfModel(reviews_corpus)
reviews_tfidf_docs = [reviews_tfidf_model[review] 
                      for review in reviews_corpus]

#similarity indices for each doc
reviews_similarity_indices = MatrixSimilarity(reviews_tfidf_docs)

In [13]:
categories_dictionary = Dictionary(tokens_and_query_df.categories)
categories_corpus = [categories_dictionary.doc2bow(doc) 
                     for doc in tokens_and_query_df.categories]
categories_tfidf_model = TfidfModel(categories_corpus)
categories_tfidf_docs = [categories_tfidf_model[category] 
                         for category in categories_corpus]

categories_similarity_indices = MatrixSimilarity(categories_tfidf_docs)

In [14]:
tokens_df['review_query'] = reviews_similarity_indices[reviews_tfidf_docs[-1]][:-1]
tokens_df['category_query'] = categories_similarity_indices[categories_tfidf_docs[-1]][:-1]

In [15]:
query_results_df = (
    info_df[['name','categories']]
    .join(tokens_df[['review_query','category_query']])
    .sort_values('review_query',ascending=False)
)   

In [16]:
#category tokens --- if match to a category, 

In [117]:
#make free text search bar and table
#click callback and update another table based on selections

In [188]:
import json

In [237]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = JupyterDash(__name__, external_stylesheets=external_stylesheets)
#app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Create server variable with Flask server object for use with gunicorn
server = app.server

In [238]:
colors = {
    'background': '#111111',
    'text': '#7FDBFF'
}

# assume you have a "long-form" data frame
# see https://plotly.com/python/px-arguments/ for more options
df = pd.DataFrame({
    "Fruit": ["Apples", "Oranges", "Pears", "Berries", "Mango", "Apples","Apples"],
    "Amount": range(7),
    "City": ["SF", "SF", "Chicago", "Detroit", "Las Vegas", "New York","Los Angeles"]
})

fruit_dropdown_options = [{'label':x, 'value':x} for x in df.Fruit.unique()]
city_options = df.City.unique()

In [239]:
#for i,color in colors.items():
#    colors[i] = 'test'

In [240]:
fig = px.bar(df, x="Fruit", y="Amount", color="City", barmode="group")

fig.update_layout(
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background'],
    font_color=colors['text']
)

city_list = []

app.layout = html.Div(
    children=[
        html.H1('Hello Dash'),
        html.Div(children='Dash: A web application framework for Python.', 
                 style={
                     'textAlign': 'center',
                     'color': colors['text']
                 }
                ),
        dcc.Dropdown(id='city-filter',
                     options=fruit_dropdown_options,
                     value=None),
        html.Br(),
        html.Button(id='button-1',children='test-1'),
        html.Br(),
        html.Button(id='button-2',children='test-2'),
        html.Br(),
        # Hidden div inside the app that stores the intermediate value
        html.Div(id='city-list-intermediate', style={'display': 'none'})
        html.Div(id='city-list')
    ]
)
                    
@app.callback(
    Output('button-1', 'children'),
    Output('button-2','children'),
    Output('button-1', 'n_clicks'),
    Output('button-2','n_clicks'),
    Input('city-filter','value')
)
def update_button_content(query_str):
    #1- get the stuff to display in the button
    df_query = (
        df.query(f"Fruit=='{query_str}'")
        .sort_values('Amount',ascending=False)
        ['City']
    )
    
    if len(df_query)==2:
        city_1 = df_query.iloc[0]
        city_2 = df_query.iloc[1]
    elif len(df_query)==1:
        city_1 = df_query.iloc[0]
        city_2 = 'No city'
    else:
        city_1 = 'No city'
        city_2 = 'No city'
    
    print('update button')
    
    #2- reset click count for identifying if clicking selects or de-selects option
    ctx = dash.callback_context
    if len(ctx.triggered)==0:
        city_1_n_clicks = 0
        city_2_n_clicks = 0
    #print(json.dumps({
    #    'states': ctx.states,
    #    'triggered': ctx.triggered,
    #    'inputs': ctx.inputs
    #}, indent=2))
    return (
        city_1,city_2,
        city_1_n_clicks,city_2_n_clicks
    )
   
@app.callback(
    Output('city-list','children'),
    Input('button-1','children'),
    Input('button-2','children'),
    Input('button-1','n_clicks'),
    Input('button-2','n_clicks')
)
def update_button_selections_if_clicked(
    button_1_children,
    button_2_children,
    button_1_n_clicks,
    button_2_n_clicks
):
    ctx = dash.callback_context
    
    if not ctx.triggered:
        #to do --- make default list
        button_id = 'No action'
    else:
        triggered_button = ctx.triggered[0]
        triggered_button_id,triggered_button_property = triggered_button['prop_id'].split('.')
        triggered_button_value = triggered_button['value']
        triggered_button_name = ctx.inputs['.'.join([triggered_button_id,'children'])]
        
        #when button html (ie text) changes, will trigger still but n_clicks count remains so set to 0
        if triggered_button_property=='children':
            for input_property,input_value in ctx.inputs.items():
                if 'n_clicks' in input_property:
                    ctx.inputs[input_property] = 0
        #only want to update list if button is clicked
        #if triggered_button_property=='n_clicks':
        elif triggered_button_property=='n_clicks':
            
            triggered_is_option = triggered_button_name in city_options
            triggered_is_selected = triggered_button_value%2==1
            triggered_is_displayed = triggered_button_name in city_list
            
            if triggered_is_selected and triggered_is_option and not triggered_is_displayed: #select city 
                city_list.append(triggered_button_name)
                #TODO: color triggered value if selected
            elif not triggered_is_selected and triggered_is_option and triggered_is_displayed: #deselect city
                city_list.remove(triggered_button_name)
                #TODO: de-color triggered value if de-selected
        
        selected_buttons_html = [html.Div(city) for city in city_list]

    ctx_msg = json.dumps({
        'states': ctx.states,
        'triggered': ctx.triggered,
        'inputs': ctx.inputs
    }, indent=2)
    #return selected_buttons_html.append(html.Div(ctx_msg))
    return html.Div(
        html.Pre(ctx_msg)
    )
    
@app.callback(
    Output('button-1','style'),
    Output('button-2','style'),
    Input('city-list','children')
 )
def update_button_style_if_clicked():
    ctx = dash.callback_context
    print(ctx.inputs)
    pass
    

     
                                      

SyntaxError: invalid syntax (<ipython-input-240-21a03199a77e>, line 134)

In [235]:
#button --> div(restaurant info) --> if button clicked then append to list

#free text--> input and state(finished)
#button click --> append to list

In [236]:
app.run_server(port=8060,mode='inline')

update button
{
  "states": {},
  "triggered": [],
  "inputs": {
    "city-filter.value": null
  }
}
update button
{
  "states": {},
  "triggered": [
    {
      "prop_id": "city-filter.value",
      "value": "Oranges"
    }
  ],
  "inputs": {
    "city-filter.value": "Oranges"
  }
}


In [None]:
dash.