In [1]:
#import local functions

In [2]:
import sys
# Repository root. Set the RESTAURANT_APP_DIR environment variable to run the
# notebook on another machine; otherwise the original local path is used.
import os
repo_dir = os.environ.get('RESTAURANT_APP_DIR', '/Users/michaelkranz/Documents/restaurant-app/')
# Guard against stacking duplicate sys.path entries when this cell is re-run.
if repo_dir not in sys.path:
    sys.path.append(repo_dir)

#for now, using locally defined tokenize text --> will modularize later
#from src.util import tokenize_text

In [3]:
#sqlalchemy and pandas for data 
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
import pandas as pd
#spacy for tokenization
from spacy.lang.en import English # Create the nlp object
import spacy
#gensim for similarity
from gensim.corpora.dictionary import Dictionary
from gensim.models.tfidfmodel import TfidfModel
from gensim.similarities.docsim import MatrixSimilarity,Similarity
#itertools for getting similarity edges
#networkx for organizing similarities
#plotly for visualization

In [4]:
from jupyter_dash import JupyterDash

import dash
from dash.dependencies import Input,Output,State
import dash_core_components as dcc
import dash_html_components as html
from dash.exceptions import PreventUpdate
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go

import networkx as nx

In [4]:
# Small English spaCy pipeline, loaded once and shared as the default tokenizer.
nlp = spacy.load('en_core_web_sm')

def tokenize_text(text_str,nlp_obj=nlp):
    '''
    Split text into tokens with spaCy and return their lemmas.

    Only tokens tagged as NOUN, ADJ or PROPN are kept; every kept token
    is represented by its lemma (``token.lemma_``).

    Parameters
    ----------
    text_str : text to process
    nlp_obj : callable applied to ``text_str`` that yields tokens exposing
        ``pos_`` and ``lemma_`` (defaults to the module-level ``nlp``)

    Returns
    -------
    list of lemma strings, in document order

    TODO: refine methodology
    '''
    kept_pos = ("NOUN","ADJ","PROPN")
    lemmas = []
    for tok in nlp_obj(text_str):
        if tok.pos_ in kept_pos:
            lemmas.append(tok.lemma_)
    return lemmas

In [5]:
#needed for Jupyter lab to detect proxy config
#JupyterDash.infer_jupyter_proxy_config()

In [6]:
# Load the pickled token frame.
# NOTE(review): unpickling executes arbitrary code -- only load trusted files.
tokens_df = pd.read_pickle(f"{repo_dir}/data/champaign_tokenized_df.pkl")

In [7]:
# Load the pickled restaurant-info frame and index it by business_id.
# NOTE(review): unpickling executes arbitrary code -- only load trusted files.
info_df = (
    pd.read_pickle(f"{repo_dir}/data/champaign_restaurant_info.pkl")
    .set_index('business_id')
)

In [8]:
# Preview the first row of the token frame (columns: categories, reviews).
tokens_df.head(1)

Unnamed: 0,categories,reviews
pQeaRpvuhoEqudo3uymHIQ,"[ethnic food, food trucks, specialty food, ...","[empanadas, Empanadas, House, authentic, argen..."


In [9]:
# Preview the first row of the restaurant-info frame (indexed by business_id).
info_df.head(1)

Unnamed: 0_level_0,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
pQeaRpvuhoEqudo3uymHIQ,The Empanadas House,404 E Green St,Champaign,IL,61820,40.110446,-88.233073,4.5,5,1,"{'HasTV': 'True', 'Alcohol': 'none', 'NoiseLev...","Ethnic Food, Food Trucks, Specialty Food, Impo...","{'Friday': '11:30-14:30', 'Monday': '11:30-14:..."


In [10]:
# Tokenize the free-text query the same way the reviews were tokenized.
query_str = 'i want Kentucky-Fried Chicken spicy chiken recipe so i can be satisfied with fast food'
query_tokens = tokenize_text(query_str)
# Category tokens are lower-cased before use: the stored category tokens shown in
# the tokens_df preview are lowercase ('ethnic food', 'food trucks', ...), and the
# gensim Dictionary matches tokens exactly, so 'Pizza' would never match 'pizza'.
category_tokens = [category.lower() for category in ['fast food','Pizza','Barbeque']]
# One-row frame with the same columns as tokens_df so the query can be added
# to both the reviews and the categories corpus.
query_df = pd.DataFrame({"reviews":[query_tokens],
                         "categories":[category_tokens]},
                        index=['query'])


In [11]:
# Stack the query row underneath the restaurant rows so the query is the LAST
# document in every corpus built from this frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
tokens_and_query_df = (
    pd.concat([tokens_df, query_df])
    .reset_index() #gets indices for use with corpus
    .rename(columns={"index":"business_id"})
)

In [12]:
# Vocabulary over every review token list (restaurants plus the query row).
reviews_dictionary = Dictionary(tokens_and_query_df.reviews)

# Bag-of-words vector per document.
reviews_corpus = [
    reviews_dictionary.doc2bow(review_tokens)
    for review_tokens in tokens_and_query_df.reviews
]

# TF-IDF where each document is one restaurant and the corpus is all restaurants.
reviews_tfidf_model = TfidfModel(reviews_corpus)
reviews_tfidf_docs = [
    reviews_tfidf_model[bow_doc]
    for bow_doc in reviews_corpus
]

# Similarity index over the TF-IDF documents.
reviews_similarity_indices = MatrixSimilarity(reviews_tfidf_docs)

In [13]:
# Same pipeline as for the reviews, applied to the category token lists.
categories_dictionary = Dictionary(tokens_and_query_df.categories)

# Bag-of-words vector per document.
categories_corpus = [
    categories_dictionary.doc2bow(category_tokens_for_doc)
    for category_tokens_for_doc in tokens_and_query_df.categories
]

# TF-IDF weighting of the category bags-of-words.
categories_tfidf_model = TfidfModel(categories_corpus)
categories_tfidf_docs = [
    categories_tfidf_model[bow_doc]
    for bow_doc in categories_corpus
]

# Similarity index over the TF-IDF documents.
categories_similarity_indices = MatrixSimilarity(categories_tfidf_docs)

In [14]:
# The query row was added last, so index -1 of each TF-IDF list is the query document.
# Querying the similarity index with it yields one score per document; the trailing
# [:-1] drops the final entry (the query compared with itself) so the result lines up
# with the rows of tokens_df.
# NOTE: this adds columns to tokens_df in place.
tokens_df['review_query'] = reviews_similarity_indices[reviews_tfidf_docs[-1]][:-1]
tokens_df['category_query'] = categories_similarity_indices[categories_tfidf_docs[-1]][:-1]

In [15]:
# Attach both similarity scores to each restaurant's name and categories,
# then rank with the highest review similarity first.
query_results_df = info_df[['name','categories']].join(
    tokens_df[['review_query','category_query']]
).sort_values('review_query',ascending=False)

In [16]:
#category tokens --- if match to a category, 

In [117]:
#make free text search bar and table
#click callback and update another table based on selections

In [188]:
import json

# test components

In [10]:
import json
from dash.dependencies import ALL

In [55]:
# Scratch app used to experiment with pattern-matching callbacks.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# suppress_callback_exceptions is enabled; presumably because the pattern-matched
# buttons are created by a callback and are absent from the initial layout.
app = JupyterDash(
    __name__,
    external_stylesheets=external_stylesheets,
    suppress_callback_exceptions=True,
)
# Plain-Dash alternative:
#app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Create server variable with Flask server object for use with gunicorn
server = app.server

# Layout: a trigger button, a container the first callback fills with buttons,
# and a display area the second callback writes to.
app.layout = html.Div(children=[
    html.Button('test',id='test',n_clicks=0),
    html.Div(id='button-container'),
    html.Div(id='display'),
])
@app.callback(
    Output('button-container','children'),
    Input('test','n_clicks'),
    State({'type':'button','index':ALL},'children')
)
def update_buttons(val,buttons):
    '''
    Fill the button container once the 'test' button has been clicked.

    val: n_clicks of the 'test' button
    buttons: children of every currently rendered {'type':'button'} component

    Returns two pattern-matched buttons followed by debug Divs showing the
    click count and the previous button children.
    Raises PreventUpdate while the button has not been clicked.
    '''
    # `not val` covers both 0 and None (n_clicks is None when it is not set)
    if not val:
        raise PreventUpdate

    return [
        html.Button(id={'type':'button','index':1},children='test1',n_clicks=0),
        html.Button(id={'type':'button','index':2},children='test2',n_clicks=0),
        html.Div('callback 1'),
        html.Div('item1'),
        html.Div(str(val)),
        html.Div('item2'),
        html.Div(str(buttons))
    ]
@app.callback(
    Output('display','children'),
    Input({'type':'button','index':ALL},'n_clicks'),
    State({'type':'button','index':ALL},'children'),
    State('button-container','children')
)
def update_display_2_with_intermediate(buttons,children,container):
    '''
    Debug view: dump the callback arguments and the callback context.

    buttons: n_clicks of every pattern-matched button
    children: children of every pattern-matched button
    container: current children of 'button-container'

    Raises PreventUpdate when no pattern-matched button exists yet.
    '''
    ctx = dash.callback_context
    if not buttons:
        raise PreventUpdate

    components = [html.Div('callback 2')]

    # raw callback arguments, each under its own label
    for label,value in (('item1',buttons),
                        ('item2',children),
                        ('item3',container)):
        components.append(html.Div(label))
        components.append(html.Div(str(value)))

    # callback context, serialized as JSON
    for heading,payload in (('INPUTS',ctx.inputs),
                            ('TRIGGERED',ctx.triggered),
                            ('STATES',ctx.states)):
        components.append(html.Div(heading))
        components.append(html.Pre(json.dumps(payload)))

    return components

# Serve the scratch app on port 8060.
app.run_server(port=8060)

Dash app running on http://127.0.0.1:8060/


# new app

In [38]:
# Colour palette (background / text).
colors = dict(background='#111111', text='#7FDBFF')

# assume you have a "long-form" data frame
# see https://plotly.com/python/px-arguments/ for more options
fruit_names = ["Apples", "Oranges", "Pears", "Berries", "Mango", "Apples","Apples"]
city_names = ["SF", "SF", "Chicago", "Detroit", "Las Vegas", "New York","Los Angeles"]
df = pd.DataFrame({
    "Fruit": fruit_names,
    "Amount": range(7),
    "City": city_names,
})

# One dropdown option per distinct fruit, in order of first appearance.
fruit_dropdown_options = []
for fruit in df["Fruit"].unique():
    fruit_dropdown_options.append({'label':fruit, 'value':fruit})

# Distinct cities, in order of first appearance.
city_options = df["City"].unique()

In [185]:
from itertools import combinations
from random import random

def compute_line_points(x0,x1,y0,y1,num_points=50):
    '''
    get several evenly spaced points on the segment between two coordinates

    Both coordinates are interpolated directly between their end points
    instead of going through slope/intercept, so vertical segments
    (x0 == x1) no longer divide by zero.

    Parameters
    ----------
    x0, x1 : x coordinates of the start and end point
    y0, y1 : y coordinates of the start and end point
    num_points : number of points to generate, end points included
        (50 matches the numpy.linspace default used previously)

    Returns
    -------
    (x_pts, y_pts) : numpy array of x values and list of y values,
        both of length num_points
    '''
    # local import: numpy is not imported at the top of this notebook
    import numpy as np

    x_pts = np.linspace(x0,x1,num_points)
    y_pts = list(np.linspace(y0,y1,num_points))
    return x_pts,y_pts

In [186]:
#graph properties
from random import Random

# nodes: one per city, identified by its position in city_options
node_index = range(len(city_options))
node_name = city_options
node_hover_text = city_options

# edges: every unordered pair of nodes
edge_indices = list(combinations(node_index,2))
# placeholder weights from a seeded generator so the weights are the same on every run
edge_rng = Random(42)
edge_weight = [edge_rng.random() for _ in edge_indices]
edge_names = list(combinations(node_name,2))
edge_hover_text = [f"{u}<-->{v}" for u,v in edge_names]

In [187]:
#make networkx graph
graph = nx.Graph()

##add nodes
# Nodes are added explicitly so each one carries the 'name' / 'hover_text'
# attributes (the node loop previously had no body, which is a syntax error
# and left nodes to be created implicitly by add_edge without attributes).
#TODO: top word frequencies,location etc
#TODO: picture of restaurant and/or food for node
for i,name,hover_text in zip(node_index,
                             node_name,
                             node_hover_text):
    graph.add_node(
        i,
        name=name,
        hover_text=hover_text
    )

##add edges
for nodes,weight,names,hover_text in zip(edge_indices,
                                         edge_weight,
                                         edge_names,
                                         edge_hover_text):
    graph.add_edge(
        nodes[0],
        nodes[1],
        weight=weight,
        hover_text=hover_text
    )

In [203]:
#make plotly figure using coordinates created from networkx
spring_layout = nx.drawing.layout.spring_layout
pos = spring_layout(graph,weight='weight')

node_x = [x for x,y in pos.values()]
node_y = [y for x,y in pos.values()]

# one marker per node
node_trace = go.Scatter(
    x=node_x, 
    y=node_y,
    mode='markers',
    hovertext=node_hover_text,
    marker={'size':20,
            'line_width':2}
)
    
# one line trace per edge, its width scaled by the edge weight
edge_traces = []
for node0,node1,props in graph.edges(data=True):
    #edge line viz coordinates
    x0, y0 = pos[node0]
    x1, y1 = pos[node1]
    #points
    x_pts,y_pts = compute_line_points(x0,x1,y0,y1)
    
    # Loop-local names: the previous `edge_weight = props['weight']` rebound the
    # module-level edge_weight LIST to a float, which broke the graph-building
    # cell whenever it was re-run after this one.
    line_weight = props['weight']
    line_label = props['hover_text']
    
    trace = go.Scatter(
        x=x_pts,
        y=y_pts,
        #x=[x0,x1],
        #y=[y0,y1],
        fill='toself',
        mode='lines',
        line={'width':line_weight*20},
        name=line_label,
        hovertext=line_label,
        hoveron='fills'
    )
    edge_traces.append(trace)

In [204]:
# Build a NEW list: `traces = edge_traces` followed by append() aliased the list
# and added another copy of node_trace to edge_traces on every re-run of this cell.
traces = [*edge_traces, node_trace]

fig = go.Figure(
    traces,
    layout=go.Layout(
        # title.font.size replaces the deprecated `titlefont_size` shorthand
        title={'text': '<br>Network graph made with Python',
               'font': {'size': 16}},
        showlegend=False,
        #hovermode='closest',

        #margin=dict(b=20,l=5,r=5,t=40),
        annotations=[ dict(
            text="Python code: <a href='https://plotly.com/ipython-notebooks/network-graphs/'> https://plotly.com/ipython-notebooks/network-graphs/</a>",
            showarrow=False,
            xref="paper", yref="paper",
            x=0.005, y=-0.002 ) ],
        # hide grid, zero line and tick labels on both axes
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
)

In [205]:
# Render the network figure.
fig.show()

In [None]:
# Node elements: id, label and layout position for every graph node.
# - the position is looked up in `pos` by node id rather than by list position,
#   so it does not depend on node ids matching the order of node_x / node_y
# - the label falls back to the node id when a node has no 'name' attribute
#   (e.g. nodes that were only created implicitly by add_edge)
# - coordinates are converted to plain floats
node_elements = [
    {
        'data': {'id': node_id, 'label': data.get('name', str(node_id))},
        'position': {'x': float(pos[node_id][0]), 'y': float(pos[node_id][1])}
    }
    for node_id,data in graph.nodes(data=True)
]
# Edge elements: source / target node ids for every graph edge.
edge_elements = [
    {'data': {'source': source_node, 'target': target_node}}
    for source_node,target_node,data in graph.edges(data=True)
]

In [75]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = JupyterDash(__name__, external_stylesheets=external_stylesheets)
#app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Create server variable with Flask server object for use with gunicorn
server = app.server

app.layout = html.Div(
    children=[
        html.H1('Hello Dash'),
        # NOTE: despite its id, this dropdown lists FRUITS; the chosen fruit
        # decides which city buttons are shown.
        dcc.Dropdown(id='city-filter',
                     options=fruit_dropdown_options,
                     value=None),
        # filled with one button per city by a callback
        html.Div(id='button-container'),
        # running list of the selected city names
        html.Div(id='city-selection-list',
                 #style={'display': 'none'}, #display of none = hidden intermediate objects
                 children=[]),
        # target of the callback whose Output is 'city-selection-output';
        # a callback output must refer to a component that exists in the layout
        html.Div(id='city-selection-output'),
        #html.Div(id='city-list-output')
    ]
)

# Module-level handle on Dash's callback context; its attributes are only
# meaningful while a callback is executing.
ctx = dash.callback_context

def pick_top_n_of(
    df,query_name,
    top_n_of='Amount',
    n=2,
    top_n_variables='City',
    filter_column='Fruit'
):
    '''
    Filters rows on one column, ranks them by another and returns
    the values of a third column for the top n rows.

    Parameters
    ----------
    df : DataFrame to search
    query_name : value that `filter_column` must equal
    top_n_of : column to sort by, largest first (eg amount)
    n : number of records to keep
    top_n_variables : column whose values are returned (eg names)
    filter_column : column compared with `query_name`
        (defaults to 'Fruit', the column that used to be hard-coded)

    Returns
    -------
    pandas Series with the `top_n_variables` values of the selected rows,
    ordered by descending `top_n_of`; empty when nothing matches.
    '''
    # A boolean mask instead of a string-built df.query(): values containing
    # quotes (eg "O'Henry") no longer break the expression.
    matching_rows = df[df[filter_column] == query_name]
    return (
        matching_rows
        .sort_values(top_n_of,ascending=False)
        .head(n)
        [top_n_variables]
    )

def prevent_update_if_no_clicks(n_clicks_list):
    '''
    Tell whether a callback should be skipped because none of the
    given components has been clicked yet.

    Parameters
    ----------
    n_clicks_list : iterable of n_clicks values; entries may be None
        (a component whose n_clicks was never set) and the whole
        argument may be None or empty

    Returns
    -------
    True when no entry is greater than 0, False otherwise

    Example Usage:

    if prevent_update_if_no_clicks(n_clicks_list):
        raise PreventUpdate
    '''
    # None entries count as "not clicked" instead of raising on `None > 0`
    return not any(
        n_click is not None and n_click > 0
        for n_click in (n_clicks_list or [])
    )

def return_components(name,ctx):
    '''
    Debugging helper: render a label plus the JSON dump of a
    callback context's inputs, triggered list and states.

    Return this from a callback in place of its real output; be wary
    that if a callback is updated twice, the input state of the
    component needs to be appended.

    name: label shown first
    ctx: object exposing `inputs`, `triggered` and `states`
    '''
    components = [html.Div(name)]
    sections = (
        ('INPUTS',ctx.inputs),
        ('TRIGGERED',ctx.triggered),
        ('STATES',ctx.states),
    )
    for heading,payload in sections:
        components.append(html.Div(heading))
        components.append(html.Pre(json.dumps(payload)))
    return components

#update city button list
#indicate whether selected or not
@app.callback(
    Output('button-container', 'children'),
    Input('city-filter','value'), #change button if diff buttons displayed 
    Input('city-selection-list','children'), #change button if selection (ie button click)
    State({'type':'button','index':ALL},'children') #get current buttons
)
def draw_buttons(city_filter,city_list,buttons):
    '''
    fires for the two events that change button properties:
    that is,either a selection (change color to indicate it is selected) 
    or a new button list (change button text and add the selection color of running selection list)

    city_filter: current value of the 'city-filter' dropdown
    city_list: running list of selected names ('city-selection-list' children)
    buttons: text of the buttons currently displayed

    Returns one html.Button per name, red when the name is in the
    selection list and black otherwise.
    Raises PreventUpdate when neither input triggered the callback.
    '''
    # read the context inside the callback, as update_selections does
    ctx = dash.callback_context

    #1 - find out which input fired; nothing to do without a trigger
    if not ctx.triggered:
        raise PreventUpdate
    # the property name is the part after the LAST dot of prop_id
    triggered_id = ctx.triggered[0]['prop_id'].rsplit('.',1)[0]

    #2a - change text(ie names) and selection color
    if triggered_id == 'city-filter':
        top_cities = list(pick_top_n_of(df,city_filter))
    #2b - keep text(ie names) but  changing selection color
    elif triggered_id == 'city-selection-list':
        top_cities = buttons or []
    else:
        raise PreventUpdate

    # the selection container may hold None instead of a list
    selected_cities = city_list or []

    #3/4 - create one button per name, colored by selection state
    buttons_in_container = [
        html.Button(
            id={'type':'button','index':index},
            children=name,
            style={'color':'red' if name in selected_cities else 'black'},
            n_clicks=0
        )
        for index,name in enumerate(top_cities)
    ]

    return buttons_in_container
    

@app.callback(
    Output('city-selection-list','children'),
    Input({'type':'button','index':ALL},'n_clicks'),
    State('city-selection-list','children'), #determines if triggered button is selected
    State({'type':'button','index':ALL},'children'), #names of individual buttons
)
def update_selections(
    button_n_clicks,
    city_list,
    button_names
):
    '''
    keeps a running list of the selected buttons

    if a click happens and button is selected, then it is deselected

    button_n_clicks: n_clicks of every displayed button
    city_list: names currently selected
    button_names: text of every displayed button

    Returns the updated list of selected names.
    Raises PreventUpdate when no button was clicked or when the clicked
    button is neither selected nor a known city option.
    '''
    ctx = dash.callback_context

    #1-check for button click
    # (prevent_update_if_no_clicks is also True for an empty list)
    if prevent_update_if_no_clicks(button_n_clicks) or not ctx.triggered:
        raise PreventUpdate

    #2-get triggered button information
    # the property name is the part after the LAST dot of prop_id
    triggered_button_id = ctx.triggered[0]['prop_id'].rsplit('.',1)[0]
    triggered_button_name = ctx.states.get(f'{triggered_button_id}.children')

    # work on a copy so the incoming State value is not mutated;
    # also covers a None selection list
    selected_cities = list(city_list or [])

    #3a - deselect city
    if triggered_button_name in selected_cities:
        selected_cities.remove(triggered_button_name)
    #3b - select city
    elif triggered_button_name in city_options:
        selected_cities.append(triggered_button_name)
    #3c - unknown name: the old catch-all `else` called remove() here and
    #     raised ValueError because the name was not in the list
    else:
        raise PreventUpdate

    return selected_cities

@app.callback(
    Output('city-selection-output','children'),
    Input('city-selection-list','children')
)
def visualize_selections(city_list=None):
    '''
    Placeholder for visualizing the selected cities.

    city_list: current children of 'city-selection-list'; Dash passes one
        positional argument per Input, so the callback has to accept it

    Returns None for now, i.e. the output component gets no children.

    NOTE(review): the Output id 'city-selection-output' has to exist in
    app.layout -- confirm the layout contains such a component.
    TODO: build the actual visualization.
    '''
    return None


# Serve the city-selection app on port 8061.
app.run_server(port=8061)

Dash app running on http://127.0.0.1:8061/
1    SF
Name: City, dtype: object
['SF']
['SF']
['SF']
2    Chicago
Name: City, dtype: object
['Chicago']
['Chicago']
['Chicago']
4    Las Vegas
Name: City, dtype: object
['Las Vegas']
1    SF
Name: City, dtype: object
['SF']
['SF']
6    Los Angeles
5       New York
Name: City, dtype: object
['Los Angeles', 'New York']
['Los Angeles', 'New York']
['Los Angeles', 'New York']
2    Chicago
Name: City, dtype: object
['Chicago']
['Chicago']
6    Los Angeles
5       New York
Name: City, dtype: object
['Los Angeles', 'New York']
['Los Angeles', 'New York']
['Los Angeles', 'New York']
2    Chicago
Name: City, dtype: object
['Chicago']
3    Detroit
Name: City, dtype: object


In [66]:
test=[]

In [70]:
if test:
    print('true')

n clicks [0]
n clicks [1]
Detroit
n clicks [0]
n clicks [1]
Chicago


Inputs to update button list

dropdown selection
button click

Inputs to update city list
button click

Inputs to update button attributes
button list


