In [1]:
import dash
from dash.dependencies import Input, Output
import dash_html_components as html
import dash_core_components as dcc
import pandas as pd
import flask
from flask_cors import CORS
import os
import pickle
from sklearn.metrics.pairwise import pairwise_distances
from scipy.stats import entropy
import networkx as nx

In [2]:
app = dash.Dash('restaurant_network')
server = app.server

In [3]:
df = pd.read_pickle('../data/df_final_urls_updated.pkl')

In [4]:

with open('../data/thresh_to_graph.pkl','rb') as f:
    thresh_to_graph = pickle.load(f)
with open('../data/thresh_to_XnYn.pkl','rb') as f:
    thresh_to_XnYn = pickle.load(f)
with open('../data/thresh_to_pos.pkl','rb') as f:
    thresh_to_pos = pickle.load(f)

In [6]:
with open('../data/hyperlinks.pkl', 'rb') as f:
    links = pickle.load(f)

In [7]:
len(links)

228

In [8]:
links[0]

['World Of Beer',
 'https://www.yelp.com/biz/world-of-beer-henderson',
 'https://s3-media4.fl.yelpcdn.com/bphoto/2USetvOI2lp1La_DKR-kCA/ls.jpg']

In [9]:
links_df = pd.DataFrame(data=links, columns=['NAME', 'PAGE', 'IMG_URL'])

In [10]:
links_df

Unnamed: 0,NAME,PAGE,IMG_URL
0,World Of Beer,https://www.yelp.com/biz/world-of-beer-henderson,https://s3-media4.fl.yelpcdn.com/bphoto/2USetv...
1,Pho Kim Long,https://www.yelp.com/biz/pho-kim-long-las-vegas,https://s3-media4.fl.yelpcdn.com/bphoto/Di1D9w...
2,Ohjah Noodle House,https://www.yelp.com/biz/ohjah-noodle-house-la...,https://s3-media4.fl.yelpcdn.com/bphoto/GCOwdw...
3,Metro Diner,https://www.yelp.com/search?ns=1&find_desc=Met...,https://s3-media1.fl.yelpcdn.com/bphoto/HrWg-Q...
4,Egg & I,https://www.yelp.com/search?ns=1&find_desc=Egg...,https://s3-media4.fl.yelpcdn.com/bphoto/zYj2gS...
5,Capo's Italian Cuisine,https://www.yelp.com/biz/capos-italian-cuisine...,https://s3-media2.fl.yelpcdn.com/bphoto/x-EGmJ...
6,Hakkasan Nightclub,https://www.yelp.com/biz/hakkasan-nightclub-la...,https://s3-media2.fl.yelpcdn.com/bphoto/sMbzIi...
7,Andre's Bistro & Bar,https://www.yelp.com/biz/andres-bistro-and-bar...,https://s3-media1.fl.yelpcdn.com/bphoto/ZjZFk8...
8,Wahlburgers,https://www.yelp.com/biz/wahlburgers-las-vegas-2,https://s3-media2.fl.yelpcdn.com/bphoto/Noi7Uk...
9,Morimoto,https://www.yelp.com/biz/morimoto-las-vegas,https://s3-media1.fl.yelpcdn.com/bphoto/Q8d3tF...


In [11]:
df.NAME.nunique()

227

In [12]:
df.drop(['PAGE', 'IMG_URL'], axis=1, inplace=True)

In [13]:
df.NAME.nunique()

227

In [14]:
df.head()

Unnamed: 0,NAME,is_strip,stars,Topic1,Topic2,Topic3,Topic4,Topic5,Topic6,Topic7,...,Topic16,Topic17,Topic18,temp_id,topic1_for_disp,topic2_for_disp,topic3_for_disp,threshold,Xn,Yn
0,Jaburritos,True,4.5 stars,0.031864,0.001086,0.005289,0.007907,0.006254,0.015224,0.013545,...,0.054163,0.013609,0.022229,0,"High Customer Satisfaction: food, service, great","Mexican: taco, mexican, burrito","Lunch: location, place, lunch",0.55,-0.188747,0.013367
1,Le Thai,False,4.0 stars,0.000827,0.081257,0.002911,0.053077,0.575448,0.005392,0.000505,...,0.173229,0.003961,0.005508,1,"Asian: rice, thai, noodle","Fast Food: burger, fry, shake","Bars: bar, beer, drink",0.55,0.141537,0.71538
2,Sugar Factory,True,3.5 stars,0.007088,0.222558,0.009247,0.10758,0.003468,0.009972,0.016713,...,0.318069,0.021889,0.019387,2,"Fast Food: burger, fry, shake","Bars: bar, beer, drink","Fine Dining: dessert, restaurant, dinner",0.55,,
3,Rollin' Smoke BBQ,False,4.0 stars,0.00486,0.000439,0.021188,0.011603,0.042555,0.007211,0.012455,...,0.03354,0.616419,0.10708,3,"Breakfast: sandwich, egg, chicken","Low Customer Satisfaction: bad, order, tell","Coffee Shop: chocolate, tea, coffee",0.55,0.387596,-0.145805
4,PublicUs,False,4.5 stars,0.008916,0.428269,0.010909,0.016861,0.001839,0.003052,0.004282,...,0.101793,0.177481,0.037548,4,"Bars: bar, beer, drink","Breakfast: sandwich, egg, chicken","Fast Food: burger, fry, shake",0.55,,


In [15]:
df = df[pd.notnull(df['NAME'])]

In [16]:
df.isnull().sum()

NAME                  0
is_strip              0
stars                 0
Topic1                0
Topic2                0
Topic3                0
Topic4                0
Topic5                0
Topic6                0
Topic7                0
Topic8                0
Topic9                0
Topic10               0
Topic11               0
Topic12               0
Topic13               0
Topic14               0
Topic15               0
Topic16               0
Topic17               0
Topic18               0
temp_id               0
topic1_for_disp       0
topic2_for_disp       0
topic3_for_disp       0
threshold             0
Xn                 1146
Yn                 1146
dtype: int64

In [17]:
df.NAME.nunique()

227

In [18]:
df.shape

(1888, 28)

In [19]:
df = pd.merge(df, links_df, how='left', on=['NAME'])

In [20]:
df.head()

Unnamed: 0,NAME,is_strip,stars,Topic1,Topic2,Topic3,Topic4,Topic5,Topic6,Topic7,...,Topic18,temp_id,topic1_for_disp,topic2_for_disp,topic3_for_disp,threshold,Xn,Yn,PAGE,IMG_URL
0,Jaburritos,True,4.5 stars,0.031864,0.001086,0.005289,0.007907,0.006254,0.015224,0.013545,...,0.022229,0,"High Customer Satisfaction: food, service, great","Mexican: taco, mexican, burrito","Lunch: location, place, lunch",0.55,-0.188747,0.013367,https://www.yelp.com/biz/jaburritos-las-vegas-14,https://s3-media1.fl.yelpcdn.com/bphoto/dx8gE1...
1,Le Thai,False,4.0 stars,0.000827,0.081257,0.002911,0.053077,0.575448,0.005392,0.000505,...,0.005508,1,"Asian: rice, thai, noodle","Fast Food: burger, fry, shake","Bars: bar, beer, drink",0.55,0.141537,0.71538,https://www.yelp.com/biz/le-thai-las-vegas,https://s3-media3.fl.yelpcdn.com/bphoto/vYnAqI...
2,Sugar Factory,True,3.5 stars,0.007088,0.222558,0.009247,0.10758,0.003468,0.009972,0.016713,...,0.019387,2,"Fast Food: burger, fry, shake","Bars: bar, beer, drink","Fine Dining: dessert, restaurant, dinner",0.55,,,https://www.yelp.com/biz/sugar-factory-las-veg...,https://s3-media4.fl.yelpcdn.com/bphoto/HoliXl...
3,Rollin' Smoke BBQ,False,4.0 stars,0.00486,0.000439,0.021188,0.011603,0.042555,0.007211,0.012455,...,0.10708,3,"Breakfast: sandwich, egg, chicken","Low Customer Satisfaction: bad, order, tell","Coffee Shop: chocolate, tea, coffee",0.55,0.387596,-0.145805,https://www.yelp.com/biz/rollin-smoke-bbq-las-...,https://s3-media3.fl.yelpcdn.com/bphoto/nouR2j...
4,PublicUs,False,4.5 stars,0.008916,0.428269,0.010909,0.016861,0.001839,0.003052,0.004282,...,0.037548,4,"Bars: bar, beer, drink","Breakfast: sandwich, egg, chicken","Fast Food: burger, fry, shake",0.55,,,https://www.yelp.com/biz/publicus-las-vegas,https://s3-media2.fl.yelpcdn.com/bphoto/ethBof...


In [21]:
df.shape

(1888, 30)

In [34]:
df.to_pickle('./data/df_final_urls_updated.pkl')

In [38]:
if 'DYNO' in os.environ:
    app.scripts.append_script({
        'external_url': 'https://cdn.rawgit.com/chriddyp/ca0d8f02a1659981a0ea7f013a378bbd/raw/e79f3f789517deec58f41251f7dbb6bee72c44ab/plotly_ga.js'
    })
    
BACKGROUND = 'rgb(230, 230, 230)'

COLORSCALE = [ [0, "rgb(244,236,21)"], [0.3, "rgb(249,210,41)"], [0.4, "rgb(134,191,118)"],
                [0.5, "rgb(37,180,167)"], [0.65, "rgb(17,123,215)"], [1, "rgb(54,50,153)"] ]

In [39]:
# arbitrary threshold for deciding whether 2 observations are 'similar' or not
threshold_all = [0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62]
def th_mark(x):
    if x==min(threshold_all):
        return 'Low'
    elif x==max(threshold_all):
        return 'High'
    else:
        return ''
    
threshold_mark = {str(th):th_mark(th) for th in threshold_all}

In [40]:
def update_slider_mark(slider_mark, font_size):
    # update display style of position markers for the slider
    slider_mark_updated = {}
    for position in slider_mark:
        slider_mark_updated[position] = {
            'label': slider_mark[position],
            'style': {'fontSize':font_size, 'font-family': 'Arial'}
        }
    return slider_mark_updated

threshold_mark_updated = update_slider_mark(threshold_mark, 15)

In [9]:
# figure data is the data object we pass into figure function 
# molecules will be the selected business
# change this function for our needs
def add_markers(selected_threshold, df, molecules, plot_type = 'scatter' ):
    indices = []
    drug_data = df
    for m in molecules:
        # this is the text attribute of data object 
        hover_text = drug_data.NAME.tolist()
        for i in range(len(hover_text)):
            if m == hover_text[i]:
                indices.append(i)

    trace_markers = []
    for point_number in indices:
        trace = dict(
            x = [drug_data.loc[point_number,'Xn']],
            y = [drug_data.loc[point_number,'Yn']],
            marker = dict(
                color = 'rgb(102, 255, 51)',
                size = 20,
                opacity = 0.6,
                symbol = 'cross'),
                hoverinfo=None,
                showlegend=False,
            type = plot_type
        )
        trace_markers.append(trace)  
        print(trace_markers)
    Xn_strip, Yn_strip, Xn_notstrip, Yn_notstrip = thresh_to_XnYn[selected_threshold]
    trace_nodes1 =[]
    trace_nodes2 = []
    # define a trace for plotly
    trace_nodes1 = dict(type='scatter', 
                        x=Xn_strip, 
                        y=Yn_strip,
                        mode='markers',
                        marker=dict(symbol='dot', 
                                    size=10, color='rgb(255,0,0)'),
                        name='On The Strip',
                        showlegend=True, 
                        text = [],
                        hoverinfo='text',
                        visible=True)
    trace_nodes2 = dict(type='scatter', 
                        x=Xn_notstrip, 
                        y=Yn_notstrip,
                        mode='markers',
                        marker=dict(symbol='dot', 
                                    size=10, color='rgb(0, 0, 255)'),
                        name='Not on The Strip',
                        showlegend=True, 
                        text = [],
                        hoverinfo='text',
                        visible=True)        
    
    # record the coordinates of the ends of edges
    Xe = []
    Ye = []
    G = thresh_to_graph[selected_threshold]
    for e in G.edges():
        pos = thresh_to_pos[selected_threshold]
        Xe.extend([pos[e[0]][0], pos[e[1]][0], None])
        Ye.extend([pos[e[0]][1], pos[e[1]][1], None])

    # trace_edges defines the graph edges as a trace of type scatter (line)
    trace_edges = []
    trace_edges=dict(type='scatter',
                     mode='lines',
                     x=Xe,
                     y=Ye,
                     line=dict(width=0.1, color='rgb(51, 51, 51)'),
                     hoverinfo='none', showlegend=False)
    traces = [trace_nodes1] + [trace_nodes2] + [trace_edges]+trace_markers
    return traces

# change this function 
def scatter_plot_3d(selected_threshold, xlabel='', ylabel='', plot_type='scatter', markers=[]):
    df_for_plot = df.copy()
    df_for_plot = df_for_plot.loc[(df_for_plot.Xn.isnull() == False) & (df_for_plot.Yn.isnull() == False) & (df_for_plot.threshold == selected_threshold), :]
    df_for_plot = df_for_plot.reset_index().drop('index',axis=1)
    x= df_for_plot['Xn'] 
    y= df_for_plot['Yn']
#     size= df_for_plot['MW']
#     color= df_for_plot['MW']
    def axis_template_2d(title):
        return dict(
            xgap = 10, ygap = 10,
            backgroundcolor = BACKGROUND,
            gridcolor = 'rgb(255, 255, 255)',
            title = title,
            zerolinecolor = 'rgb(255, 255, 255)',
            showspikes=True,
            spikethickness=1,
            spikedash='solid',
            spikemode='across',
            showticklabels = False
        )
    # change data
    data = [dict(
        x = x,
        y = y,
        mode = 'markers',
        text = df_for_plot['NAME'],
        type = plot_type,
    ) ]
    # change layour
    layout = dict(
        font = dict( family = 'Raleway' ),
        hovermode = 'closest',
        hoverdistance = 15,
        margin = dict( r=0, t=0, l=0, b=0 ),
        showlegend = True,
        legend=dict(x=0, y=1)
    )
    # change scatter
    if plot_type in ['scatter']:
        layout['xaxis'] = axis_template_2d(xlabel)
        layout['yaxis'] = axis_template_2d(ylabel)
        layout['plot_bgcolor'] = 'rgba(0,0,0,0)'
        layout['paper_bgcolor'] = 'rgba(0,0,0,0)'
        
    # keep this
    
    data = add_markers(selected_threshold, df_for_plot, markers, plot_type = plot_type )

    return dict(data=data, layout=layout)

selected_threshold = threshold_all[0]
FIGURE = scatter_plot_3d(selected_threshold)
df_for_plot = df.copy()
df_for_plot = df_for_plot.loc[(df_for_plot.Xn.isnull() == False) & (df_for_plot.Yn.isnull() == False) & (df_for_plot.threshold == selected_threshold), :]
df_for_plot = df_for_plot.reset_index().drop('index',axis=1)
STARTING_DRUG = df_for_plot.loc[0,'NAME']
# DRUG_DESCRIPTION = df_for_plot.loc[df_for_plot['NAME'] == STARTING_DRUG]['DESC'].iloc[0]
STAR_RATING =df_for_plot.loc[df_for_plot['NAME'] == STARTING_DRUG]['stars'].iloc[0]
DRUG_IMG = df_for_plot.loc[df_for_plot['NAME'] == STARTING_DRUG]['IMG_URL'].iloc[0]
topic1 = df_for_plot.loc[df_for_plot['NAME'] == STARTING_DRUG]['topic1_for_disp'].iloc[0]
topic2 = df_for_plot.loc[df_for_plot['NAME'] == STARTING_DRUG]['topic2_for_disp'].iloc[0]
topic3 = df_for_plot.loc[df_for_plot['NAME'] == STARTING_DRUG]['topic3_for_disp'].iloc[0]

In [10]:
app.layout = html.Div([
    html.H2('Network of Restaurants based on User Reviews',style ={'textAlign':'center'}),
    # Row 1: Header and Intro text
    html.Div([
        html.Div([
            html.Div([
            ], style={'margin-left': '10px'}),
            dcc.Dropdown(id='chem_dropdown',
                        multi=True,
                        value=[ STARTING_DRUG ],
                        options=[{'label': i, 'value': i} for i in df_for_plot['NAME'].tolist()]),
            ], className='twelve columns' )

    ], className='row' ),

    # Row 2: Hover Panel and Graph
    html.Div([
        html.Div([
        html.Div([
            html.Br(),
            
            html.Img(id='chem_img', src=DRUG_IMG,style=dict(width='150px',height='150px')),

            html.Br(),
            
            html.A(STARTING_DRUG,
                  id='chem_name',
                  href="https://www.drugbank.ca/drugs/DB01002",
                  target="_blank"),

            html.Br(),
### Star rating
            html.Div([
                    html.Div(html.B("Average Rating")),
                    html.Div(STAR_RATING, id='star_rating',style={'marginTop':'0.005em'})]),

            html.Br(),
### Words in a topictopic1_for_disp
            html.Div([
                    html.Div(html.B("Major Topics in Reviews")),
                    html.Div(topic1, id='topic1',style={'marginTop':'0.005em'}),
                    html.Div(topic2, id='topic2',style={'marginTop':'0.005em'}),
                    html.Div(topic3, id='topic3',style={'marginTop':'0.005em'})]), 

        ],className ='row',style=dict(height = '450px')),
        html.Br(),
        html.Div([
        html.Div([
            html.Div('Similarity Cutoff',style=dict( maxHeight='200px', fontSize='20px' )),
            dcc.Slider(
                id='threshold-slider',
                min=min(threshold_all),
                max=max(threshold_all),
                value=selected_threshold,
                step=None,
                marks=threshold_mark_updated),
                ], style={'width': '100%','marginBottom': 0, 'marginTop': 0, 'marginLeft':'auto', 'marginRight':'auto',
                  'fontSize':12, 'font-family': 'Arial'},className='three columns')
    ],className='row')
        
        ], className='three columns', style=dict(height='300px')),

        html.Div([
            dcc.Graph(id='clickable-graph',
                      style=dict(width='700px',height='550px'),
                      hoverData=dict(points=[dict(pointNumber=0)] ),
                      figure=FIGURE ),

        ], className='nine columns', style=dict(textAlign='center')),
    ], className='row' ),

], className='container')


@app.callback(
    Output('clickable-graph', 'figure'),
    [Input('chem_dropdown', 'value'), Input('threshold-slider', 'value')])
def highlight_molecule(chem_dropdown_values, selected_threshold):
    return scatter_plot_3d(selected_threshold=selected_threshold, 
                           markers = chem_dropdown_values, plot_type = 'scatter')



def dfRowFromHover(hoverData,selected_threshold):
    ''' Returns row for hover point as a Pandas Series '''
    if hoverData is not None:
        if 'points' in hoverData:
            firstPoint = hoverData['points'][0]
            if 'pointNumber' in firstPoint:
                point_number = firstPoint['pointNumber']
#                 molecule_name = str(FIGURE['data'][0]['text'][point_number]).strip()

                df_for_plot = df.copy()
                df_for_plot = df_for_plot.loc[(df_for_plot.Xn.isnull() == False) & (df_for_plot.Yn.isnull() == False) & (df_for_plot.threshold == selected_threshold), :]
                df_for_plot = df_for_plot.reset_index().drop('index',axis=1)
                molecule_name = df_for_plot.loc[point_number,'NAME']
                return df_for_plot.loc[df_for_plot['NAME'] == molecule_name]
    return pd.Series()


@app.callback(
    dash.dependencies.Output('chem_dropdown', 'options'),
    [dash.dependencies.Input('threshold-slider', 'value')])
def set_dropdown_options(selected_threshold):
    df_for_plot = df.copy()
    df_for_plot = df_for_plot.loc[(df_for_plot.Xn.isnull() == False) & (df_for_plot.Yn.isnull() == False) & (df_for_plot.threshold == selected_threshold), :]
    df_for_plot = df_for_plot.reset_index().drop('index',axis=1)
    return [{'label': i, 'value': i} for i in df_for_plot['NAME'].tolist()]

@app.callback(
    dash.dependencies.Output('chem_dropdown', 'value'),
    [dash.dependencies.Input('chem_dropdown', 'options')])
def set_dropdown_value(available_options):
    return available_options[0]['value']

@app.callback(
    Output('chem_name', 'children'),
    [Input('clickable-graph', 'hoverData'),Input('threshold-slider', 'value')])
def return_molecule_name(hoverData,selected_threshold):
    if hoverData is not None:
        if 'points' in hoverData:
            firstPoint = hoverData['points'][0]
            if 'pointNumber' in firstPoint:
                point_number = firstPoint['pointNumber']
                df_for_plot = df.copy()
                df_for_plot = df_for_plot.loc[(df_for_plot.Xn.isnull() == False) & (df_for_plot.Yn.isnull() == False) & (df_for_plot.threshold == selected_threshold), :]
                df_for_plot = df_for_plot.reset_index().drop('index',axis=1)
                try:
                    molecule_name = df_for_plot.loc[point_number,'NAME']
                except KeyError:
                    return None
    
                return molecule_name


@app.callback(
    dash.dependencies.Output('chem_name', 'href'),
    [dash.dependencies.Input('clickable-graph', 'hoverData'),Input('threshold-slider', 'value')])
def return_href(hoverData,selected_threshold):
    row = dfRowFromHover(hoverData,selected_threshold)
    if row.empty:
        return
    datasheet_link = row['PAGE'].iloc[0]
    return datasheet_link


@app.callback(
    Output('chem_img', 'src'),
    [Input('clickable-graph', 'hoverData'),Input('threshold-slider', 'value')])
def display_image(hoverData,selected_threshold):
    row = dfRowFromHover(hoverData,selected_threshold)
    if row.empty:
        return
    img_src = row['IMG_URL'].iloc[0]
    return img_src

@app.callback(
    Output('star_rating', 'children'),
    [Input('clickable-graph', 'hoverData'),Input('threshold-slider', 'value')])
def display_star(hoverData,selected_threshold):
    row = dfRowFromHover(hoverData,selected_threshold)
    if row.empty:
        return
    star = row['stars'].iloc[0]
    return star

@app.callback(
    Output('topic1', 'children'),
    [Input('clickable-graph', 'hoverData'),Input('threshold-slider', 'value')])
def display_topic1(hoverData,selected_threshold):
    row = dfRowFromHover(hoverData,selected_threshold)
    if row.empty:
        return
    topic1 = row['topic1_for_disp'].iloc[0]
    return topic1

@app.callback(
    Output('topic2', 'children'),
    [Input('clickable-graph', 'hoverData'),Input('threshold-slider', 'value')])
def display_topic2(hoverData,selected_threshold):
    row = dfRowFromHover(hoverData,selected_threshold)
    if row.empty:
        return
    topic2 = row['topic2_for_disp'].iloc[0]
    return topic2

@app.callback(
    Output('topic3', 'children'),
    [Input('clickable-graph', 'hoverData'),Input('threshold-slider', 'value')])
def display_topic3(hoverData,selected_threshold):
    row = dfRowFromHover(hoverData,selected_threshold)
    if row.empty:
        return
    topic3 = row['topic3_for_disp'].iloc[0]
    return topic3

In [11]:
external_css = ["https://cdnjs.cloudflare.com/ajax/libs/skeleton/2.0.4/skeleton.min.css",
                "//fonts.googleapis.com/css?family=Raleway:400,300,600",
                "//fonts.googleapis.com/css?family=Dosis:Medium",
                "https://cdn.rawgit.com/plotly/dash-app-stylesheets/0e463810ed36927caf20372b6411690692f94819/dash-drug-discovery-demo-stylesheet.css"]


for css in external_css:
    app.css.append_css({"external_url": css})


if __name__ == '__main__':
    app.run_server(port=8052)


 * Running on http://127.0.0.1:8052/ (Press CTRL+C to quit)


In [12]:
fig = dict(data=data, layout=layout)

NameError: name 'data' is not defined