In [1]:
from ipywidgets import interact, interact_manual
from nltk.corpus import stopwords
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import folium
import json
import re

## New York Cuisine Map

In [2]:
# Load the cuisine density table; the first CSV column becomes the index.
df_zip3 = pd.read_csv(
    'cuisine_density_differences.csv',
    index_col=0,
)

In [32]:
# Store postal codes as strings (presumably so they match the `postalCode`
# property of the GeoJSON features they are joined on -- TODO confirm).
# Plain column assignment replaces the column outright; writing the strings
# through `.loc[:, col] = ...` sets values in place against the existing dtype,
# which newer pandas versions deprecate when the dtypes are incompatible.
df_zip3['postalCode'] = df_zip3['postalCode'].astype(str)

# Limit the first 50 columns (the ones offered in the cuisine dropdown) to the
# range [-1, 1].
df_zip3.iloc[:, :50] = df_zip3.iloc[:, :50].clip(-1, 1)

In [9]:
# GeoJSON with the New York ZIP-code tabulation area polygons.
ny_zips_url = (
    'https://raw.githubusercontent.com/fedhere/PUI2015_EC/master/'
    'mam1612_EC/nyc-zip-code-tabulation-areas-polygons.geojson'
)

# Centre of the map, passed to folium as its `location`.
NY_COORDINATES = (40.7127753, -74.0059728)

In [36]:
def draw_map(cuisine_name):
    """Build a New York choropleth coloured by one column of ``df_zip3``.

    Parameters
    ----------
    cuisine_name : str
        Name of the ``df_zip3`` column whose values colour the ZIP-code areas.

    Returns
    -------
    folium.Map
        Map centred on ``NY_COORDINATES`` with the choropleth layer added and
        a tooltip showing each area's ``PO_NAME`` and ``postalCode``.
    """
    base_map = folium.Map(location=NY_COORDINATES, zoom_start=11, tiles='OpenStreetMap')

    layer_options = dict(
        geo_data=ny_zips_url,
        name='choropleth',
        data=df_zip3,
        # Key column first, then the value column to colour by.
        columns=['postalCode', cuisine_name],
        # GeoJSON property matched against the key column above.
        key_on='feature.properties.postalCode',
        fill_color='RdBu',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name='Restaurant density difference',
    )
    density_layer = folium.Choropleth(**layer_options)
    density_layer.add_to(base_map)

    area_tooltip = folium.features.GeoJsonTooltip(['PO_NAME', 'postalCode'])
    density_layer.geojson.add_child(area_tooltip)
    return base_map

In [38]:
# Show a dropdown listing the first 50 columns of df_zip3; draw_map is called
# with the selected column name and the map it returns is displayed.
interact(draw_map, cuisine_name=list(df_zip3.columns[:50]))

interactive(children=(Dropdown(description='cuisine_name', options=('sandwiches', 'pizza', 'asian', 'salads', …

<function __main__.draw_map(cuisine_name)>

## New York Menu Prices

In [16]:
# Read the menu items (first CSV column is the index) and drop every row that
# has at least one missing value.
er = (
    pd.read_csv('menu_items2.csv', index_col=0)
    .dropna()
)

In [49]:
# clean_stopwords = set([re.sub('[^a-z]', '', w) for w in stopwords.words('English')])

def clean(w):
    """Normalise a text into the set of its lower-case words.

    The text is lower-cased, every character other than ``a``-``z`` and the
    space is removed, and the remainder is split on whitespace. Runs of
    spaces (for example those left behind when punctuation such as ``&`` is
    stripped) do not produce an empty-string token, and a text without any
    letters yields an empty set.

    Parameters
    ----------
    w : str
        Raw text, e.g. a product description or a search phrase.

    Returns
    -------
    set of str
        The distinct words of ``w``. Stop words are not filtered out.
    """
    letters_only = re.sub('[^a-z ]', '', w.lower())
    # split() without an argument collapses repeated spaces and ignores
    # leading/trailing ones, so '' never ends up in the result.
    return set(letters_only.split())

In [50]:
# Pre-compute the word set of every product description once, so that each
# search only has to intersect sets.
er['product_description_clean'] = er['product_description'].map(clean)

In [51]:
def item_price_dist(item_name):
    """Plot the price distribution of the menu items best matching ``item_name``.

    Every row of ``er`` is scored by the fraction of the query's words that
    occur in its ``product_description_clean`` set (an overlap score, not a
    true Jaccard index). The rows sharing the highest score are kept, prices
    outside the open interval (0, 50) are discarded, and the remaining
    ``item_price_minus_add_ons`` values are drawn as a density histogram with
    one-unit-wide bins centred on whole numbers.

    A short message is printed, and nothing is plotted, when the query has no
    words, when no description shares a word with it, or when no matching
    price lies inside (0, 50).

    Parameters
    ----------
    item_name : str
        Free-text name of the menu item to look up; also used in the title.

    Returns
    -------
    None
    """
    query_words = clean(item_name)
    # `clean` may emit an empty-string token for blank input or repeated
    # spaces; it must count neither as a query word nor as a match.
    query_words.discard('')
    if not query_words:
        print(f'No searchable words in {item_name!r}.')
        return

    matched_counts = er['product_description_clean'].apply(lambda words: len(query_words & words))
    overlap = matched_counts / len(query_words)
    best_overlap = overlap.max()
    # A best score of 0 means every row ties for "best" and the whole menu
    # would be plotted under this item's name; NaN means `er` has no rows.
    if not best_overlap > 0:
        print(f'No menu item matches {item_name!r}.')
        return

    item_prices = er.loc[overlap == best_overlap, 'item_price_minus_add_ons']
    item_prices = item_prices[(item_prices > 0) & (item_prices < 50)]
    if item_prices.empty:
        print(f'No prices between 0 and 50 found for {item_name!r}.')
        return

    # np.arange excludes its stop value, so go one step past the last edge
    # that is needed (ceil(max) + 0.5); otherwise the highest prices fall
    # outside the final bin and are silently left out of the histogram.
    bin_edges = np.arange(np.floor(item_prices.min()) - 0.5,
                          np.ceil(item_prices.max()) + 1.5)
    item_prices.plot.hist(figsize=(12, 4), bins=bin_edges, density=True)
    plt.xlabel('Price ($)')
    plt.title(f'{item_name} Price Distribution')
    plt.show()

In [52]:
# interact_manual renders a text box pre-filled with 'Massaman Curry' plus a
# "Run Interact" button; item_price_dist runs only when the button is pressed.
interact_manual(item_price_dist, item_name='Massaman Curry')

interactive(children=(Text(value='Massaman Curry', description='item_name'), Button(description='Run Interact'…

<function __main__.item_price_dist(item_name)>