In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import statistics
from collections import defaultdict

%matplotlib inline

### Filter city area out of all businesses

In [2]:
business=pd.read_csv("yelp_business.csv")

In [3]:
# Cleveland Area
min_long = -82.354646
max_long = -81.338411
min_lat = 41.114023
max_lat = 41.730589

city_biz = business[(business.latitude < max_lat) &
                         (business.latitude > min_lat) &
                         (business.longitude < max_long) &
                         (business.longitude > min_long)]

### Convert lat/long into miles and compute 1-mile increments for x and y directions

In [4]:
# Compute degrees lat per mile (N-S) and degrees long per mile (E-W)
ns_deg_per_mi = 1/69.01
ew_deg_per_mi = 1/(math.cos(
                            statistics.mean([min_lat, max_lat])*(math.pi/180)
                            ) * 69.1710411)

# Compute miles NS and EW for selected city area
miles_ns = (max_lat - min_lat)/ns_deg_per_mi
miles_ew = (max_long - min_long)/ew_deg_per_mi

# Create x axis (E-W) and y axis (N-S) 1-mile block increments by lat/long
x_incr = (max_long - min_long)/miles_ew
y_incr = (max_lat - min_lat)/miles_ns

### Assign each business to a block on the grid through interpolation

In [5]:
city_biz['x'] = round((city_biz['longitude'] - min_long) *
                           (miles_ew - 0) / ((max_long - min_long))).astype(int)
city_biz['y'] = round((city_biz['latitude'] - min_lat) *
                          (miles_ns - 0) / ((max_lat - min_lat))).astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


### Split the category strings into individual words and save in new column "cat_words"

In [6]:
# category_splitter returns a list of the category words for a given business_id
def category_splitter(category_string):
    categories = category_string.lower().split()
    categories = [x.split(';') for x in categories]
    categories = [item for sublist in categories for item in sublist] # flatten nested lists
    return categories

In [7]:
city_biz['cat_words'] = city_biz['categories'].map(lambda x: category_splitter(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


### Return names and locations of businesses (and star rating) from a search term

In [14]:
search_word = 'gelato'

def category_searcher(category_series, search_str):
    return category_series.map(lambda x: search_str in x)

search_results = city_biz[category_searcher(city_biz['cat_words'], search_word)]

#return just name and grid location from search results
results_name_loc_and_stars = search_results[['name','x','y','stars']]

print(results_name_loc_and_stars)

                               name   x   y  stars
29616       "Vero Pizza Napoletana"  39  27    4.0
47218   "Molto Bene Italian Eatery"  27  25    4.5
55130                 "Gelato Star"  34  22    4.5
71851               "CoCo's Gelato"  17  25    4.5
84528       "Market District Solon"  48  19    3.5
103052                "La Pizzeria"  39  27    3.5
115998   "Kool Berry Frozen Yogurt"  24  18    4.5
160377     "Cibreo Italian Kitchen"  35  27    3.5
