In [1]:
from collections import defaultdict
import pandas as pd
import numpy as np
import math
import statistics
import re
import json

In [2]:
# Read the business table from yelp_business.csv into a DataFrame.
business = pd.read_csv(filepath_or_buffer="yelp_business.csv")

In [3]:
# Keep only rows whose state is "WI" and whose is_open flag equals 1.
# The explicit .copy() makes `business` an independent frame, so the
# column assignments made to it afterwards (business['categories'] = ...)
# write to a real DataFrame instead of a slice of the original, which would
# otherwise raise pandas' SettingWithCopyWarning.
wi_open_mask = (business["state"] == "WI") & (business["is_open"] == 1)
business = business[wi_open_mask].copy()


In [4]:
def category_splitter(category_string):
    """Split a ';'-delimited category string into a list of category names.

    Parameters
    ----------
    category_string : str
        Categories joined by semicolons, e.g. "Food;Bars".

    Returns
    -------
    list of str
        The pieces between semicolons, in their original order and with no
        trimming or case changes applied.
    """
    return category_string.split(";")
# Replace every ';'-delimited category string with its list of category names.
business['categories'] = business['categories'].map(category_splitter)


In [5]:
def first_list(category_list):
    """Return the first element of ``category_list``.

    Parameters
    ----------
    category_list : sequence
        A non-empty indexable sequence (here, the list of category names).

    Returns
    -------
    object
        The element at index 0.

    Raises
    ------
    IndexError
        If ``category_list`` is empty.
    """
    return category_list[0]
# Reduce each category list to its first entry, lower-cased and with
# apostrophes stripped, in a single pass over the column.
business['categories'] = business['categories'].map(
    lambda cats: re.sub(r'\'', '', first_list(cats).lower())
)

# Display the frame without these columns; the result is not assigned,
# so `business` itself keeps all of its columns.
business.drop(columns=["business_id","neighborhood","address","review_count","is_open"])


Unnamed: 0,name,city,state,postal_code,latitude,longitude,stars,categories
17,"""Meineke Car Care Center""",Sun Prairie,WI,53590,43.185080,-89.262047,3.5,tires
47,"""William Jon Salon & Spa""",Madison,WI,53705,43.073145,-89.434402,4.5,tanning
74,"""Banana Republic""",Madison,WI,53719,43.057090,-89.509300,2.0,mens clothing
81,"""Villa Tap""",Madison,WI,53704,43.123389,-89.355390,4.0,american (traditional)
85,"""The Brink Lounge""",Madison,WI,53703,43.079817,-89.376296,3.5,event planning & services
111,"""Spay Me! Clinic""",Madison,WI,53714,43.091157,-89.308487,3.0,veterinarians
164,"""Apple Wellness - Fitchburg Health Store""",Fitchburg,WI,53719,43.015367,-89.481681,4.5,health markets
320,"""Walgreens""",Sun Prairie,WI,53590,43.186925,-89.228824,2.0,drugstores
358,"""J&P Fresh Market""",Madison,WI,53719,43.050634,-89.497717,4.0,grocery
465,"""Stanton Optical""",Madison,WI,53704,43.127049,-89.310758,1.5,health & medical


In [6]:
# Write the current `business` frame (index included) to wi_data.csv.
business.to_csv(path_or_buf="wi_data.csv")

In [7]:
# Build the word list for the typeahead: the distinct category names in
# sorted order (np.unique sorts its result).
raw_unique_cats = list(np.unique(business["categories"]))

# Strip apostrophes and lower-case each distinct name.
unique_cats = []
for raw_cat in raw_unique_cats:
    unique_cats.append(re.sub(r'\'', '', raw_cat).lower())

print(unique_cats)

['accessories', 'accountants', 'active life', 'acupuncture', 'adult', 'adult education', 'afghan', 'airport shuttles', 'amateur sports teams', 'american (new)', 'american (traditional)', 'amusement parks', 'animal shelters', 'antiques', 'apartments', 'appliances', 'appliances & repair', 'arcades', 'art classes', 'art galleries', 'art museums', 'art supplies', 'arts & crafts', 'arts & entertainment', 'asian fusion', 'attraction farms', 'auto customization', 'auto detailing', 'auto glass services', 'auto insurance', 'auto loan providers', 'auto parts & supplies', 'auto repair', 'automotive', 'ayurveda', 'baby gear & furniture', 'bagels', 'bakeries', 'banks & credit unions', 'barbeque', 'barbers', 'bars', 'bartenders', 'basque', 'beaches', 'beauty & spas', 'bed & breakfast', 'beer', 'beer bar', 'bike rentals', 'bike repair/maintenance', 'bikes', 'bistros', 'blow dry/out services', 'boating', 'body shops', 'books', 'bookstores', 'boot camps', 'botanical gardens', 'bowling', 'boxing', 'bras

# Create JSON of Map Grid of Boxes

### Filter city area out of all businesses

In [8]:
# Madison Area: bounding box in decimal degrees.
min_lat, max_lat = 43.000413, 43.177176
min_long, max_long = -89.573277, -89.244374

# Rows strictly inside the box on each axis (points on the border are excluded).
lat_inside = (business.latitude > min_lat) & (business.latitude < max_lat)
long_inside = (business.longitude > min_long) & (business.longitude < max_long)

city_biz = business[lat_inside & long_inside]

### Compute lat and long increments

In [9]:
# Degrees of latitude per mile (N-S), from a fixed miles-per-degree figure.
ns_deg_per_mi = 1/69.01

# Degrees of longitude per mile (E-W): the miles-per-degree figure is scaled
# by the cosine of the mean of min_lat and max_lat (converted to radians).
mid_lat_radians = statistics.mean([min_lat, max_lat])*(math.pi/180)
ew_miles_per_degree = math.cos(mid_lat_radians) * 69.1710411
ew_deg_per_mi = 1/(ew_miles_per_degree)

# Extent of the selected city area in degrees and in miles.
lat_span = max_lat - min_lat
long_span = max_long - min_long
miles_ns = lat_span/ns_deg_per_mi
miles_ew = long_span/ew_deg_per_mi

# Grid step in degrees along each axis: the span divided by its length in
# miles, i.e. one step per mile.
delta_long = long_span/miles_ew
delta_lat = lat_span/miles_ns

### Get counts of number of each category in a grid box

In [10]:
def get_category_counts(minimum_lat, minimum_long, d_lat, d_long, businesses=None):
    """Count businesses per category inside one grid box.

    The box is the half-open rectangle
    ``[minimum_lat, minimum_lat + d_lat) x [minimum_long, minimum_long + d_long)``.
    Using a closed lower edge and an open upper edge means a business lying
    exactly on the edge shared by two adjacent boxes is counted in exactly
    one of them (strict inequalities on both sides would drop it from both).

    Parameters
    ----------
    minimum_lat, minimum_long : float
        South-west corner of the box, in degrees.
    d_lat, d_long : float
        Box height and width, in degrees.
    businesses : pandas.DataFrame, optional
        Frame with 'latitude', 'longitude' and 'categories' columns. When
        omitted, the module-level ``business`` frame is used.

    Returns
    -------
    collections.defaultdict
        Maps category -> number of businesses in the box (plain Python ints,
        keys in order of first appearance). If the box holds no business,
        the single entry ``{'no business found in this area': 1}`` is
        returned instead.
    """
    if businesses is None:
        # Fall back to the notebook-level frame, looked up at call time.
        businesses = business

    lat = businesses['latitude']
    lng = businesses['longitude']
    # One vectorised mask replaces the per-row Python loop over iterrows().
    in_box = (
        (lat >= minimum_lat) & (lat < minimum_lat + d_lat)
        & (lng >= minimum_long) & (lng < minimum_long + d_long)
    )

    category_dict = defaultdict(int)
    # Count in plain Python so the values stay JSON-serialisable ints.
    for category in businesses.loc[in_box, 'categories']:
        category_dict[category] += 1
    if not category_dict:
        category_dict['no business found in this area'] += 1
    return category_dict
        

### Create list of grid box dictionaries

In [11]:
def _axis_starts(start, stop, step):
    """Yield box origins start, start+step, ... (by repeated addition) while below stop."""
    position = start
    while position < stop:
        yield position
        position += step


# One dictionary per grid box, walking latitude in the outer loop and
# longitude in the inner loop.
boxes = []
for south in _axis_starts(min_lat, max_lat, delta_lat):
    for west in _axis_starts(min_long, max_long, delta_long):
        cat_counts = get_category_counts(south, west, delta_lat, delta_long)
        box = dict(
            min_lat=south,
            max_lat=south + delta_lat,
            min_long=west,
            max_long=west + delta_long,
            category_counts=cat_counts,
        )
        print(box)
        boxes.append(box)
        

{'min_lat': 43.000413, 'max_lat': 43.01490365352848, 'min_long': -89.573277, 'max_long': -89.55348102805317, 'category_counts': defaultdict(<class 'int'>, {'no business found in this area': 1})}
{'min_lat': 43.000413, 'max_lat': 43.01490365352848, 'min_long': -89.55348102805317, 'max_long': -89.53368505610634, 'category_counts': defaultdict(<class 'int'>, {'no business found in this area': 1})}
{'min_lat': 43.000413, 'max_lat': 43.01490365352848, 'min_long': -89.53368505610634, 'max_long': -89.5138890841595, 'category_counts': defaultdict(<class 'int'>, {'cosmetic dentists': 1, 'beauty & spas': 1, 'tobacco shops': 1, 'home services': 1, 'sports bars': 1, 'restaurants': 2, 'nightlife': 1, 'chinese': 1, 'health & medical': 1})}
{'min_lat': 43.000413, 'max_lat': 43.01490365352848, 'min_long': -89.5138890841595, 'max_long': -89.49409311221267, 'category_counts': defaultdict(<class 'int'>, {'trainers': 1, 'restaurants': 1, 'sandwiches': 1, 'food': 1, 'barbers': 1, 'beauty & spas': 1, 'beer'

{'min_lat': 43.01490365352848, 'max_lat': 43.029394307056954, 'min_long': -89.3951132524785, 'max_long': -89.37531728053166, 'category_counts': defaultdict(<class 'int'>, {'automotive': 1})}
{'min_lat': 43.01490365352848, 'max_lat': 43.029394307056954, 'min_long': -89.37531728053166, 'max_long': -89.35552130858483, 'category_counts': defaultdict(<class 'int'>, {'no business found in this area': 1})}
{'min_lat': 43.01490365352848, 'max_lat': 43.029394307056954, 'min_long': -89.35552130858483, 'max_long': -89.33572533663799, 'category_counts': defaultdict(<class 'int'>, {'active life': 1})}
{'min_lat': 43.01490365352848, 'max_lat': 43.029394307056954, 'min_long': -89.33572533663799, 'max_long': -89.31592936469116, 'category_counts': defaultdict(<class 'int'>, {'no business found in this area': 1})}
{'min_lat': 43.01490365352848, 'max_lat': 43.029394307056954, 'min_long': -89.31592936469116, 'max_long': -89.29613339274432, 'category_counts': defaultdict(<class 'int'>, {'health & medical':

{'min_lat': 43.04388496058543, 'max_lat': 43.058375614113906, 'min_long': -89.55348102805317, 'max_long': -89.53368505610634, 'category_counts': defaultdict(<class 'int'>, {'toy stores': 1})}
{'min_lat': 43.04388496058543, 'max_lat': 43.058375614113906, 'min_long': -89.53368505610634, 'max_long': -89.5138890841595, 'category_counts': defaultdict(<class 'int'>, {'event planning & services': 2, 'massage': 1, 'chiropractors': 1, 'food': 1, 'tanning': 1, 'grocery': 1, 'restaurants': 1, 'golf': 1, 'eyewear & opticians': 1, 'pet training': 1, 'cinema': 1, 'pet services': 1, 'home services': 1, 'real estate': 2, 'beauty & spas': 1, 'hotels & travel': 1, 'automotive': 1, 'real estate agents': 1, 'specialty food': 1, 'dentists': 1, 'hair salons': 1, 'american (new)': 1, 'real estate services': 1, 'oil change stations': 1, 'car wash': 1})}
{'min_lat': 43.04388496058543, 'max_lat': 43.058375614113906, 'min_long': -89.5138890841595, 'max_long': -89.49409311221267, 'category_counts': defaultdict(<c

{'min_lat': 43.04388496058543, 'max_lat': 43.058375614113906, 'min_long': -89.31592936469116, 'max_long': -89.29613339274432, 'category_counts': defaultdict(<class 'int'>, {'hotels': 1, 'masonry/concrete': 1, 'real estate agents': 1, 'fast food': 1, 'breakfast & brunch': 1, 'building supplies': 1, 'restaurants': 1, 'pets': 2, 'pet training': 1, 'martial arts': 1, 'hotels & travel': 2, 'food': 1, 'home services': 1, 'local services': 2, 'automotive': 3, 'bars': 1, 'hiking': 1, 'garage door services': 1, 'shopping': 2, 'american (traditional)': 1, 'specialty food': 2, 'event planning & services': 1, 'car stereo installation': 1, 'auto detailing': 1, 'tree services': 1})}
{'min_lat': 43.04388496058543, 'max_lat': 43.058375614113906, 'min_long': -89.29613339274432, 'max_long': -89.27633742079749, 'category_counts': defaultdict(<class 'int'>, {'food': 1, 'party bus rentals': 1, 'local services': 2, 'drugstores': 1, 'home services': 1, 'automotive': 1, 'home cleaning': 1})}
{'min_lat': 43.04

{'min_lat': 43.058375614113906, 'max_lat': 43.07286626764238, 'min_long': -89.37531728053166, 'max_long': -89.35552130858483, 'category_counts': defaultdict(<class 'int'>, {'no business found in this area': 1})}
{'min_lat': 43.058375614113906, 'max_lat': 43.07286626764238, 'min_long': -89.35552130858483, 'max_long': -89.33572533663799, 'category_counts': defaultdict(<class 'int'>, {'libraries': 1})}
{'min_lat': 43.058375614113906, 'max_lat': 43.07286626764238, 'min_long': -89.33572533663799, 'max_long': -89.31592936469116, 'category_counts': defaultdict(<class 'int'>, {'ice cream & frozen yogurt': 2, 'dry cleaning & laundry': 1, 'cosmetics & beauty supply': 1, 'restaurants': 3, 'real estate': 1, 'local services': 1, 'sewing & alterations': 1, 'desserts': 1, 'pets': 2, 'hair salons': 1, 'roadside assistance': 1, 'malaysian': 1, 'auto repair': 1, 'grocery': 1, 'financial services': 1, 'pizza': 2, 'pet sitting': 1, 'party & event planning': 1, 'veterinarians': 1, 'fast food': 1, 'shopping

{'min_lat': 43.07286626764238, 'max_lat': 43.08735692117086, 'min_long': -89.37531728053166, 'max_long': -89.35552130858483, 'category_counts': defaultdict(<class 'int'>, {'active life': 1, 'nightlife': 5, 'home decor': 1, 'beer': 1, 'veterinarians': 1, 'chinese': 1, 'event planning & services': 2, 'food trucks': 2, 'tex-mex': 1, 'event photography': 1, 'restaurants': 4, 'coffee & tea': 2, 'home & garden': 1, 'photographers': 2, 'paint & sip': 1, 'tours': 1, 'home services': 2, 'vietnamese': 1, 'financial services': 1, 'session photography': 2, 'piercing': 1, 'physical therapy': 1, 'car wash': 1, 'na': 1, 'shopping': 3, 'mexican': 1, 'tapas bars': 1, 'pubs': 1, 'chiropractors': 1, 'automotive': 2, 'parks': 1, 'pet stores': 1, 'laotian': 1, 'japanese': 1, 'venues & event spaces': 1, 'distilleries': 1, 'food': 8, 'asian fusion': 1, 'burgers': 1, 'contractors': 1, 'hair removal': 1, 'real estate': 1, 'pizza': 1, 'thrift stores': 2, 'wine bars': 1, 'flooring': 1, 'comic books': 1, 'local s

{'min_lat': 43.08735692117086, 'max_lat': 43.101847574699335, 'min_long': -89.37531728053166, 'max_long': -89.35552130858483, 'category_counts': defaultdict(<class 'int'>, {'massage': 1, 'car rental': 1, 'home services': 3, 'shopping': 3, 'pizza': 1, 'nurseries & gardening': 1, 'beauty & spas': 1, 'local services': 1, 'food': 5, 'restaurants': 3, 'hair salons': 1, 'sandwiches': 2, 'yoga': 1, 'chinese': 1, 'body shops': 1, 'nightlife': 1, 'vegetarian': 1, 'parks': 1, 'shoe repair': 1, 'fitness & instruction': 1, 'pubs': 1, 'candy stores': 1, 'electricians': 1, 'pet services': 1, 'tattoo': 1, 'education': 1, 'towing': 1, 'accountants': 1, 'dive bars': 1, 'playgrounds': 1, 'event planning & services': 1})}
{'min_lat': 43.08735692117086, 'max_lat': 43.101847574699335, 'min_long': -89.35552130858483, 'max_long': -89.33572533663799, 'category_counts': defaultdict(<class 'int'>, {'event planning & services': 1, 'mexican': 1, 'automotive': 3, 'hair removal': 2, 'food': 4, 'massage': 1, 'fitnes

{'min_lat': 43.101847574699335, 'max_lat': 43.11633822822781, 'min_long': -89.31592936469116, 'max_long': -89.29613339274432, 'category_counts': defaultdict(<class 'int'>, {'home services': 2, 'restaurants': 1, 'active life': 2, 'pizza': 1, 'apartments': 1, 'auto repair': 1, 'department stores': 1, 'tires': 1, 'banks & credit unions': 1, 'local services': 1, 'food': 1})}
{'min_lat': 43.101847574699335, 'max_lat': 43.11633822822781, 'min_long': -89.29613339274432, 'max_long': -89.27633742079749, 'category_counts': defaultdict(<class 'int'>, {'dive bars': 1, 'preschools': 1})}
{'min_lat': 43.101847574699335, 'max_lat': 43.11633822822781, 'min_long': -89.27633742079749, 'max_long': -89.25654144885065, 'category_counts': defaultdict(<class 'int'>, {'no business found in this area': 1})}
{'min_lat': 43.101847574699335, 'max_lat': 43.11633822822781, 'min_long': -89.25654144885065, 'max_long': -89.23674547690382, 'category_counts': defaultdict(<class 'int'>, {'no business found in this area':

{'min_lat': 43.13082888175629, 'max_lat': 43.14531953528476, 'min_long': -89.454501168319, 'max_long': -89.43470519637216, 'category_counts': defaultdict(<class 'int'>, {'no business found in this area': 1})}
{'min_lat': 43.13082888175629, 'max_lat': 43.14531953528476, 'min_long': -89.43470519637216, 'max_long': -89.41490922442533, 'category_counts': defaultdict(<class 'int'>, {'no business found in this area': 1})}
{'min_lat': 43.13082888175629, 'max_lat': 43.14531953528476, 'min_long': -89.41490922442533, 'max_long': -89.3951132524785, 'category_counts': defaultdict(<class 'int'>, {'no business found in this area': 1})}
{'min_lat': 43.13082888175629, 'max_lat': 43.14531953528476, 'min_long': -89.3951132524785, 'max_long': -89.37531728053166, 'category_counts': defaultdict(<class 'int'>, {'elementary schools': 1})}
{'min_lat': 43.13082888175629, 'max_lat': 43.14531953528476, 'min_long': -89.37531728053166, 'max_long': -89.35552130858483, 'category_counts': defaultdict(<class 'int'>, {

### Write to file

In [12]:
# Serialise the list of grid-box dictionaries as one JSON document on disk.
with open('wi_grid_boxes_category_counts.json', 'w') as outfile:
    outfile.write(json.dumps(boxes))