In [1]:
from collections import defaultdict
import pandas as pd
import numpy as np
import math
import statistics
import re
import json

In [2]:
# Load the raw Yelp business table; every later cell derives from this frame.
business = pd.read_csv("yelp_business.csv")

In [3]:
# Keep only rows whose state is "PA" and whose is_open flag equals 1.
pa_mask = business.state == "PA"
open_mask = business.is_open == 1
business = business[pa_mask & open_mask]


In [4]:
def category_splitter(category_string):
    """Split a ';'-delimited category string into a list of its parts.

    Args:
        category_string: Text such as "Hair Stylists;Beauty & Spas".

    Returns:
        list[str]: The pieces between semicolons, in their original order.
        A string without any ';' yields a one-element list.
    """
    return category_string.split(";")
# Replace each raw category string with its list of individual categories.
business['categories'] = business['categories'].map(category_splitter)


In [5]:
def first_list(category_list):
    """Return the first element of ``category_list``.

    Args:
        category_list: An indexable sequence with at least one element.

    Returns:
        The element at position 0.

    Raises:
        IndexError: If the sequence is empty.
    """
    return category_list[0]
# Reduce each category list to its first entry, lower-cased and with
# apostrophes removed (e.g. "Men's Clothing" -> "mens clothing").
business['categories'] = business['categories'].map(
    lambda cats: first_list(cats).lower().replace("'", "")
)

# Display only: drop() returns a new frame and the result is not assigned,
# so `business` itself still carries these columns afterwards.
business.drop(columns=["business_id", "neighborhood", "address", "review_count", "is_open"])


Unnamed: 0,name,city,state,postal_code,latitude,longitude,stars,categories
1,"""Stephen Szabo Salon""",McMurray,PA,15317,40.291685,-80.104900,3.0,hair stylists
8,"""Any Given Sundae""",Wexford,PA,15090,40.615102,-80.091349,5.0,coffee & tea
13,"""Complete Dental Care""",Homestead,PA,15120,40.401488,-79.887916,2.0,general dentistry
51,"""Enterprise Rent-A-Car""",Homestead,PA,15120,40.409116,-79.897415,3.0,hotels & travel
75,"""Crazy Mocha Coffee""",Pittsburgh,PA,15201,40.469295,-79.961273,3.5,restaurants
76,"""Bruegger's Bagels""",Pittsburgh,PA,15236,40.344194,-79.970963,2.5,breakfast & brunch
98,"""Blush""",Pittsburgh,PA,15222,40.444236,-79.998325,3.0,nightlife
109,"""T.W.Tile""",Pittsburgh,PA,15236,40.327956,-79.960479,3.5,kitchen & bath
114,"""Coraopolis Collision and Repair Service""",Coraopolis,PA,15108,40.522145,-80.172691,4.5,smog check stations
178,"""Napoli Pizzeria""",Pittsburgh,PA,15217,40.434399,-79.922983,4.0,italian


In [6]:
# Persist the filtered frame; the row index is written as the first CSV column.
business.to_csv("pa_data.csv", index=True)

In [8]:
# for typeahead words
# Sorted, de-duplicated category labels taken straight from the frame.
raw_unique_cats = list(np.unique(business["categories"]))

# Same labels with apostrophes stripped and lower-cased.
unique_cats = []
for raw_cat in raw_unique_cats:
    unique_cats.append(raw_cat.replace("'", "").lower())

print(unique_cats)

['accessories', 'accountants', 'active life', 'acupuncture', 'adult', 'adult entertainment', 'advertising', 'air duct cleaning', 'airlines', 'airport shuttles', 'airports', 'amateur sports teams', 'american (new)', 'american (traditional)', 'amusement parks', 'animal shelters', 'antiques', 'apartments', 'appliances', 'appliances & repair', 'appraisal services', 'aquarium services', 'arcades', 'architectural tours', 'argentine', 'art classes', 'art galleries', 'art museums', 'art schools', 'art supplies', 'arts & crafts', 'arts & entertainment', 'asian fusion', 'auto customization', 'auto detailing', 'auto glass services', 'auto insurance', 'auto parts & supplies', 'auto repair', 'automotive', 'awnings', 'ayurveda', 'baby gear & furniture', 'bagels', 'bakeries', 'banks & credit unions', 'barbeque', 'barbers', 'barre classes', 'bars', 'bartenders', 'basque', 'battery stores', 'batting cages', 'beaches', 'beauty & spas', 'bed & breakfast', 'beer', 'beer bar', 'beer tours', 'belgian', 'bik

# Create JSON of Map Grid of Boxes

### Filter all businesses down to the city area

In [9]:
# Pittsburgh Area
# Bounding box corners in decimal degrees.
min_long, max_long = -80.095172, -79.867893
min_lat, max_lat = 40.361012, 40.502653

# Both bounds are exclusive: a point lying exactly on an edge is left out.
lat_in_range = (business.latitude > min_lat) & (business.latitude < max_lat)
long_in_range = (business.longitude > min_long) & (business.longitude < max_long)

city_biz = business[lat_in_range & long_in_range]

### Compute lat and long increments

In [10]:
# Extent of the selected area in degrees.
lat_span = max_lat - min_lat
long_span = max_long - min_long

# Mid-latitude of the area in radians; the E-W scale shrinks with cos(latitude).
mid_lat_rad = statistics.mean([min_lat, max_lat])*(math.pi/180)

# Degrees of latitude per mile (N-S) and degrees of longitude per mile (E-W).
# NOTE(review): 69.01 and 69.1710411 are used as miles-per-degree constants;
# their source is not recorded here - confirm.
ns_deg_per_mi = 1/69.01
ew_deg_per_mi = 1/(math.cos(mid_lat_rad) * 69.1710411)

# Size of the selected city area in miles.
miles_ns = lat_span/ns_deg_per_mi
miles_ew = long_span/ew_deg_per_mi

# Grid step in degrees: the span divided by its length in miles,
# i.e. each step covers roughly one mile in that direction.
delta_long = long_span/miles_ew
delta_lat = lat_span/miles_ns

### Get counts of number of each category in a grid box

In [11]:
def get_category_counts(minimum_lat, minimum_long, d_lat, d_long, businesses=None):
    """Count businesses per category inside one grid box.

    The box is the half-open rectangle
    ``[minimum_lat, minimum_lat + d_lat) x [minimum_long, minimum_long + d_long)``.
    Including the lower edge and excluding the upper edge means adjacent
    boxes tile the area without gaps or double counting: a business lying
    exactly on a shared edge belongs to exactly one box.

    Args:
        minimum_lat: Latitude of the box's lower edge.
        minimum_long: Longitude of the box's lower edge.
        d_lat: Height of the box in degrees of latitude.
        d_long: Width of the box in degrees of longitude.
        businesses: Optional DataFrame with 'latitude', 'longitude' and
            'categories' columns. Defaults to the module-level ``business``
            frame when omitted.

    Returns:
        defaultdict(int): Maps category -> number of businesses in the box,
        with keys in order of first appearance in the frame. If the box is
        empty, the mapping holds the single sentinel entry
        ``'no business found in this area': 1``.
    """
    if businesses is None:
        businesses = business

    latitudes = businesses['latitude']
    longitudes = businesses['longitude']

    # One vectorised mask instead of a Python-level iterrows() pass per box.
    in_box = (
        (latitudes >= minimum_lat) & (latitudes < minimum_lat + d_lat)
        & (longitudes >= minimum_long) & (longitudes < minimum_long + d_long)
    )

    category_dict = defaultdict(int)
    # Iterating in frame order keeps the key order of first appearance.
    for category in businesses.loc[in_box, 'categories']:
        category_dict[category] += 1

    if not category_dict:
        category_dict['no business found in this area'] += 1
    return category_dict
        

### Create list of grid box dictionaries

In [12]:
# One dict per grid box, produced row by row (south to north) and, within
# a row, west to east.
boxes = []

lat_edge = min_lat
while lat_edge < max_lat:
    long_edge = min_long
    while long_edge < max_long:
        cat_counts = get_category_counts(lat_edge, long_edge, delta_lat, delta_long)
        box = dict(
            min_lat=lat_edge,
            max_lat=lat_edge + delta_lat,
            min_long=long_edge,
            max_long=long_edge + delta_long,
            category_counts=cat_counts,
        )
        print(box)
        boxes.append(box)
        # Step by accumulation so each box's lower edge equals the previous
        # box's upper edge exactly.
        long_edge += delta_long
    lat_edge += delta_lat
        

{'min_lat': 40.361012, 'max_lat': 40.37550265352848, 'min_long': -80.095172, 'max_long': -80.0761791823135, 'category_counts': defaultdict(<class 'int'>, {'farmers market': 1, 'pizza': 1, 'automotive': 1, 'water heater installation/repair': 1, 'doctors': 1, 'carpet cleaning': 1, 'health & medical': 1, 'heating & air conditioning/hvac': 1, 'caterers': 1, 'diners': 1, 'shopping': 1, 'local services': 1, 'food': 1, 'gas stations': 1, 'nurseries & gardening': 1, 'auto repair': 1})}
{'min_lat': 40.361012, 'max_lat': 40.37550265352848, 'min_long': -80.0761791823135, 'max_long': -80.05718636462699, 'category_counts': defaultdict(<class 'int'>, {'home & garden': 1, 'sandwiches': 1, 'sporting goods': 1, 'fences & gates': 1})}
{'min_lat': 40.361012, 'max_lat': 40.37550265352848, 'min_long': -80.05718636462699, 'max_long': -80.03819354694048, 'category_counts': defaultdict(<class 'int'>, {'active life': 1, 'home services': 1, 'health & medical': 1})}
{'min_lat': 40.361012, 'max_lat': 40.375502653

{'min_lat': 40.37550265352848, 'max_lat': 40.389993307056955, 'min_long': -79.96222227619444, 'max_long': -79.94322945850793, 'category_counts': defaultdict(<class 'int'>, {'no business found in this area': 1})}
{'min_lat': 40.37550265352848, 'max_lat': 40.389993307056955, 'min_long': -79.94322945850793, 'max_long': -79.92423664082142, 'category_counts': defaultdict(<class 'int'>, {'food': 1, 'churches': 1})}
{'min_lat': 40.37550265352848, 'max_lat': 40.389993307056955, 'min_long': -79.92423664082142, 'max_long': -79.90524382313491, 'category_counts': defaultdict(<class 'int'>, {'nightlife': 1})}
{'min_lat': 40.37550265352848, 'max_lat': 40.389993307056955, 'min_long': -79.90524382313491, 'max_long': -79.8862510054484, 'category_counts': defaultdict(<class 'int'>, {'restaurants': 2, 'nail salons': 1, 'beauty & spas': 1, 'florists': 1, 'chicken wings': 1, 'pizza': 2, 'accessories': 1, 'nightlife': 1, 'american (traditional)': 1, 'pet services': 1, 'toy stores': 1})}
{'min_lat': 40.37550

{'min_lat': 40.40448396058543, 'max_lat': 40.41897461411391, 'min_long': -80.05718636462699, 'max_long': -80.03819354694048, 'category_counts': defaultdict(<class 'int'>, {'active life': 1, 'italian': 2, 'event planning & services': 1, 'food': 1, 'pets': 1, 'hobby shops': 1, 'automotive': 1, 'arts & entertainment': 1, 'doctors': 1, 'general dentistry': 1, 'oil change stations': 1, 'health & medical': 1, 'na': 1, 'bagels': 1, 'comfort food': 1, 'hair removal': 1, 'home services': 1})}
{'min_lat': 40.40448396058543, 'max_lat': 40.41897461411391, 'min_long': -80.03819354694048, 'max_long': -80.01920072925397, 'category_counts': defaultdict(<class 'int'>, {'american (new)': 1, 'water heater installation/repair': 1, 'burgers': 1, 'home services': 1, 'grocery': 2, 'local flavor': 1, 'food': 1, 'heating & air conditioning/hvac': 1, 'event planning & services': 1, 'italian': 1, 'restaurants': 1, 'car stereo installation': 1, 'tanning': 1, 'laundry services': 1, 'keys & locksmiths': 1, 'indian'

{'min_lat': 40.41897461411391, 'max_lat': 40.43346526764238, 'min_long': -79.98121509388095, 'max_long': -79.96222227619444, 'category_counts': defaultdict(<class 'int'>, {'auto repair': 1, 'restaurants': 19, 'nightlife': 7, 'hotels & travel': 3, 'automotive': 4, 'yoga': 1, 'fitness & instruction': 3, 'chinese': 1, 'fashion': 2, 'american (traditional)': 6, 'hotels': 1, 'mediterranean': 1, 'home services': 2, 'optometrists': 2, 'basque': 1, 'american (new)': 3, 'shopping': 10, 'local services': 2, 'bars': 4, 'bikes': 1, 'beauty & spas': 6, 'venues & event spaces': 1, 'barbeque': 1, 'beer': 1, 'accountants': 1, 'art galleries': 1, 'dive bars': 2, 'hair salons': 1, 'home & garden': 1, 'drugstores': 2, 'tanning beds': 1, 'shopping centers': 1, 'food': 8, 'fast food': 3, 'waxing': 1, 'arts & entertainment': 3, 'performing arts': 1, 'piercing': 2, 'jewelry': 1, 'pubs': 2, 'cosmetic surgeons': 1, 'desserts': 1, 'windshield installation & repair': 1, 'banks & credit unions': 1, 'mens clothing

{'min_lat': 40.43346526764238, 'max_lat': 40.44795592117086, 'min_long': -80.00020791156746, 'max_long': -79.98121509388095, 'category_counts': defaultdict(<class 'int'>, {'nightlife': 10, 'restaurants': 39, 'shopping': 16, 'parks': 2, 'barbeque': 1, 'books': 2, 'breakfast & brunch': 1, 'food court': 1, 'mexican': 1, 'active life': 5, 'food': 25, 'musical instruments & teachers': 1, 'banks & credit unions': 3, 'auto repair': 1, 'lounges': 1, 'hotels & travel': 10, 'convenience stores': 1, 'tires': 1, 'steakhouses': 2, 'american (new)': 6, 'jewelry': 2, 'automotive': 2, 'japanese': 1, 'home inspectors': 3, 'arts & crafts': 2, 'desserts': 3, 'beauty & spas': 7, 'food trucks': 5, 'sandwiches': 5, 'child care & day care': 1, 'architectural tours': 1, 'auto glass services': 1, 'bars': 8, 'american (traditional)': 7, 'jewelry repair': 1, 'divorce & family law': 1, 'apartments': 3, 'caterers': 1, 'public services & government': 1, 'coffee & tea': 4, 'legal services': 1, 'departments of motor 

{'min_lat': 40.43346526764238, 'max_lat': 40.44795592117086, 'min_long': -79.8862510054484, 'max_long': -79.86725818776189, 'category_counts': defaultdict(<class 'int'>, {'diners': 1, 'dry cleaning': 1, 'towing': 1, 'ice cream & frozen yogurt': 1, 'restaurants': 4, 'hair salons': 1, 'fast food': 1, 'beer': 1, 'food': 1, 'shopping': 2, 'soul food': 1, 'travel services': 1, 'jewelry': 1, 'grocery': 1, 'home & garden': 1, 'local services': 1, 'home services': 1, 'pizza': 1, 'oil change stations': 1})}
{'min_lat': 40.44795592117086, 'max_lat': 40.462446574699335, 'min_long': -80.095172, 'max_long': -80.0761791823135, 'category_counts': defaultdict(<class 'int'>, {'local services': 1, 'food': 1, 'apartments': 1})}
{'min_lat': 40.44795592117086, 'max_lat': 40.462446574699335, 'min_long': -80.0761791823135, 'max_long': -80.05718636462699, 'category_counts': defaultdict(<class 'int'>, {'clock repair': 1, 'permanent makeup': 1})}
{'min_lat': 40.44795592117086, 'max_lat': 40.462446574699335, 'mi

{'min_lat': 40.44795592117086, 'max_lat': 40.462446574699335, 'min_long': -79.92423664082142, 'max_long': -79.90524382313491, 'category_counts': defaultdict(<class 'int'>, {'food': 4, 'automotive': 3, 'mobile phone accessories': 1, 'auto repair': 2, 'arts & crafts': 1, 'department stores': 2, 'shipping centers': 1, 'shopping centers': 1, 'supernatural readings': 1, 'shopping': 2, 'gyms': 2, 'pet adoption': 1, 'hair removal': 1, 'home & garden': 1, 'specialty schools': 1, 'furniture stores': 1, 'chicken wings': 1, 'flowers & gifts': 1, 'active life': 2, 'hair salons': 1, 'pet services': 2, 'real estate': 1, 'massage therapy': 1, 'restaurants': 5, 'cosmetics & beauty supply': 2, 'mobile phone repair': 1, 'nail salons': 1, 'fashion': 1, 'coffee & tea': 2, 'beauty & spas': 2, 'health & medical': 2, 'property management': 2, 'veterinarians': 1, 'sandwiches': 2, 'belgian': 1, 'plumbing': 1, 'bakeries': 1, 'education': 3, 'chocolatiers & shops': 1, 'buffets': 1, 'massage': 1, 'drugstores': 1,

{'min_lat': 40.47693722822781, 'max_lat': 40.49142788175629, 'min_long': -80.095172, 'max_long': -80.0761791823135, 'category_counts': defaultdict(<class 'int'>, {'professional services': 1, 'nightlife': 1})}
{'min_lat': 40.47693722822781, 'max_lat': 40.49142788175629, 'min_long': -80.0761791823135, 'max_long': -80.05718636462699, 'category_counts': defaultdict(<class 'int'>, {'dry cleaning & laundry': 1, 'automotive': 2, 'chicken wings': 1, 'breakfast & brunch': 1, 'home services': 1, 'oil change stations': 1, 'beer': 1, 'antiques': 1})}
{'min_lat': 40.47693722822781, 'max_lat': 40.49142788175629, 'min_long': -80.05718636462699, 'max_long': -80.03819354694048, 'category_counts': defaultdict(<class 'int'>, {'food': 2, 'coffee & tea': 1, 'event planning & services': 1, 'fast food': 1, 'automotive': 2, 'heating & air conditioning/hvac': 1, 'restaurants': 1, 'grocery': 1, 'tires': 1, 'spin classes': 1, 'pets': 1, 'meat shops': 1, 'towing': 1})}
{'min_lat': 40.47693722822781, 'max_lat': 40

{'min_lat': 40.49142788175629, 'max_lat': 40.50591853528476, 'min_long': -79.96222227619444, 'max_long': -79.94322945850793, 'category_counts': defaultdict(<class 'int'>, {'italian': 1, 'restaurants': 3, 'hair stylists': 1, 'automotive': 2, 'food': 1, 'american (traditional)': 1, 'movers': 1, 'fitness & instruction': 1, 'arts & entertainment': 1, 'home & garden': 1, 'beauty & spas': 1, 'wedding planning': 1, 'nail salons': 1, 'home inspectors': 1, 'flowers & gifts': 1, 'auto parts & supplies': 1, 'nightlife': 1, 'orthodontists': 1, 'home services': 1})}
{'min_lat': 40.49142788175629, 'max_lat': 40.50591853528476, 'min_long': -79.94322945850793, 'max_long': -79.92423664082142, 'category_counts': defaultdict(<class 'int'>, {'breweries': 1, 'bakeries': 1, 'door sales/installation': 1, 'boxing': 1, 'grocery': 1, 'pizza': 1, 'active life': 2, 'carpet cleaning': 1, 'cafes': 1, 'sandwiches': 1, 'dry cleaning & laundry': 1, 'roadside assistance': 1, 'summer camps': 1, 'nightlife': 1, 'movers':

### Write to file

In [13]:
# Serialise the list of grid boxes to disk as a single JSON array.
with open('pa_grid_boxes_category_counts.json', 'w') as json_file:
    json_file.write(json.dumps(boxes))