# Capstone Project: Comparing Boston vs NYC Neighborhoods

## Import Libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [2]:
pd.options.mode.chained_assignment = None

## Importing and Cleaning the Raw Data to make final Location DataFrame

### Import Zipcodes CSV

In [3]:
US_loc_data = pd.read_csv('zip_code_database.csv', converters={'zip': lambda x: str(x)})

### Cleaning the data and making a table with just New York and Boston Zipcodes and Latitude/Longitude data

In [4]:
MAlocdata = US_loc_data[US_loc_data['state']=='MA']
NYlocdata = US_loc_data[US_loc_data['state']=='NY']

MAlocdata = MAlocdata[(MAlocdata['county']=='Suffolk County')]
MAlocdata.head()

Unnamed: 0,zip,type,decommissioned,primary_city,acceptable_cities,unacceptable_cities,state,county,timezone,area_codes,world_region,country,latitude,longitude,irs_estimated_population_2015
612,2108,STANDARD,0,Boston,,,MA,Suffolk County,America/New_York,617857339,,US,42.36,-71.06,4050
613,2109,STANDARD,0,Boston,,,MA,Suffolk County,America/New_York,617,,US,42.37,-71.05,3980
614,2110,STANDARD,0,Boston,,,MA,Suffolk County,America/New_York,508774617781857978,,US,42.36,-71.05,3320
615,2111,STANDARD,0,Boston,,,MA,Suffolk County,America/New_York,617781978339857,,US,42.35,-71.06,5720
616,2112,PO BOX,0,Boston,,,MA,Suffolk County,America/New_York,617,,US,42.35,-71.06,574


In [5]:
MAlocdata.reset_index(inplace=True)

In [6]:
MAlocdata.drop(columns=['index', 'type', 'decommissioned', 'acceptable_cities', 'unacceptable_cities', 'timezone', 'area_codes', 'country', 'world_region',
                       'irs_estimated_population_2015', 'state'], inplace=True)
MAlocdata.head()

Unnamed: 0,zip,primary_city,county,latitude,longitude
0,2108,Boston,Suffolk County,42.36,-71.06
1,2109,Boston,Suffolk County,42.37,-71.05
2,2110,Boston,Suffolk County,42.36,-71.05
3,2111,Boston,Suffolk County,42.35,-71.06
4,2112,Boston,Suffolk County,42.35,-71.06


In [7]:
MAlocdata.rename(columns={'primary_city':'city'}, inplace=True)
MAlocdata.head()

Unnamed: 0,zip,city,county,latitude,longitude
0,2108,Boston,Suffolk County,42.36,-71.06
1,2109,Boston,Suffolk County,42.37,-71.05
2,2110,Boston,Suffolk County,42.36,-71.05
3,2111,Boston,Suffolk County,42.35,-71.06
4,2112,Boston,Suffolk County,42.35,-71.06


In [8]:
NYClocdata = NYlocdata[(NYlocdata['county']=='New York County') | (NYlocdata['county']=='Bronx County') | (NYlocdata['county']=='Kings County') |
                      (NYlocdata['county']=='Queens County') | (NYlocdata['county']=='Richmond County')]
NYClocdata.head()

Unnamed: 0,zip,type,decommissioned,primary_city,acceptable_cities,unacceptable_cities,state,county,timezone,area_codes,world_region,country,latitude,longitude,irs_estimated_population_2015
3699,10001,STANDARD,0,New York,,"Empire State, G P O, Greeley Square, Macys Fin...",NY,New York County,America/New_York,718917347646,,US,40.75,-74.0,19710
3700,10002,STANDARD,0,New York,Knickerbocker,"Manhattan, New York City, Ny, Ny City, Nyc",NY,New York County,America/New_York,718,,US,40.71,-73.99,71450
3701,10003,STANDARD,0,New York,,"Cooper, Manhattan",NY,New York County,America/New_York,212347646718917,,US,40.73,-73.99,39470
3702,10004,STANDARD,0,New York,Bowling Green,,NY,New York County,America/New_York,212347646718917,,US,40.69,-74.02,4380
3703,10005,STANDARD,0,New York,Wall Street,"Manhattan, Nyc",NY,New York County,America/New_York,347718212646917,,US,40.71,-74.01,8600


In [9]:
NYClocdata.reset_index(inplace=True)
NYClocdata.drop(columns=['index', 'type', 'decommissioned', 'acceptable_cities', 'unacceptable_cities', 'timezone', 'area_codes', 'country', 'world_region',
                       'irs_estimated_population_2015', 'state'], inplace=True)

In [10]:
NYClocdata.rename(columns={'primary_city':'city'}, inplace=True)

In [11]:
nybosdata = pd.concat([NYClocdata, MAlocdata], ignore_index=True)
nybosdata.rename(columns={'primary_city':'city'}, inplace=True)

### Final Location Data Table

In [12]:
nybosdata

Unnamed: 0,zip,city,county,latitude,longitude
0,10001,New York,New York County,40.75,-74.0
1,10002,New York,New York County,40.71,-73.99
2,10003,New York,New York County,40.73,-73.99
3,10004,New York,New York County,40.69,-74.02
4,10005,New York,New York County,40.71,-74.01
5,10006,New York,New York County,40.71,-74.01
6,10007,New York,New York County,40.71,-74.01
7,10008,New York,New York County,40.71,-73.99
8,10009,New York,New York County,40.73,-73.98
9,10010,New York,New York County,40.74,-73.98


## Initial maps of NYC and Boston

In [13]:
addressNYC = 'New York City, NY'

geolocator = Nominatim(user_agent='Final_project')
locationNYC = geolocator.geocode(addressNYC)
latNYC = locationNYC.latitude
longNYC = locationNYC.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latNYC, longNYC))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


In [14]:
addressBOS = 'Boston, MA'

geolocator = Nominatim(user_agent='Final_project')
locationBOS = geolocator.geocode(addressBOS)
latBOS = locationBOS.latitude
longBOS = locationBOS.longitude
print('The geograpical coordinate of Boston are {}, {}.'.format(latBOS, longBOS))

The geograpical coordinate of Boston are 42.3602534, -71.0582912.


### NYC map with zipcode popups

In [15]:
map_newyork = folium.Map(location=[latNYC, longNYC], zoom_start=10)

for lat, lng, zipc, city in zip(NYClocdata['latitude'], NYClocdata['longitude'], NYClocdata['zip'], NYClocdata['city']):
    label = '{}, {}'.format(zipc, city)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

### Boston map with zipcode popups

In [16]:
map_boston = folium.Map(location=[latBOS, longBOS], zoom_start=10)

for lat, lng, zipc, city in zip(MAlocdata['latitude'], MAlocdata['longitude'], MAlocdata['zip'], MAlocdata['city']):
    label = '{}, {}'.format(zipc, city)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_boston)  
    
map_boston

## Bringing in Foursquare data

### Defining Foursquare credentials:

In [17]:
CLIENT_ID = '5PYOPFAT54OJP3NYENDDUIU4RW1DNANTJKABT5TNGBDT2RY2' # your Foursquare ID
CLIENT_SECRET = 'OTARQI2XSFCXBYP2BPKHZCYQAXKQKP1XPG1V2SYAYIISRGNK' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100
radius = 500

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 5PYOPFAT54OJP3NYENDDUIU4RW1DNANTJKABT5TNGBDT2RY2
CLIENT_SECRET:OTARQI2XSFCXBYP2BPKHZCYQAXKQKP1XPG1V2SYAYIISRGNK


In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng,
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [19]:
nybosdata.head()

Unnamed: 0,zip,city,county,latitude,longitude
0,10001,New York,New York County,40.75,-74.0
1,10002,New York,New York County,40.71,-73.99
2,10003,New York,New York County,40.73,-73.99
3,10004,New York,New York County,40.69,-74.02
4,10005,New York,New York County,40.71,-74.01


In [20]:
nybos_venues = getNearbyVenues(nybosdata['zip'], nybosdata['latitude'], nybosdata['longitude'])

10001
10002
10003
10004
10005
10006
10007
10008
10009
10010
10011
10012
10013
10014
10015
10016
10017
10018
10019
10020
10021
10022
10023
10024
10025
10026
10027
10028
10029
10030
10031
10032
10033
10034
10035
10036
10037
10038
10039
10040
10041
10043
10044
10045
10046
10047
10048
10055
10060
10065
10069
10072
10075
10079
10080
10081
10082
10087
10090
10094
10095
10096
10098
10099
10101
10102
10103
10104
10105
10106
10107
10108
10109
10110
10111
10112
10113
10114
10115
10116
10117
10118
10119
10120
10121
10122
10123
10124
10125
10126
10128
10129
10130
10131
10132
10133
10138
10149
10150
10151
10152
10153
10154
10155
10156
10157
10158
10159
10160
10161
10162
10163
10164
10165
10166
10167
10168
10169
10170
10171
10172
10173
10174
10175
10176
10177
10178
10179
10184
10185
10196
10197
10199
10203
10211
10212
10213
10242
10249
10256
10257
10258
10259
10260
10261
10265
10268
10269
10270
10271
10272
10273
10274
10275
10276
10277
10278
10279
10280
10281
10282
10285
10286
10292
10301
10302
1030

In [75]:
nybosdata.shape

(394, 5)

In [76]:
nybos_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,10001,40.75,-74.0,Bluestone Lane,40.752254,-73.998824,Coffee Shop
1,10001,40.75,-74.0,Porteño,40.750443,-74.002407,Argentinian Restaurant
2,10001,40.75,-74.0,Jun-Men Ramen Bar,40.747956,-74.000301,Ramen Restaurant
3,10001,40.75,-74.0,26th Street Viewing Spur,40.749825,-74.003352,Scenic Lookout
4,10001,40.75,-74.0,Hudson Market,40.750678,-74.002087,Grocery Store


In [77]:
nybos_venues.groupby('Neighborhood').count().head()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2108,100,100,100,100,100,100
2109,23,23,23,23,23,23
2110,100,100,100,100,100,100
2111,100,100,100,100,100,100
2112,100,100,100,100,100,100


## Analyzing the data

### One-hot encoding:

In [78]:
nybos_onehot = pd.get_dummies(nybos_venues[['Venue Category']], prefix="", prefix_sep="")
nybos_onehot['Neighborhood'] = nybos_venues['Neighborhood']
fixed_columns = [nybos_onehot.columns[-1]] + list(nybos_onehot.columns[:-1])
nybos_onehot = nybos_onehot[fixed_columns]

nybos_onehot.head()

Unnamed: 0,Zoo Exhibit,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,Airport Tram,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Belgian Restaurant,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Border Crossing,Botanical Garden,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Buffet,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Cambodian Restaurant,Camera Store,Campground,Candy Store,Cantonese Restaurant,Car Wash,Caribbean Restaurant,Caucasian Restaurant,Cemetery,Check Cashing Service,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Churrascaria,Circus,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Bookstore,College Cafeteria,College Gym,College Quad,College Theater,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cooking School,Cosmetics Shop,Coworking Space,Creperie,Cruise,Cuban Restaurant,Cultural Center,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Doctor's Office,Dog Run,Donut Shop,Dosa Place,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Space,Exhibit,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Financial or Legal Service,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Service,Food Stand,Food Truck,Football Stadium,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Halal Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,High School,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Hunan Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Indoor Play Area,Industrial Estate,Intersection,Irish Pub,Island,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Lake,Latin American Restaurant,Laundromat,Laundry Service,Lawyer,Leather Goods Store,Lebanese Restaurant,Library,Lighthouse,Lingerie Store,Liquor Store,Locksmith,Lounge,Luggage Store,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Monument / Landmark,Moroccan Restaurant,Motel,Movie Theater,Moving Target,Multiplex,Museum,Music Store,Music Venue,Nail Salon,National Park,Neighborhood,New American Restaurant,Newsstand,Nightclub,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Nightlife,Other Repair Shop,Outdoor Sculpture,Outdoors & Recreation,Outlet Store,Paella Restaurant,Paintball Field,Pakistani Restaurant,Paper / Office Supplies Store,Park,Parking,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Lab,Physical Therapist,Piano Bar,Pie Shop,Pier,Pilates Studio,Pizza Place,Platform,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Pool Hall,Portuguese Restaurant,Print Shop,Pub,Public Art,Racetrack,Ramen Restaurant,Record Shop,Recording Studio,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Road,Rock Club,Roller Rink,Roof Deck,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Salvadoran Restaurant,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shabu-Shabu Restaurant,Shanghai Restaurant,Shipping Store,Shoe Repair,Shoe Store,Shop & Service,Shopping Mall,Skate Park,Skating Rink,Ski Chalet,Ski Shop,Smoke Shop,Snack Place,Soba Restaurant,Soccer Field,Social Club,Soup Place,South American Restaurant,South Indian Restaurant,Southern / Soul Food Restaurant,Souvenir Shop,Spa,Spanish Restaurant,Speakeasy,Spiritual Center,Sporting Goods Shop,Sports Bar,Sports Club,Squash Court,State / Provincial Park,Stationery Store,Steakhouse,Storage Facility,Street Art,Street Food Gathering,Strip Club,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Swiss Restaurant,Synagogue,Szechuan Restaurant,TV Station,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Taxi Stand,Tea Room,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Tiki Bar,Tour Provider,Tourist Information Center,Toy / Game Store,Track,Trail,Train,Train Station,Tram Station,Tunnel,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,Used Bookstore,Vape Store,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Warehouse,Warehouse Store,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10001,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10001,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10001,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10001,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10001,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [79]:
nybos_onehot.shape

(16969, 441)

In [80]:
nybos_grouped = nybos_onehot.groupby('Neighborhood').mean().reset_index()
nybos_grouped.head()

Unnamed: 0,Neighborhood,Zoo Exhibit,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,Airport Tram,American Restaurant,Animal Shelter,Antique Shop,Aquarium,Arcade,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auditorium,Australian Restaurant,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Belgian Restaurant,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bike Trail,Bistro,Board Shop,Boat or Ferry,Bookstore,Border Crossing,Botanical Garden,Boutique,Bowling Alley,Boxing Gym,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Buffet,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Cambodian Restaurant,Camera Store,Campground,Candy Store,Cantonese Restaurant,Car Wash,Caribbean Restaurant,Caucasian Restaurant,Cemetery,Check Cashing Service,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Churrascaria,Circus,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Academic Building,College Arts Building,College Bookstore,College Cafeteria,College Gym,College Quad,College Theater,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Comic Shop,Community Center,Concert Hall,Construction & Landscaping,Convenience Store,Cooking School,Cosmetics Shop,Coworking Space,Creperie,Cruise,Cuban Restaurant,Cultural Center,Cupcake Shop,Cycle Studio,Czech Restaurant,Dance Studio,Daycare,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Doctor's Office,Dog Run,Donut Shop,Dosa Place,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Space,Exhibit,Factory,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Financial or Legal Service,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Service,Food Stand,Food Truck,Football Stadium,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Halal Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,High School,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotpot Restaurant,Hunan Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Indoor Play Area,Industrial Estate,Intersection,Irish Pub,Island,Israeli Restaurant,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Kitchen Supply Store,Korean Restaurant,Kosher Restaurant,Lake,Latin American Restaurant,Laundromat,Laundry Service,Lawyer,Leather Goods Store,Lebanese Restaurant,Library,Lighthouse,Lingerie Store,Liquor Store,Locksmith,Lounge,Luggage Store,Mac & Cheese Joint,Malay Restaurant,Market,Martial Arts Dojo,Massage Studio,Mattress Store,Medical Center,Mediterranean Restaurant,Memorial Site,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Monument / Landmark,Moroccan Restaurant,Motel,Movie Theater,Moving Target,Multiplex,Museum,Music Store,Music Venue,Nail Salon,National Park,New American Restaurant,Newsstand,Nightclub,Noodle House,North Indian Restaurant,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Nightlife,Other Repair Shop,Outdoor Sculpture,Outdoors & Recreation,Outlet Store,Paella Restaurant,Paintball Field,Pakistani Restaurant,Paper / Office Supplies Store,Park,Parking,Pastry Shop,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Service,Pet Store,Pharmacy,Photography Lab,Physical Therapist,Piano Bar,Pie Shop,Pier,Pilates Studio,Pizza Place,Platform,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Pool Hall,Portuguese Restaurant,Print Shop,Pub,Public Art,Racetrack,Ramen Restaurant,Record Shop,Recording Studio,Recreation Center,Rental Car Location,Rental Service,Residential Building (Apartment / Condo),Resort,Rest Area,Restaurant,River,Road,Rock Club,Roller Rink,Roof Deck,Russian Restaurant,Sake Bar,Salad Place,Salon / Barbershop,Salvadoran Restaurant,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,School,Sculpture Garden,Seafood Restaurant,Shabu-Shabu Restaurant,Shanghai Restaurant,Shipping Store,Shoe Repair,Shoe Store,Shop & Service,Shopping Mall,Skate Park,Skating Rink,Ski Chalet,Ski Shop,Smoke Shop,Snack Place,Soba Restaurant,Soccer Field,Social Club,Soup Place,South American Restaurant,South Indian Restaurant,Southern / Soul Food Restaurant,Souvenir Shop,Spa,Spanish Restaurant,Speakeasy,Spiritual Center,Sporting Goods Shop,Sports Bar,Sports Club,Squash Court,State / Provincial Park,Stationery Store,Steakhouse,Storage Facility,Street Art,Street Food Gathering,Strip Club,Supermarket,Supplement Shop,Surf Spot,Sushi Restaurant,Swiss Restaurant,Synagogue,Szechuan Restaurant,TV Station,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Taxi Stand,Tea Room,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Tiki Bar,Tour Provider,Tourist Information Center,Toy / Game Store,Track,Trail,Train,Train Station,Tram Station,Tunnel,Turkish Restaurant,Udon Restaurant,Ukrainian Restaurant,Used Bookstore,Vape Store,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Volleyball Court,Warehouse,Warehouse Store,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Zoo
0,2108,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.04,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.05,0.0,0.0,0.0,0.01,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0
1,2109,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.086957,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.173913,0.0,0.0,0.0,0.0,0.0,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0
2,2110,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.05,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0
4,2112,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0


Confirm shape of nybos_grouped:

In [81]:
nybos_grouped.shape

(394, 441)

Confirm that no rows were lost during onehot encoding and subsequent grouping:

In [82]:
nybos_grouped.shape[0] == nybos_venues.groupby('Neighborhood').count().shape[0]

True

### Dataframe with top 10 venues per zip:

In [83]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [84]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
zip_venues_sorted = pd.DataFrame(columns=columns)
zip_venues_sorted['Neighborhood'] = nybos_grouped['Neighborhood']

for ind in np.arange(nybos_grouped.shape[0]):
    zip_venues_sorted.iloc[ind, 1:] = return_most_common_venues(nybos_grouped.iloc[ind, :], num_top_venues)

zip_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2108,Historic Site,Seafood Restaurant,Coffee Shop,Sandwich Place,American Restaurant,Hotel,Italian Restaurant,Bakery,Restaurant,New American Restaurant
1,2109,Harbor / Marina,History Museum,Boat or Ferry,Spa,Park,Gym,Tourist Information Center,Athletics & Sports,Tunnel,Café
2,2110,Boat or Ferry,Seafood Restaurant,Harbor / Marina,Italian Restaurant,Coffee Shop,Park,Aquarium,Bar,Hotel,Sandwich Place
3,2111,Asian Restaurant,Chinese Restaurant,Bakery,Coffee Shop,Sushi Restaurant,Theater,Sandwich Place,Pizza Place,Gym,Performing Arts Venue
4,2112,Asian Restaurant,Chinese Restaurant,Bakery,Coffee Shop,Sushi Restaurant,Theater,Sandwich Place,Pizza Place,Gym,Performing Arts Venue


## Cluster the Zips:

In [85]:
kclusters = 10
nybos_grouped_clustering = nybos_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(nybos_grouped_clustering)

kmeans.labels_

array([0, 0, 0, 8, 8, 0, 0, 0, 0, 8, 0, 0, 4, 0, 0, 8, 4, 0, 4, 0, 0, 0,
       0, 8, 4, 8, 8, 8, 0, 4, 4, 0, 0, 0, 8, 0, 8, 0, 8, 8, 8, 0, 0, 8,
       8, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 2, 0, 0, 0, 0, 0, 2,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0,
       8, 8, 8, 8, 8, 0, 0, 0, 0, 8, 2, 2, 0, 2, 0, 0, 0, 2, 2, 0, 0, 0,
       0, 0, 2, 2, 0, 2, 2, 0, 2, 0, 0, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0,
       0, 0, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2,
       2, 0, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 0, 0, 0,
       0, 0, 0, 0, 0, 2, 2, 0, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 3, 2,
       2, 2, 5, 8, 5, 5, 5, 3, 5, 8, 4, 5, 0, 9, 3, 0, 0, 4, 4, 4, 8, 0,
       4, 4, 8, 8, 4, 4, 4, 6, 3, 4, 3, 4, 4, 7, 3, 4, 0, 4, 4, 8, 3, 4,
       0, 8, 4, 8, 8, 8, 0, 0, 8, 0, 3, 3, 4, 0, 0, 3, 3, 0, 4, 0, 3, 8,
       0, 0, 0, 0, 4, 8, 8, 0, 0, 8, 0, 8, 4, 8, 4,

In [86]:
nybosdata.rename(columns={'zip':'Neighborhood'}, inplace=True)

nybos_merged = pd.merge(nybosdata,zip_venues_sorted, how='outer', on='Neighborhood', indicator=True)

nybos_merged = nybos_merged[nybos_merged['_merge']=='both']

drop_list = list(zip_venues_sorted.columns)
drop_list.pop(0)
drop_list.append('_merge')
drop_list

['1st Most Common Venue',
 '2nd Most Common Venue',
 '3rd Most Common Venue',
 '4th Most Common Venue',
 '5th Most Common Venue',
 '6th Most Common Venue',
 '7th Most Common Venue',
 '8th Most Common Venue',
 '9th Most Common Venue',
 '10th Most Common Venue',
 '_merge']

In [87]:
nybos_merged = nybos_merged.drop(columns=drop_list)

In [88]:
nybos_merged['Cluster'] = kmeans.labels_

In [89]:
nybos_merged.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster
0,10001,New York,New York County,40.75,-74.0,0
1,10002,New York,New York County,40.71,-73.99,0
2,10003,New York,New York County,40.73,-73.99,0
3,10004,New York,New York County,40.69,-74.02,8
4,10005,New York,New York County,40.71,-74.01,8


In [90]:
nybos_merged = pd.merge(nybos_merged,zip_venues_sorted, how='outer', on='Neighborhood', indicator=True)
nybos_merged.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,_merge
0,10001,New York,New York County,40.75,-74.0,0,Art Gallery,Coffee Shop,Deli / Bodega,Chinese Restaurant,Pizza Place,Cocktail Bar,Tapas Restaurant,Lounge,Health & Beauty Service,Theater,both
1,10002,New York,New York County,40.71,-73.99,0,Café,Malay Restaurant,Art Gallery,French Restaurant,Other Great Outdoors,Motel,Cantonese Restaurant,Mexican Restaurant,Skating Rink,Skate Park,both
2,10003,New York,New York County,40.73,-73.99,0,Japanese Restaurant,Grocery Store,Italian Restaurant,Coffee Shop,Dessert Shop,Ice Cream Shop,Gym,Yoga Studio,Chinese Restaurant,Spa,both
3,10004,New York,New York County,40.69,-74.02,8,Food Truck,Seafood Restaurant,Café,Food Stand,Ice Cream Shop,Bike Rental / Bike Share,Beer Garden,Gym / Fitness Center,Bar,Baseball Field,both
4,10005,New York,New York County,40.71,-74.01,8,Coffee Shop,Hotel,Memorial Site,Pizza Place,Sandwich Place,Bar,Park,Café,Food Truck,American Restaurant,both


### Visualizing the Clusters:

In [91]:
map_clusters = folium.Map(location=[39.828273, -98.579456], zoom_start=1)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(nybos_merged['latitude'], nybos_merged['longitude'], nybos_merged['Neighborhood'], nybos_merged['Cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Splitting the nyla_merged dataframe by cluster

### Cluster 1:

In [92]:
nybos_cluster1 = nybos_merged[nybos_merged['Cluster']==1]
nybos_cluster1.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,_merge
379,2211,Boston,Suffolk County,42.35,-71.06,1,Asian Restaurant,Chinese Restaurant,Bakery,Coffee Shop,Sushi Restaurant,Theater,Sandwich Place,Pizza Place,Gym,Performing Arts Venue,both


In [93]:
nyc_cluster1 = nybos_cluster1[(nybos_cluster1['county']=='New York County') | (nybos_cluster1['county']=='Bronx County') | (nybos_cluster1['county']=='Kings County') |
                      (nybos_cluster1['county']=='Queens County') | (nybos_cluster1['county']=='Richmond County')]
print('There are {} New York City zipcodes in cluster 1.'.format(nyc_cluster1.shape[0]))

There are 0 New York City zipcodes in cluster 1.


In [96]:
bos_cluster1 = nybos_cluster1[nybos_cluster1['county']=='Suffolk County']
print('There are {} Boston zipcodes in cluster 1.'.format(bos_cluster1.shape[0]))

There are 1 Boston zipcodes in cluster 1.


### Cluster 2:

In [97]:
nybos_cluster2 = nybos_merged[nybos_merged['Cluster']==2]
nybos_cluster2.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,_merge
59,10094,New York,New York County,40.71,-74.0,2,Café,Ice Cream Shop,Chinese Restaurant,Italian Restaurant,Dim Sum Restaurant,Bar,Bakery,Grocery Store,Coffee Shop,Seafood Restaurant,both
65,10102,New York,New York County,40.71,-73.99,2,Café,Malay Restaurant,Art Gallery,French Restaurant,Other Great Outdoors,Motel,Cantonese Restaurant,Mexican Restaurant,Skating Rink,Skate Park,both
98,10150,New York,New York County,40.71,-73.99,2,Café,Malay Restaurant,Art Gallery,French Restaurant,Other Great Outdoors,Motel,Cantonese Restaurant,Mexican Restaurant,Skating Rink,Skate Park,both
99,10151,New York,New York County,40.71,-73.99,2,Café,Malay Restaurant,Art Gallery,French Restaurant,Other Great Outdoors,Motel,Cantonese Restaurant,Mexican Restaurant,Skating Rink,Skate Park,both
101,10153,New York,New York County,40.76,-73.97,2,Spa,American Restaurant,Coffee Shop,Hotel,Boutique,New American Restaurant,Italian Restaurant,Chinese Restaurant,Salon / Barbershop,Bakery,both


In [98]:
nyc_cluster2 = nybos_cluster2[(nybos_cluster2['county']=='New York County') | (nybos_cluster2['county']=='Bronx County') | (nybos_cluster2['county']=='Kings County') |
                      (nybos_cluster2['county']=='Queens County') | (nybos_cluster2['county']=='Richmond County')]
print('There are {} New York City zipcodes in cluster 2.'.format(nyc_cluster2.shape[0]))

There are 81 New York City zipcodes in cluster 2.


In [99]:
bos_cluster2 = nybos_cluster2[nybos_cluster2['county']=='Suffolk County']
print('There are {} Boston zipcodes in cluster 2.'.format(bos_cluster2.shape[0]))

There are 0 Boston zipcodes in cluster 2.


### Cluster 3:

In [100]:
nybos_cluster3 = nybos_merged[nybos_merged['Cluster']==3]
nybos_cluster3.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,_merge
86,10123,New York,New York County,40.71,-73.99,3,Café,Malay Restaurant,Art Gallery,French Restaurant,Other Great Outdoors,Motel,Cantonese Restaurant,Mexican Restaurant,Skating Rink,Skate Park,both
218,11204,Brooklyn,Kings County,40.62,-73.98,3,Pharmacy,Grocery Store,Pizza Place,Middle Eastern Restaurant,Zoo,Farmers Market,English Restaurant,Entertainment Service,Ethiopian Restaurant,Event Space,both
227,11213,Brooklyn,Kings County,40.67,-73.94,3,Pizza Place,Grocery Store,Café,Deli / Bodega,Fried Chicken Joint,Candy Store,Clothing Store,Bagel Shop,Coffee Shop,Sandwich Place,both
234,11220,Brooklyn,Kings County,40.64,-74.02,3,Pizza Place,Mexican Restaurant,Grocery Store,Ice Cream Shop,Bakery,American Restaurant,School,Sandwich Place,Coffee Shop,Dessert Shop,both
250,11237,Brooklyn,Kings County,40.7,-73.92,3,Coffee Shop,Mexican Restaurant,Bar,Pizza Place,Deli / Bodega,Latin American Restaurant,Dive Bar,Italian Restaurant,Bakery,Thrift / Vintage Store,both


In [101]:
nyc_cluster3 = nybos_cluster3[(nybos_cluster3['county']=='New York County') | (nybos_cluster3['county']=='Bronx County') | (nybos_cluster3['county']=='Kings County') |
                      (nybos_cluster3['county']=='Queens County') | (nybos_cluster3['county']=='Richmond County')]
print('There are {} New York City zipcodes in cluster 3.'.format(nyc_cluster3.shape[0]))

There are 23 New York City zipcodes in cluster 3.


In [102]:
bos_cluster3 = nybos_cluster3[nybos_cluster3['county']=='Suffolk County']
print('There are {} Boston zipcodes in cluster 3.'.format(bos_cluster3.shape[0]))

There are 6 Boston zipcodes in cluster 3.


### Cluster 4

In [103]:
nybos_cluster4 = nybos_merged[nybos_merged['Cluster']==4]
nybos_cluster4.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,_merge
12,10013,New York,New York County,40.72,-74.0,4,Clothing Store,Women's Store,Café,Yoga Studio,Men's Store,Bakery,Furniture / Home Store,Art Gallery,Mediterranean Restaurant,Ice Cream Shop,both
16,10017,New York,New York County,40.75,-73.97,4,Italian Restaurant,Japanese Restaurant,Coffee Shop,Sushi Restaurant,Steakhouse,Park,Asian Restaurant,Deli / Bodega,Bakery,Seafood Restaurant,both
18,10019,New York,New York County,40.77,-73.99,4,Park,Gym / Fitness Center,Theater,Coffee Shop,Sculpture Garden,Hotel Bar,Wine Bar,Hotel,Gastropub,Pizza Place,both
24,10025,New York,New York County,40.8,-73.97,4,Pizza Place,Chinese Restaurant,Mexican Restaurant,Coffee Shop,Indian Restaurant,Italian Restaurant,Bagel Shop,Grocery Store,Noodle House,Liquor Store,both
29,10030,New York,New York County,40.82,-73.94,4,Southern / Soul Food Restaurant,Bus Station,BBQ Joint,Rental Car Location,Baseball Field,Lounge,Tennis Court,Tapas Restaurant,Middle Eastern Restaurant,Fried Chicken Joint,both


In [104]:
nyc_cluster4 = nybos_cluster4[(nybos_cluster4['county']=='New York County') | (nybos_cluster4['county']=='Bronx County') | (nybos_cluster4['county']=='Kings County') |
                      (nybos_cluster4['county']=='Queens County') | (nybos_cluster4['county']=='Richmond County')]
print('There are {} New York City zipcodes in cluster 4.'.format(nyc_cluster4.shape[0]))

There are 33 New York City zipcodes in cluster 4.


In [105]:
bos_cluster4 = nybos_cluster4[nybos_cluster4['county']=='Suffolk County']
print('There are {} Boston zipcodes in cluster 4.'.format(bos_cluster4.shape[0]))

There are 15 Boston zipcodes in cluster 4.


### Cluster 5:

In [106]:
nybos_cluster5 = nybos_merged[nybos_merged['Cluster']==5]
nybos_cluster5.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,_merge
222,11208,Brooklyn,Kings County,40.67,-73.87,5,Deli / Bodega,Sandwich Place,Discount Store,Food Truck,Supermarket,Pizza Place,Bus Stop,Yoga Studio,Park,Chinese Restaurant,both
224,11210,Brooklyn,Kings County,40.63,-73.95,5,Mobile Phone Shop,Caribbean Restaurant,Pharmacy,Chinese Restaurant,Deli / Bodega,Restaurant,Breakfast Spot,Coffee Shop,Sporting Goods Shop,Shipping Store,both
225,11211,Brooklyn,Kings County,40.71,-73.95,5,Bar,Coffee Shop,Pizza Place,Italian Restaurant,Mexican Restaurant,Bakery,Japanese Restaurant,Deli / Bodega,Chinese Restaurant,New American Restaurant,both
226,11212,Brooklyn,Kings County,40.66,-73.91,5,Restaurant,Fried Chicken Joint,Airport Terminal,Park,Discount Store,Pizza Place,Caribbean Restaurant,Bus Line,Chinese Restaurant,Moving Target,both
228,11214,Brooklyn,Kings County,40.6,-74.0,5,Rental Car Location,Ice Cream Shop,Hookah Bar,Park,Shanghai Restaurant,Donut Shop,Pizza Place,Supplement Shop,Surf Spot,Restaurant,both


In [107]:
nyc_cluster5 = nybos_cluster5[(nybos_cluster5['county']=='New York County') | (nybos_cluster5['county']=='Bronx County') | (nybos_cluster5['county']=='Kings County') |
                      (nybos_cluster5['county']=='Queens County') | (nybos_cluster5['county']=='Richmond County')]
print('There are {} New York City zipcodes in cluster 5.'.format(nyc_cluster1.shape[0]))

There are 0 New York City zipcodes in cluster 5.


In [108]:
bos_cluster5 = nybos_cluster5[nybos_cluster5['county']=='Suffolk County']
print('There are {} Boston zipcodes in cluster 5.'.format(bos_cluster5.shape[0]))

There are 2 Boston zipcodes in cluster 5.


### Cluster 6:

In [109]:
nybos_cluster6 = nybos_merged[nybos_merged['Cluster']==6]
nybos_cluster6.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,_merge
249,11236,Brooklyn,Kings County,40.64,-73.9,6,Caribbean Restaurant,Cosmetics Shop,Taco Place,Bagel Shop,Gym,Pizza Place,Fast Food Restaurant,Mobile Phone Shop,Martial Arts Dojo,Diner,both


In [110]:
nyc_cluster6 = nybos_cluster6[(nybos_cluster6['county']=='New York County') | (nybos_cluster6['county']=='Bronx County') | (nybos_cluster6['county']=='Kings County') |
                      (nybos_cluster6['county']=='Queens County') | (nybos_cluster6['county']=='Richmond County')]
print('There are {} New York City zipcodes in cluster 6.'.format(nyc_cluster6.shape[0]))

There are 1 New York City zipcodes in cluster 6.


In [111]:
bos_cluster6 = nybos_cluster6[nybos_cluster6['county']=='Suffolk County']
print('There are {} Boston zipcodes in cluster 6.'.format(bos_cluster6.shape[0]))

There are 0 Boston zipcodes in cluster 6.


### Cluster 7:

In [112]:
nybos_cluster7 = nybos_merged[nybos_merged['Cluster']==7]
nybos_cluster7.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,_merge
255,11242,Brooklyn,Kings County,40.64,-73.94,7,Supermarket,Playground,Rental Car Location,Food & Drink Shop,Food,Moving Target,Fast Food Restaurant,Mexican Restaurant,Park,Caribbean Restaurant,both


In [113]:
nyc_cluster7 = nybos_cluster7[(nybos_cluster7['county']=='New York County') | (nybos_cluster7['county']=='Bronx County') | (nybos_cluster7['county']=='Kings County') |
                      (nybos_cluster7['county']=='Queens County') | (nybos_cluster7['county']=='Richmond County')]
print('There are {} New York City zipcodes in cluster 7.'.format(nyc_cluster7.shape[0]))

There are 1 New York City zipcodes in cluster 7.


In [114]:
bos_cluster7 = nybos_cluster7[nybos_cluster7['county']=='Suffolk County']
print('There are {} Boston zipcodes in cluster 7.'.format(bos_cluster7.shape[0]))

There are 0 Boston zipcodes in cluster 7.


### Cluster 8:

In [115]:
nybos_cluster8 = nybos_merged[nybos_merged['Cluster']==8]
nybos_cluster8.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,_merge
3,10004,New York,New York County,40.69,-74.02,8,Food Truck,Seafood Restaurant,Café,Food Stand,Ice Cream Shop,Bike Rental / Bike Share,Beer Garden,Gym / Fitness Center,Bar,Baseball Field,both
4,10005,New York,New York County,40.71,-74.01,8,Coffee Shop,Hotel,Memorial Site,Pizza Place,Sandwich Place,Bar,Park,Café,Food Truck,American Restaurant,both
9,10010,New York,New York County,40.74,-73.98,8,Indian Restaurant,American Restaurant,Ice Cream Shop,Italian Restaurant,Bar,Coffee Shop,Grocery Store,Thrift / Vintage Store,Hotel,Cocktail Bar,both
15,10016,New York,New York County,40.75,-73.98,8,Hotel,Coffee Shop,Sandwich Place,Japanese Restaurant,Italian Restaurant,Gym / Fitness Center,Chinese Restaurant,American Restaurant,Cuban Restaurant,Steakhouse,both
23,10024,New York,New York County,40.8,-73.97,8,Pizza Place,Chinese Restaurant,Mexican Restaurant,Coffee Shop,Indian Restaurant,Italian Restaurant,Bagel Shop,Grocery Store,Noodle House,Liquor Store,both


In [116]:
nyc_cluster8 = nybos_cluster8[(nybos_cluster8['county']=='New York County') | (nybos_cluster8['county']=='Bronx County') | (nybos_cluster8['county']=='Kings County') |
                      (nybos_cluster8['county']=='Queens County') | (nybos_cluster8['county']=='Richmond County')]
print('There are {} New York City zipcodes in cluster 8.'.format(nyc_cluster8.shape[0]))

There are 52 New York City zipcodes in cluster 8.


In [117]:
bos_cluster8 = nybos_cluster8[nybos_cluster8['county']=='Suffolk County']
print('There are {} Boston zipcodes in cluster 8.'.format(bos_cluster8.shape[0]))

There are 20 Boston zipcodes in cluster 8.


### Cluster 9:

In [118]:
nybos_cluster9 = nybos_merged[nybos_merged['Cluster']==9]
nybos_cluster9.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,_merge
233,11219,Brooklyn,Kings County,40.63,-74.0,9,Chinese Restaurant,Supermarket,Italian Restaurant,Bus Station,Bar,Bank,Event Space,Hardware Store,Cantonese Restaurant,Seafood Restaurant,both
357,2129,Charlestown,Suffolk County,42.38,-71.06,9,Pizza Place,Liquor Store,Boat or Ferry,Monument / Landmark,Gastropub,Historic Site,History Museum,Discount Store,Café,Dry Cleaner,both
378,2210,Boston,Suffolk County,42.35,-71.04,9,Seafood Restaurant,Café,Donut Shop,Steakhouse,American Restaurant,Art Museum,Harbor / Marina,Gym,Music Venue,Coffee Shop,both
381,2215,Boston,Suffolk County,42.35,-71.1,9,Sports Bar,Coffee Shop,American Restaurant,Hotel,Lounge,Hot Dog Joint,Fast Food Restaurant,Donut Shop,Pizza Place,Baseball Field,both
384,2222,Boston,Suffolk County,42.35,-71.06,9,Asian Restaurant,Chinese Restaurant,Bakery,Coffee Shop,Sushi Restaurant,Theater,Sandwich Place,Pizza Place,Gym,Performing Arts Venue,both


In [119]:
nyc_cluster9 = nybos_cluster9[(nybos_cluster9['county']=='New York County') | (nybos_cluster9['county']=='Bronx County') | (nybos_cluster9['county']=='Kings County') |
                      (nybos_cluster9['county']=='Queens County') | (nybos_cluster9['county']=='Richmond County')]
print('There are {} New York City zipcodes in cluster 9.'.format(nyc_cluster9.shape[0]))

There are 1 New York City zipcodes in cluster 9.


In [121]:
bos_cluster9 = nybos_cluster9[nybos_cluster9['county']=='Suffolk County']
print('There are {} Boston zipcodes in cluster 9.'.format(bos_cluster9.shape[0]))

There are 5 Boston zipcodes in cluster 9.


### Cluster 0:

In [122]:
nybos_cluster0 = nybos_merged[nybos_merged['Cluster']==0]
nybos_cluster0.head()

Unnamed: 0,Neighborhood,city,county,latitude,longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,_merge
0,10001,New York,New York County,40.75,-74.0,0,Art Gallery,Coffee Shop,Deli / Bodega,Chinese Restaurant,Pizza Place,Cocktail Bar,Tapas Restaurant,Lounge,Health & Beauty Service,Theater,both
1,10002,New York,New York County,40.71,-73.99,0,Café,Malay Restaurant,Art Gallery,French Restaurant,Other Great Outdoors,Motel,Cantonese Restaurant,Mexican Restaurant,Skating Rink,Skate Park,both
2,10003,New York,New York County,40.73,-73.99,0,Japanese Restaurant,Grocery Store,Italian Restaurant,Coffee Shop,Dessert Shop,Ice Cream Shop,Gym,Yoga Studio,Chinese Restaurant,Spa,both
5,10006,New York,New York County,40.71,-74.01,0,Coffee Shop,Hotel,Memorial Site,Pizza Place,Sandwich Place,Bar,Park,Café,Food Truck,American Restaurant,both
6,10007,New York,New York County,40.71,-74.01,0,Coffee Shop,Hotel,Memorial Site,Pizza Place,Sandwich Place,Bar,Park,Café,Food Truck,American Restaurant,both


In [123]:
nyc_cluster0 = nybos_cluster0[(nybos_cluster0['county']=='New York County') | (nybos_cluster0['county']=='Bronx County') | (nybos_cluster0['county']=='Kings County') |
                      (nybos_cluster0['county']=='Queens County') | (nybos_cluster0['county']=='Richmond County')]
print('There are {} New York City zipcodes in cluster 0.'.format(nyc_cluster0.shape[0]))

There are 138 New York City zipcodes in cluster 0.


In [124]:
bos_cluster0 = nybos_cluster0[nybos_cluster0['county']=='Suffolk County']
print('There are {} Boston zipcodes in cluster 0.'.format(bos_cluster0.shape[0]))

There are 9 Boston zipcodes in cluster 0.
