 # Capstone Project - The Battle of Neighborhoods

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from geopy.geocoders import Nominatim
import urllib.request
import json
from bs4 import BeautifulSoup
from urllib.request import urlopen
import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.colors as colors
%matplotlib inline
from sklearn.cluster import KMeans

import folium

print('Libraries imported.')

Libraries imported.


## Download and Explore Dataset


Download and Explore Dataset
New York City has a total of 5 boroughs and 306 neighborhoods. In order to segment the neighborhoods and explore them, we will essentially need a dataset that contains the 5 boroughs and the neighborhoods that exist in each borough, as well as the latitude and longitude coordinates of each neighborhood.

Luckily, this dataset exists for free on the web. Feel free to try to find this dataset on your own, but here is the link to the dataset: https://geo.nyu.edu/catalog/nyu_2451_34572

For your convenience, I downloaded the file and placed it on the server, so you can simply run a wget command and access the data. So let's go ahead and do that.

In [2]:
# The dataset is read from a local copy of the GeoJSON file; the original
# download command is kept below for reference.
# !wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

# Pass the encoding explicitly so the read does not depend on the platform's
# default locale encoding (GeoJSON is specified as UTF-8).
with open('nyu_2451_34572-geojson.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

Data downloaded!


#### Transform the data into a *pandas* dataframe

In [3]:
# Each entry of the GeoJSON 'features' list describes one neighborhood.
neighborhoods_data = newyork_data['features']

# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Collect one record per feature and build the dataframe in a single call.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole frame on every iteration.
neighborhood_records = []
for data in neighborhoods_data:
    # the coordinate pair is stored as [longitude, latitude]
    neighborhood_lon, neighborhood_lat = data['geometry']['coordinates'][:2]

    neighborhood_records.append({'Borough': data['properties']['borough'],
                                 'Neighborhood': data['properties']['name'],
                                 'Latitude': neighborhood_lat,
                                 'Longitude': neighborhood_lon})

# instantiate the dataframe with a fixed column order
neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

In [4]:
# Preview the first rows of the neighborhoods dataframe.
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


#### Use geopy library to get the latitude and longitude values of New York City.

In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent <em>ny_explorer</em>, as shown below.

In [5]:
address = 'New York City, NY'

# Nominatim needs a user_agent that identifies this application.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the address cannot be resolved; fail with a
# clear message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


#### Create a map of New York with the Manhattan neighborhoods superimposed on top.

In [6]:
# Keep only the Manhattan rows and renumber them from zero.
is_manhattan = neighborhoods['Borough'] == 'Manhattan'
manhattan_data = neighborhoods.loc[is_manhattan].reset_index(drop=True)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [7]:
import folium
# base map centred on the geocoded New York coordinates
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# one circle marker per Manhattan neighborhood, with a "neighborhood, borough" popup
marker_fields = ('Latitude', 'Longitude', 'Borough', 'Neighborhood')
for lat, lng, borough, neighborhood in zip(*(manhattan_data[field] for field in marker_fields)):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_newyork)

map_newyork

## Foursquare venues


In [8]:
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    """Search Foursquare for venues around each given location.

    One GET request is sent to the ``/v2/venues/search`` endpoint per
    (name, latitude, longitude) triple. Venues without a category name are
    skipped. The module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION``
    and ``LIMIT`` are read at call time.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each search centre.
    radius : number, default 5000
        Value sent as the ``radius`` query parameter.
    categoryIds : str, default ''
        Value sent as the ``categoryId`` query parameter; omitted when empty.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        is empty (but keeps these columns) when no venue was found.

    Raises
    ------
    requests.HTTPError
        If a request returns an unsuccessful HTTP status.
    RuntimeError
        If a response body does not contain ``response.venues``.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']
    venues_list = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and encode the query string instead of formatting
        # the URL by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        if categoryIds != '':
            params['categoryId'] = categoryIds

        # make the GET request; the timeout keeps a stalled connection from
        # hanging the notebook
        response = requests.get('https://api.foursquare.com/v2/venues/search',
                                params=params, timeout=30)
        response.raise_for_status()

        payload = response.json()
        results = payload.get('response', {}).get('venues')
        if results is None:
            # Report only the response metadata: the request URL carries the
            # client secret and must not be echoed into the notebook output.
            raise RuntimeError('Unexpected Foursquare response for {!r}: {}'.format(
                name, payload.get('meta')))

        # keep only the relevant fields of venues that have a category name
        for v in results:
            categories = v.get('categories') or []
            category = categories[0].get('name') if categories else None
            if category is None:
                continue

            venues_list.append((
                name,
                lat,
                lng,
                v['name'],
                v['location']['lat'],
                v['location']['lng'],
                category
            ))

    return pd.DataFrame(venues_list, columns=venue_columns)

In [9]:
import os

# Sent by getNearbyVenues as the `limit` query parameter.
# NOTE(review): per-neighborhood counts in this notebook top out at 50, so the
# API presumably caps the limit below this value - confirm.
LIMIT = 500
# Not read by getNearbyVenues; the venue search call in this notebook passes
# its own radius argument.
radius = 5000

# Credentials are read from the environment so that secrets are not stored in
# the notebook. Keys that were previously committed here should be treated as
# exposed and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not CLIENT_ID or not CLIENT_SECRET:
    print('Warning: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before querying Foursquare.')

# API version date sent as the `v` query parameter.
VERSION = '20181020'

In [10]:
#https://developer.foursquare.com/docs/resources/categories
# fitness = 4bf58dd8d48988d175941735

# Narrow the working frame to Manhattan, then search for venues of the
# fitness category within 1000 of each neighborhood's coordinates.
manhattan_only = neighborhoods['Borough'] == 'Manhattan'
neighborhoods = neighborhoods.loc[manhattan_only].reset_index(drop=True)
newyork_venues_fitness_centers = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=1000,
    categoryIds='4bf58dd8d48988d175941735',
)
newyork_venues_fitness_centers.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill,40.876551,-73.91066,Bronx Boxing,40.875671,-73.908355,Boxing Gym
1,Marble Hill,40.876551,-73.91066,24 Hour Fitness,40.880592,-73.908255,Gym / Fitness Center
2,Marble Hill,40.876551,-73.91066,Planet Fitness,40.874088,-73.909137,Gym / Fitness Center
3,Marble Hill,40.876551,-73.91066,Astral Fitness & Wellness Center,40.876705,-73.906372,Gym
4,Marble Hill,40.876551,-73.91066,La Palestra 99,40.873919,-73.917065,Gym


In [11]:
# (rows, columns) of the venue table: one row per returned venue.
newyork_venues_fitness_centers.shape

(1885, 7)

In [12]:
def addToMap(df, color, existingMap):
    """Draw one circle marker per row of ``df`` on ``existingMap``.

    ``df`` must provide the columns 'Venue Latitude', 'Venue Longitude',
    'Neighborhood', 'Venue' and 'Venue Category'. Each marker uses ``color``
    for both outline and fill, and its popup reads
    "<venue> (<category>) - <neighborhood>". Returns None.
    """
    marker_fields = ('Venue Latitude', 'Venue Longitude', 'Neighborhood', 'Venue', 'Venue Category')
    for lat, lng, local, venue, venueCat in zip(*(df[field] for field in marker_fields)):
        popup = folium.Popup('{} ({}) - {}'.format(venue, venueCat, local), parse_html=True)
        marker = folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=popup,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7)
        marker.add_to(existingMap)

In [13]:
# Fresh base map centred on the geocoded coordinates, with every returned
# venue plotted in red.
city_center = [latitude, longitude]
map_newyork_fitness_centers = folium.Map(location=city_center, zoom_start=10)
addToMap(newyork_venues_fitness_centers, 'red', map_newyork_fitness_centers)

map_newyork_fitness_centers

In [14]:
def addColumn(startDf, columnTitle, dataDf):
    """Add a per-neighborhood venue count column to ``startDf`` in place.

    Parameters
    ----------
    startDf : pandas.DataFrame
        Frame with a 'Neighborhood' column; receives the new column.
    columnTitle : str
        Name of the column to create or overwrite.
    dataDf : pandas.DataFrame
        Frame with 'Neighborhood' and 'Venue' columns, one row per venue.

    Returns
    -------
    None
        ``startDf`` is modified in place. Neighborhoods that do not occur in
        ``dataDf`` get a count of 0.

    Raises
    ------
    KeyError
        If a required column is missing. The previous bare ``except`` hid this
        and silently wrote 0 for every neighborhood.
    """
    # number of non-null 'Venue' entries per neighborhood
    venue_counts = dataDf.groupby('Neighborhood')['Venue'].count()

    # look every neighborhood up in one vectorized step; unmatched names map
    # to NaN and are turned into a count of 0
    startDf[columnTitle] = (
        startDf['Neighborhood'].map(venue_counts).fillna(0).astype(int)
    )

In [15]:
# Number of non-null entries per column for every neighborhood, i.e. how many
# venues were returned for it.
venues_by_neighborhood = newyork_venues_fitness_centers.groupby('Neighborhood')
manhattan_grouped = venues_by_neighborhood.count()
manhattan_grouped


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Park City,50,50,50,50,50,50
Carnegie Hill,50,50,50,50,50,50
Central Harlem,47,47,47,47,47,47
Chelsea,50,50,50,50,50,50
Chinatown,50,50,50,50,50,50
Civic Center,50,50,50,50,50,50
Clinton,50,50,50,50,50,50
East Harlem,48,48,48,48,48,48
East Village,50,50,50,50,50,50
Financial District,50,50,50,50,50,50


In [16]:
# Report how many distinct venue categories appear in the results.
distinct_categories = newyork_venues_fitness_centers['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(distinct_categories)))

There are 41 uniques categories.


## 3. Analyze Each Neighborhood

In [17]:
# One indicator column per venue category, named after the category itself
# (empty prefix and separator).
manhattan_onehot = pd.get_dummies(
    newyork_venues_fitness_centers[['Venue Category']],
    prefix="",
    prefix_sep="",
)

# Re-attach the neighborhood of each venue; the new column lands at the end.
manhattan_onehot['Neighborhood'] = newyork_venues_fitness_centers['Neighborhood']

# Rotate that last column to the front so the neighborhood comes first.
all_columns = list(manhattan_onehot.columns)
fixed_columns = all_columns[-1:] + all_columns[:-1]
manhattan_onehot = manhattan_onehot[fixed_columns]

manhattan_onehot.head()

Unnamed: 0,Neighborhood,Athletics & Sports,Basketball Court,Boxing Gym,Building,Climbing Gym,Club House,Community Center,Corporate Amenity,Cultural Center,Cycle Studio,Dance Studio,Doctor's Office,Dog Run,General College & University,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Martial Arts Dojo,Massage Studio,Medical Center,Non-Profit,Office,Outdoor Gym,Park,Physical Therapist,Pilates Studio,Playground,Pool,Recreation Center,Residential Building (Apartment / Condo),School,Spa,Spiritual Center,Sports Club,State / Provincial Park,Tennis Court,Tennis Stadium,Track,Weight Loss Center,Yoga Studio
0,Marble Hill,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [18]:
# Averaging the indicator columns per neighborhood gives the share of each
# category among that neighborhood's venues.
category_means = manhattan_onehot.groupby('Neighborhood').mean()
manhattan_grouped = category_means.reset_index()
manhattan_grouped

Unnamed: 0,Neighborhood,Athletics & Sports,Basketball Court,Boxing Gym,Building,Climbing Gym,Club House,Community Center,Corporate Amenity,Cultural Center,Cycle Studio,Dance Studio,Doctor's Office,Dog Run,General College & University,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Martial Arts Dojo,Massage Studio,Medical Center,Non-Profit,Office,Outdoor Gym,Park,Physical Therapist,Pilates Studio,Playground,Pool,Recreation Center,Residential Building (Apartment / Condo),School,Spa,Spiritual Center,Sports Club,State / Provincial Park,Tennis Court,Tennis Stadium,Track,Weight Loss Center,Yoga Studio
0,Battery Park City,0.02,0.0,0.06,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.02,0.0,0.0,0.36,0.34,0.06,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06
1,Carnegie Hill,0.0,0.0,0.04,0.02,0.02,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.34,0.36,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.08
2,Central Harlem,0.021277,0.0,0.0,0.0,0.042553,0.0,0.0,0.0,0.0,0.042553,0.0,0.0,0.0,0.021277,0.340426,0.340426,0.0,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.106383
3,Chelsea,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.18,0.0,0.0,0.0,0.0,0.14,0.5,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08
4,Chinatown,0.04,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.26,0.3,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2
5,Civic Center,0.04,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.28,0.26,0.04,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.18
6,Clinton,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.44,0.28,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.1
7,East Harlem,0.0,0.0,0.041667,0.020833,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.479167,0.270833,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.041667,0.0625
8,East Village,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.26,0.44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.12
9,Financial District,0.02,0.0,0.06,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.02,0.0,0.0,0.38,0.34,0.04,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04


In [19]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest values in ``row``.

    The first entry of ``row`` is skipped (callers pass rows whose first
    entry is the neighborhood name); the remaining entries are ranked in
    descending order and the index labels of the leading ones are returned
    as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [20]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # The first three ranks use their own suffix and every later rank uses
    # 'th'. An explicit bounds check replaces the former bare `except`, which
    # would also have hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = manhattan_grouped['Neighborhood']

# fill the ranked category names row by row
for ind in np.arange(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Gym,Gym / Fitness Center,Yoga Studio,Boxing Gym,Gym Pool,Cycle Studio,Massage Studio,Corporate Amenity,Doctor's Office,Athletics & Sports
1,Carnegie Hill,Gym / Fitness Center,Gym,Yoga Studio,Boxing Gym,Community Center,Physical Therapist,Pool,Martial Arts Dojo,Cycle Studio,Climbing Gym
2,Central Harlem,Gym,Gym / Fitness Center,Yoga Studio,Climbing Gym,Cycle Studio,Martial Arts Dojo,Pilates Studio,General College & University,Athletics & Sports,Tennis Court
3,Chelsea,Gym / Fitness Center,Cycle Studio,Gym,Yoga Studio,Boxing Gym,Martial Arts Dojo,Recreation Center,Physical Therapist,Building,Dog Run
4,Chinatown,Gym / Fitness Center,Gym,Yoga Studio,Boxing Gym,Pilates Studio,Cycle Studio,Athletics & Sports,Martial Arts Dojo,Office,Spiritual Center


## Cluster Neighborhoods


In [21]:
# set number of clusters
kclusters = 5

# Cluster on the category frequencies only. The keyword form replaces
# drop('Neighborhood', 1), whose positional axis argument was removed in
# pandas 2.0.
manhattan_grouped_clustering = manhattan_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is pinned so the result does not depend on
# the default of the installed scikit-learn version
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(manhattan_grouped_clustering)

# check cluster labels generated for the first ten rows of the dataframe
kmeans.labels_[0:10]

array([3, 2, 2, 0, 4, 4, 3, 3, 2, 3])

In [22]:
# add clustering labels; a column left over from an earlier run of this cell
# is dropped first, because DataFrame.insert raises ValueError when the column
# already exists
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the cluster label and ranked venue categories to every Manhattan
# neighborhood; join returns a new frame, so manhattan_data is left untouched
manhattan_merged = manhattan_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,1,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Martial Arts Dojo,Weight Loss Center,Tennis Stadium,Boxing Gym,Doctor's Office,Gym Pool
1,Manhattan,Chinatown,40.715618,-73.994279,4,Gym / Fitness Center,Gym,Yoga Studio,Boxing Gym,Pilates Studio,Cycle Studio,Athletics & Sports,Martial Arts Dojo,Office,Spiritual Center
2,Manhattan,Washington Heights,40.851903,-73.9369,3,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Dance Studio,Gymnastics Gym,Gym Pool,General College & University,Dog Run,Doctor's Office
3,Manhattan,Inwood,40.867684,-73.92121,1,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Dance Studio,Gymnastics Gym,Gym Pool,General College & University,Dog Run,Doctor's Office
4,Manhattan,Hamilton Heights,40.823604,-73.949688,2,Gym / Fitness Center,Gym,Yoga Studio,Martial Arts Dojo,Cycle Studio,Track,Basketball Court,State / Provincial Park,Climbing Gym,Doctor's Office


In [23]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color each
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels']):
    # A neighborhood without a match in the join has no label (NaN), which
    # also turns the whole column into floats. Skip unlabeled rows and convert
    # the label back to int so it can index the palette.
    if pd.isna(cluster):
        continue
    cluster = int(cluster)

    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster - 1 is kept from the original color assignment; cluster 0
    # therefore wraps around to the last palette entry
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [24]:
# Rows labelled cluster 0, restricted to column 1 and columns 5 onward.
in_cluster = manhattan_merged['Cluster Labels'] == 0
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Chelsea,Gym / Fitness Center,Cycle Studio,Gym,Yoga Studio,Boxing Gym,Martial Arts Dojo,Recreation Center,Physical Therapist,Building,Dog Run
27,Gramercy,Gym / Fitness Center,Gym,Cycle Studio,Yoga Studio,Martial Arts Dojo,Boxing Gym,Pilates Studio,Dog Run,Gymnastics Gym,Gym Pool
38,Flatiron,Gym / Fitness Center,Gym,Cycle Studio,Yoga Studio,Martial Arts Dojo,Boxing Gym,Pilates Studio,Dog Run,Gymnastics Gym,Gym Pool


In [25]:
# Rows labelled cluster 1, restricted to column 1 and columns 5 onward.
in_cluster = manhattan_merged['Cluster Labels'] == 1
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[in_cluster, shown_columns]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Marble Hill,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Martial Arts Dojo,Weight Loss Center,Tennis Stadium,Boxing Gym,Doctor's Office,Gym Pool
3,Inwood,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Dance Studio,Gymnastics Gym,Gym Pool,General College & University,Dog Run,Doctor's Office
5,Manhattanville,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Basketball Court,Climbing Gym,Cycle Studio,Gym Pool,Martial Arts Dojo,Park
25,Manhattan Valley,Gym,Gym / Fitness Center,Yoga Studio,Martial Arts Dojo,Track,Gym Pool,Playground,Cycle Studio,Dance Studio,General College & University


In [26]:
# Rows labelled cluster 2, restricted to column 1 and columns 5 onward.
in_cluster = manhattan_merged['Cluster Labels'] == 2
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[in_cluster, shown_columns]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Hamilton Heights,Gym / Fitness Center,Gym,Yoga Studio,Martial Arts Dojo,Cycle Studio,Track,Basketball Court,State / Provincial Park,Climbing Gym,Doctor's Office
6,Central Harlem,Gym,Gym / Fitness Center,Yoga Studio,Climbing Gym,Cycle Studio,Martial Arts Dojo,Pilates Studio,General College & University,Athletics & Sports,Tennis Court
8,Upper East Side,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Building,Martial Arts Dojo,Club House,Spa,School,Track
9,Yorkville,Gym / Fitness Center,Gym,Yoga Studio,School,Physical Therapist,Pilates Studio,Cycle Studio,Pool,Martial Arts Dojo,Community Center
10,Lenox Hill,Gym / Fitness Center,Gym,Cycle Studio,Yoga Studio,Tennis Court,School,Non-Profit,Physical Therapist,Pilates Studio,Boxing Gym
12,Upper West Side,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,Cycle Studio,Martial Arts Dojo,Track,Playground,Massage Studio,Weight Loss Center
13,Lincoln Square,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Pilates Studio,Climbing Gym,Cultural Center,Dog Run,Gym Pool,Massage Studio
16,Murray Hill,Gym / Fitness Center,Gym,Cycle Studio,Yoga Studio,Boxing Gym,Building,Climbing Gym,Club House,Community Center,Corporate Amenity
19,East Village,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,Cycle Studio,Boxing Gym,Track,Pool,Building,Gym Pool
24,West Village,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Pilates Studio,Track,Physical Therapist,Pool,Boxing Gym,Basketball Court


In [27]:
# Rows labelled cluster 3, restricted to column 1 and columns 5 onward.
in_cluster = manhattan_merged['Cluster Labels'] == 3
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[in_cluster, shown_columns]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Washington Heights,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Dance Studio,Gymnastics Gym,Gym Pool,General College & University,Dog Run,Doctor's Office
7,East Harlem,Gym,Gym / Fitness Center,Yoga Studio,Martial Arts Dojo,Weight Loss Center,Boxing Gym,Building,Climbing Gym,Track,Club House
11,Roosevelt Island,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,School,Non-Profit,Dance Studio,Gym Pool,Martial Arts Dojo,Club House
14,Clinton,Gym,Gym / Fitness Center,Yoga Studio,Cycle Studio,Residential Building (Apartment / Condo),Boxing Gym,Building,Medical Center,Track,Tennis Court
15,Midtown,Gym / Fitness Center,Gym,Cycle Studio,Yoga Studio,Boxing Gym,Building,Climbing Gym,Club House,Community Center,Corporate Amenity
28,Battery Park City,Gym,Gym / Fitness Center,Yoga Studio,Boxing Gym,Gym Pool,Cycle Studio,Massage Studio,Corporate Amenity,Doctor's Office,Athletics & Sports
29,Financial District,Gym,Gym / Fitness Center,Boxing Gym,Yoga Studio,Cycle Studio,Gym Pool,Massage Studio,Corporate Amenity,Doctor's Office,Martial Arts Dojo
34,Sutton Place,Gym,Gym / Fitness Center,Cycle Studio,Yoga Studio,Club House,Non-Profit,Pilates Studio,Gym Pool,Tennis Court,Track
35,Turtle Bay,Gym,Gym / Fitness Center,Cycle Studio,Yoga Studio,Track,Gym Pool,Boxing Gym,Sports Club,General College & University,Dog Run
36,Tudor City,Gym,Gym / Fitness Center,Cycle Studio,Yoga Studio,Pilates Studio,Boxing Gym,Gym Pool,Medical Center,Track,Spa


In [28]:
# Rows labelled cluster 4, restricted to column 1 and columns 5 onward.
in_cluster = manhattan_merged['Cluster Labels'] == 4
shown_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[in_cluster, shown_columns]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Chinatown,Gym / Fitness Center,Gym,Yoga Studio,Boxing Gym,Pilates Studio,Cycle Studio,Athletics & Sports,Martial Arts Dojo,Office,Spiritual Center
18,Greenwich Village,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Pilates Studio,Boxing Gym,Pool,Office,Gym Pool,General College & University
20,Lower East Side,Gym / Fitness Center,Gym,Yoga Studio,Pilates Studio,Boxing Gym,Outdoor Gym,Pool,Community Center,Athletics & Sports,Track
21,Tribeca,Gym,Gym / Fitness Center,Yoga Studio,Cycle Studio,Boxing Gym,Pilates Studio,Gym Pool,Athletics & Sports,Pool,Office
22,Little Italy,Gym / Fitness Center,Gym,Yoga Studio,Cycle Studio,Pilates Studio,Boxing Gym,Athletics & Sports,Office,Spiritual Center,Cultural Center
23,Soho,Gym,Gym / Fitness Center,Yoga Studio,Pilates Studio,Boxing Gym,Cycle Studio,Athletics & Sports,Gym Pool,Pool,Office
32,Civic Center,Gym,Gym / Fitness Center,Yoga Studio,Cycle Studio,Boxing Gym,Gym Pool,Athletics & Sports,Pilates Studio,Office,Spiritual Center


**Conclusion**

Based on the dataframe analysis above, the Cluster 0 (Chelsea) and Cluster 1 (Marble Hill) areas are the best places to open a new fitness center business.