# Capstone Project -- Finding a better place to open a Chinese restaurant in New York City

### Import all the necessary libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!pip install geopy
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#install and import the folium for Foursquare API lab
#!pip install folium
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### Download and Explore the Dataset

The dataset can be downloaded from the IBM server with the `wget` command.

In [2]:
!wget -q -O 'newyork_data.json' https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs/newyork_data.json
print('Data downloaded!')

Data downloaded!


#### Load and explore the data

In [3]:
# JSON interchange text is UTF-8; pass the encoding explicitly so that reading the file
# does not depend on the platform's default locale encoding.
with open('newyork_data.json', encoding='utf-8') as json_data:
    newyork_data = json.load(json_data)

#### Define a new variable that includes the data
All the relevant data is in the `features` key, which is a list with one entry per neighborhood.

In [4]:
# keep only the list stored under the 'features' key of the loaded JSON
neighborhoods_data = newyork_data['features']
# have a peek at the first item in the list
neighborhoods_data[0]

{'type': 'Feature',
 'id': 'nyu_2451_34572.1',
 'geometry': {'type': 'Point',
  'coordinates': [-73.84720052054902, 40.89470517661]},
 'geometry_name': 'geom',
 'properties': {'name': 'Wakefield',
  'stacked': 1,
  'annoline1': 'Wakefield',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.84720052054902,
   40.89470517661,
   -73.84720052054902,
   40.89470517661]}}

#### Transform the data into a pandas dataframe

In [5]:
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

Then let's loop through the data and fill the dataframe one row at a time.

In [6]:
# Collect one record per GeoJSON feature and build the dataframe in a single call.
# DataFrame.append was removed in pandas 2.0, copied the whole frame on every iteration,
# and made this cell add duplicate rows whenever it was run a second time.
neighborhood_rows = []
for data in neighborhoods_data:
    properties = data['properties']
    # coordinates are stored as [longitude, latitude]
    neighborhood_lon, neighborhood_lat = data['geometry']['coordinates'][:2]

    neighborhood_rows.append({'Borough': properties['borough'],
                              'Neighborhood': properties['name'],
                              'Latitude': neighborhood_lat,
                              'Longitude': neighborhood_lon})

# column_names fixes the column order (and the columns of an empty result)
neighborhoods = pd.DataFrame(neighborhood_rows, columns=column_names)

Have a look at the first rows of the resulting dataframe.

In [7]:
# preview the first rows of the neighborhoods dataframe
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


#### Use geopy library to get the latitude and longitude values of New York City
In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent ny_explorer, as shown below.

In [8]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 40.7127281, -74.0060152.


#### Create a map of the Manhattan borough of New York with its neighborhoods superimposed on top.

In [10]:
# keep only the Manhattan rows and renumber them from 0
is_manhattan = neighborhoods['Borough'] == 'Manhattan'
manhattan_data = neighborhoods.loc[is_manhattan].reset_index(drop=True)
manhattan_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688


In [11]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(manhattan_data['Latitude'], manhattan_data['Longitude'], manhattan_data['Borough'], manhattan_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

#### Define Foursquare Credentials and Version

In [12]:
import os
from getpass import getpass

# SECURITY: never store the Foursquare credentials in the notebook itself. They are read from
# the environment, with an interactive prompt as a fallback. Any key that has ever been
# committed together with this notebook should be treated as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare client ID: ') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare client secret: ') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value
radius = 500

Create a function that queries Foursquare for the venues around each neighborhood.

In [13]:
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=500, categoryIds='', timeout=30):
    """Query the Foursquare venue search once per neighborhood and tabulate the results.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Neighborhood name and centre coordinates; iterated together with zip().
    radius : number, default 500
        Value sent as the `radius` query parameter.
    categoryIds : str, default ''
        When not empty, sent as the `categoryId` query parameter.
    timeout : number, default 30
        Seconds to wait for each HTTP request before requests raises.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue that has at least one named category, with the
        columns 'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
        'Venue', 'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame has these columns even when no venue was found.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with a 4xx/5xx status (via raise_for_status()).

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    search_url = 'https://api.foursquare.com/v2/venues/search'
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and URL-encode the query string instead of formatting it by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        if categoryIds != '':
            params['categoryId'] = categoryIds

        # make the GET request; fail loudly on an HTTP error instead of a later KeyError
        response = requests.get(search_url, params=params, timeout=timeout)
        response.raise_for_status()
        results = response.json()['response']['venues']

        # keep only the relevant information of venues that carry a named category
        for v in results:
            categories = v.get('categories') or []
            if not categories or 'name' not in categories[0]:
                continue
            venue_rows.append((
                name,
                lat,
                lng,
                v['name'],
                v['location']['lat'],
                v['location']['lng'],
                categories[0]['name'],
            ))

    # Build the frame once, after the loop. Passing `columns` keeps the schema stable when
    # there are no rows (no neighborhoods, or neighborhoods without any venue).
    return pd.DataFrame(venue_rows, columns=columns)

In [14]:
#https://developer.foursquare.com/docs/resources/categories
#Chinese Restaurant = 4bf58dd8d48988d145941735
# restrict the search to Manhattan, then fetch the venues within 1000 m of each neighborhood
# using the "Chinese Restaurant" category id listed in the comment above
in_manhattan = neighborhoods['Borough'] == 'Manhattan'
neighborhoods = neighborhoods.loc[in_manhattan].reset_index(drop=True)
newyork_venues_chi = getNearbyVenues(
    names=neighborhoods['Neighborhood'],
    latitudes=neighborhoods['Latitude'],
    longitudes=neighborhoods['Longitude'],
    radius=1000,
    categoryIds='4bf58dd8d48988d145941735',
)
newyork_venues_chi.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill,40.876551,-73.91066,China Wang,40.874347,-73.91054,Chinese Restaurant
1,Marble Hill,40.876551,-73.91066,Anise,40.885989,-73.910036,Chinese Restaurant
2,Marble Hill,40.876551,-73.91066,Mei Chung Mei Restaurant,40.878272,-73.902981,Chinese Restaurant
3,Marble Hill,40.876551,-73.91066,Pioneer Chinese Restaurant,40.879541,-73.905327,Chinese Restaurant
4,Marble Hill,40.876551,-73.91066,New Yung Hong Chinese Restaurant,40.873272,-73.905128,Chinese Restaurant


In [15]:
# (number of venue rows, number of columns)
newyork_venues_chi.shape

(1643, 7)

In [16]:
def addToMap(df, color, oldMap):
    """Draw one circle marker per row of `df` onto `oldMap`.

    `df` must provide the columns 'Venue Latitude', 'Venue Longitude', 'Neighborhood',
    'Venue' and 'Venue Category'. Every marker uses `color` for outline and fill, and its
    popup reads "<venue> (<category>) - <neighborhood>". The map is modified in place and
    nothing is returned.
    """
    fields = ['Venue Latitude', 'Venue Longitude', 'Neighborhood', 'Venue', 'Venue Category']
    for venue_lat, venue_lng, hood, venue_name, category in zip(*(df[field] for field in fields)):
        popup = folium.Popup('{} ({}) - {}'.format(venue_name, category, hood), parse_html=True)
        marker = folium.CircleMarker(
            [venue_lat, venue_lng],
            radius=5,
            popup=popup,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
        )
        marker.add_to(oldMap)

In [18]:
# new map centred on the New York City coordinates, with every returned venue drawn in red
map_newyork_chi = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(newyork_venues_chi, 'red', map_newyork_chi)

# last expression of the cell: renders the map
map_newyork_chi

In [19]:
def addColumn(startDf, columnTitle, dataDf):
    """Add to `startDf` a column with the number of venues per neighborhood.

    Parameters
    ----------
    startDf : pandas.DataFrame
        Frame with a 'Neighborhood' column; it is modified in place.
    columnTitle : str
        Name of the column to create (or overwrite) in `startDf`.
    dataDf : pandas.DataFrame
        Venue table with 'Neighborhood' and 'Venue' columns; non-null 'Venue'
        values are counted per neighborhood.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If a required column is missing. The previous bare `except` silently turned
        that (and any other error) into a count of 0.
    """
    venue_counts = dataDf.groupby('Neighborhood')['Venue'].count()
    # Neighborhoods that do not occur in dataDf map to NaN and therefore count as 0.
    startDf[columnTitle] = startDf['Neighborhood'].map(venue_counts).fillna(0).astype(int)

Find out how many unique venue categories appear among the returned venues, and how many venues were returned for each neighborhood.

In [21]:
# per-neighborhood count of non-null values in every column
nyc_grouped = newyork_venues_chi.groupby('Neighborhood').count()

# distinct venue categories across all neighborhoods
unique_categories = newyork_venues_chi['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(unique_categories)))
nyc_grouped

There are 33 uniques categories.


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Park City,33,33,33,33,33,33
Carnegie Hill,41,41,41,41,41,41
Central Harlem,44,44,44,44,44,44
Chelsea,49,49,49,49,49,49
Chinatown,50,50,50,50,50,50
Civic Center,50,50,50,50,50,50
Clinton,47,47,47,47,47,47
East Harlem,44,44,44,44,44,44
East Village,50,50,50,50,50,50
Financial District,38,38,38,38,38,38


###  Analyze Each Neighborhood

In [23]:
# one hot encoding
manhattan_onehot = pd.get_dummies(newyork_venues_chi[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
manhattan_onehot['Neighborhood'] = newyork_venues_chi['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [manhattan_onehot.columns[-1]] + list(manhattan_onehot.columns[:-1])
manhattan_onehot = manhattan_onehot[fixed_columns]

manhattan_onehot.head()

Unnamed: 0,Neighborhood,Asian Restaurant,Bakery,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng,Chinese Restaurant,Cuban Restaurant,Dessert Shop,Dim Sum Restaurant,Dumpling Restaurant,Food Court,Food Stand,Food Truck,Hakka Restaurant,Hotpot Restaurant,Hunan Restaurant,Indian Chinese Restaurant,Japanese Restaurant,Lounge,Mexican Restaurant,Noodle House,Peking Duck Restaurant,Peruvian Restaurant,Seafood Restaurant,Shanghai Restaurant,Street Food Gathering,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Xinjiang Restaurant
0,Marble Hill,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Marble Hill,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Marble Hill,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Marble Hill,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Then group the rows by neighborhood and take the mean of each one-hot column, which gives the share of each category among that neighborhood's venues.

In [25]:
# mean of each one-hot column per neighborhood; reset_index() turns 'Neighborhood'
# back into an ordinary first column
manhattan_grouped = manhattan_onehot.groupby('Neighborhood').mean().reset_index()
manhattan_grouped

Unnamed: 0,Neighborhood,Asian Restaurant,Bakery,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng,Chinese Restaurant,Cuban Restaurant,Dessert Shop,Dim Sum Restaurant,Dumpling Restaurant,Food Court,Food Stand,Food Truck,Hakka Restaurant,Hotpot Restaurant,Hunan Restaurant,Indian Chinese Restaurant,Japanese Restaurant,Lounge,Mexican Restaurant,Noodle House,Peking Duck Restaurant,Peruvian Restaurant,Seafood Restaurant,Shanghai Restaurant,Street Food Gathering,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Xinjiang Restaurant
0,Battery Park City,0.030303,0.0,0.0,0.0,0.0,0.0,0.757576,0.0,0.0,0.030303,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.030303
1,Carnegie Hill,0.02439,0.0,0.0,0.0,0.0,0.0,0.829268,0.0,0.0,0.04878,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04878,0.0,0.02439,0.0,0.0
2,Central Harlem,0.022727,0.0,0.0,0.0,0.0,0.0,0.931818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0,0.022727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Chelsea,0.020408,0.0,0.020408,0.0,0.020408,0.0,0.816327,0.020408,0.0,0.020408,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.061224,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Chinatown,0.0,0.06,0.0,0.0,0.06,0.02,0.44,0.0,0.02,0.14,0.08,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.02,0.04,0.0,0.0,0.0,0.02,0.0,0.04,0.0
5,Civic Center,0.0,0.06,0.0,0.0,0.04,0.06,0.48,0.0,0.02,0.1,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.02,0.06,0.0,0.0,0.0,0.02,0.0,0.04,0.0
6,Clinton,0.021277,0.021277,0.0,0.0,0.021277,0.0,0.723404,0.021277,0.0,0.021277,0.0,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06383,0.0,0.0,0.0,0.0,0.0,0.0,0.06383,0.0,0.0,0.0,0.0
7,East Harlem,0.0,0.0,0.0,0.0,0.0,0.0,0.977273,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,0.0
8,East Village,0.04,0.0,0.0,0.0,0.0,0.0,0.68,0.0,0.0,0.08,0.04,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.08,0.0,0.02,0.0
9,Financial District,0.026316,0.0,0.0,0.0,0.0,0.0,0.710526,0.0,0.0,0.026316,0.0,0.0,0.0,0.131579,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.026316,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.026316


Write a function that returns a neighborhood's venue categories sorted by frequency in descending order.

In [26]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest values in `row`.

    The first element of `row` is skipped (the caller passes a row whose first entry is
    the neighborhood name); the remaining values are sorted in descending order and the
    index labels of the leading entries are returned as an array.
    """
    return row.iloc[1:].sort_values(ascending=False).index.values[:num_top_venues]

Then create a new dataframe and display the top 10 venue categories for each neighborhood.

In [27]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


def ordinal_label(position):
    """Return `position` with its English ordinal suffix: 1 -> '1st', 11 -> '11th', 22 -> '22nd'."""
    if position % 100 in (11, 12, 13):  # the teens always take 'th'
        return '{}th'.format(position)
    last_digit = position % 10
    if 1 <= last_digit <= 3:
        return '{}{}'.format(position, indicators[last_digit - 1])
    return '{}th'.format(position)


# create columns according to number of top venues
# (an explicit suffix rule replaces the bare `except` that was used as control flow)
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(ordinal_label(rank)) for rank in range(1, num_top_venues + 1)
]

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = manhattan_grouped['Neighborhood']

# fill every row with that neighborhood's categories, most frequent first
for ind in range(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Chinese Restaurant,Food Truck,Xinjiang Restaurant,Street Food Gathering,Dim Sum Restaurant,Mexican Restaurant,Asian Restaurant,Shanghai Restaurant,Seafood Restaurant,Bakery
1,Carnegie Hill,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Indian Chinese Restaurant,Asian Restaurant,Thai Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng
2,Central Harlem,Chinese Restaurant,Asian Restaurant,Japanese Restaurant,Noodle House,Dessert Shop,Food Stand,Food Court,Dumpling Restaurant,Dim Sum Restaurant,Cuban Restaurant
3,Chelsea,Chinese Restaurant,Noodle House,Asian Restaurant,Dumpling Restaurant,Dim Sum Restaurant,Cuban Restaurant,Cantonese Restaurant,Bar,Cha Chaan Teng,Hotpot Restaurant
4,Chinatown,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Bakery,Cantonese Restaurant,Shanghai Restaurant,Vegetarian / Vegan Restaurant,Peking Duck Restaurant,Dessert Shop,Taiwanese Restaurant


### Cluster and segment the neighborhoods in Manhattan

Run _k_-means to cluster the neighborhoods into 5 clusters.

In [28]:
# set number of clusters
kclusters = 5

manhattan_grouped_clustering = manhattan_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(manhattan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([4, 0, 2, 0, 1, 1, 0, 2, 3, 4], dtype=int32)

Create a new dataframe that includes the cluster label as well as the top 10 venue categories for each neighborhood.

In [30]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

manhattan_merged = manhattan_data
manhattan_merged = neighborhoods.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,2,Chinese Restaurant,Xinjiang Restaurant,Hotpot Restaurant,Bakery,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng,Cuban Restaurant,Dessert Shop
1,Manhattan,Chinatown,40.715618,-73.994279,1,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Bakery,Cantonese Restaurant,Shanghai Restaurant,Vegetarian / Vegan Restaurant,Peking Duck Restaurant,Dessert Shop,Taiwanese Restaurant
2,Manhattan,Washington Heights,40.851903,-73.9369,2,Chinese Restaurant,Asian Restaurant,Hotpot Restaurant,Bakery,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng,Cuban Restaurant,Dessert Shop
3,Manhattan,Inwood,40.867684,-73.92121,2,Chinese Restaurant,Dumpling Restaurant,Xinjiang Restaurant,Hotpot Restaurant,Bakery,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng,Cuban Restaurant
4,Manhattan,Hamilton Heights,40.823604,-73.949688,2,Chinese Restaurant,Xinjiang Restaurant,Hotpot Restaurant,Bakery,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng,Cuban Restaurant,Dessert Shop


Finally, visualize the resulting clusters

In [31]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels'].astype('int32')):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Cluster 1 (cluster label 0)

In [32]:
# rows with cluster label 0: neighborhood name (column 1) plus the ranked venue columns (5 onward)
cluster_rows = manhattan_merged['Cluster Labels'] == 0
cluster_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[cluster_rows, cluster_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Upper East Side,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Asian Restaurant,Cantonese Restaurant,Noodle House,Bar,Bubble Tea Shop,Cha Chaan Teng,Hakka Restaurant
9,Yorkville,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Asian Restaurant,Noodle House,Indian Chinese Restaurant,Cantonese Restaurant,Food Stand,Food Court,Dumpling Restaurant
10,Lenox Hill,Chinese Restaurant,Szechuan Restaurant,Asian Restaurant,Dim Sum Restaurant,Cantonese Restaurant,Noodle House,Bar,Bakery,Bubble Tea Shop,Cha Chaan Teng
11,Roosevelt Island,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Asian Restaurant,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng,Hotpot Restaurant,Cuban Restaurant
12,Upper West Side,Chinese Restaurant,Asian Restaurant,Sushi Restaurant,Dim Sum Restaurant,Cuban Restaurant,Peruvian Restaurant,Thai Restaurant,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng
14,Clinton,Chinese Restaurant,Szechuan Restaurant,Noodle House,Food Stand,Cuban Restaurant,Dim Sum Restaurant,Asian Restaurant,Cantonese Restaurant,Bakery,Cha Chaan Teng
17,Chelsea,Chinese Restaurant,Noodle House,Asian Restaurant,Dumpling Restaurant,Dim Sum Restaurant,Cuban Restaurant,Cantonese Restaurant,Bar,Cha Chaan Teng,Hotpot Restaurant
24,West Village,Chinese Restaurant,Noodle House,Vegetarian / Vegan Restaurant,Taiwanese Restaurant,Bar,Dim Sum Restaurant,Xinjiang Restaurant,Food Stand,Food Court,Dumpling Restaurant
25,Manhattan Valley,Chinese Restaurant,Asian Restaurant,Szechuan Restaurant,Peruvian Restaurant,Cuban Restaurant,Food Truck,Hotpot Restaurant,Hunan Restaurant,Sushi Restaurant,Seafood Restaurant
26,Morningside Heights,Chinese Restaurant,Szechuan Restaurant,Hunan Restaurant,Peruvian Restaurant,Food Truck,Hotpot Restaurant,Asian Restaurant,Shanghai Restaurant,Seafood Restaurant,Bakery


#### Cluster 2 (cluster label 1)

In [33]:
# rows with cluster label 1: neighborhood name (column 1) plus the ranked venue columns (5 onward)
cluster_rows = manhattan_merged['Cluster Labels'] == 1
cluster_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[cluster_rows, cluster_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Chinatown,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Bakery,Cantonese Restaurant,Shanghai Restaurant,Vegetarian / Vegan Restaurant,Peking Duck Restaurant,Dessert Shop,Taiwanese Restaurant
18,Greenwich Village,Chinese Restaurant,Dumpling Restaurant,Bakery,Dim Sum Restaurant,Cantonese Restaurant,Hotpot Restaurant,Vegetarian / Vegan Restaurant,Noodle House,Food Stand,Cha Chaan Teng
20,Lower East Side,Chinese Restaurant,Dumpling Restaurant,Taiwanese Restaurant,Cantonese Restaurant,Bakery,Hotpot Restaurant,Dim Sum Restaurant,Noodle House,Vegetarian / Vegan Restaurant,Hakka Restaurant
22,Little Italy,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Bakery,Cantonese Restaurant,Shanghai Restaurant,Peking Duck Restaurant,Dessert Shop,Taiwanese Restaurant,Hotpot Restaurant
23,Soho,Chinese Restaurant,Dim Sum Restaurant,Bakery,Dumpling Restaurant,Shanghai Restaurant,Vegetarian / Vegan Restaurant,Peking Duck Restaurant,Dessert Shop,Taiwanese Restaurant,Hotpot Restaurant
31,Noho,Chinese Restaurant,Dumpling Restaurant,Dim Sum Restaurant,Cantonese Restaurant,Taiwanese Restaurant,Hotpot Restaurant,Shanghai Restaurant,Vegetarian / Vegan Restaurant,Bakery,Szechuan Restaurant
32,Civic Center,Chinese Restaurant,Dim Sum Restaurant,Bakery,Shanghai Restaurant,Cha Chaan Teng,Dumpling Restaurant,Cantonese Restaurant,Vegetarian / Vegan Restaurant,Peking Duck Restaurant,Dessert Shop


#### Cluster 3 (cluster label 2)

In [34]:
# rows with cluster label 2: neighborhood name (column 1) plus the ranked venue columns (5 onward)
cluster_rows = manhattan_merged['Cluster Labels'] == 2
cluster_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[cluster_rows, cluster_columns]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Marble Hill,Chinese Restaurant,Xinjiang Restaurant,Hotpot Restaurant,Bakery,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng,Cuban Restaurant,Dessert Shop
2,Washington Heights,Chinese Restaurant,Asian Restaurant,Hotpot Restaurant,Bakery,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng,Cuban Restaurant,Dessert Shop
3,Inwood,Chinese Restaurant,Dumpling Restaurant,Xinjiang Restaurant,Hotpot Restaurant,Bakery,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng,Cuban Restaurant
4,Hamilton Heights,Chinese Restaurant,Xinjiang Restaurant,Hotpot Restaurant,Bakery,Bar,Bubble Tea Shop,Cantonese Restaurant,Cha Chaan Teng,Cuban Restaurant,Dessert Shop
5,Manhattanville,Chinese Restaurant,Japanese Restaurant,Noodle House,Xinjiang Restaurant,Dessert Shop,Food Stand,Food Court,Dumpling Restaurant,Dim Sum Restaurant,Cuban Restaurant
6,Central Harlem,Chinese Restaurant,Asian Restaurant,Japanese Restaurant,Noodle House,Dessert Shop,Food Stand,Food Court,Dumpling Restaurant,Dim Sum Restaurant,Cuban Restaurant
7,East Harlem,Chinese Restaurant,Thai Restaurant,Cuban Restaurant,Food Truck,Food Stand,Food Court,Dumpling Restaurant,Dim Sum Restaurant,Dessert Shop,Xinjiang Restaurant


#### Cluster 4 (cluster label 3)

In [35]:
# rows with cluster label 3: neighborhood name (column 1) plus the ranked venue columns (5 onward)
cluster_rows = manhattan_merged['Cluster Labels'] == 3
cluster_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[cluster_rows, cluster_columns]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Lincoln Square,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Shanghai Restaurant,Food Truck,Food Stand,Taiwanese Restaurant,Peking Duck Restaurant,Noodle House,Cuban Restaurant
15,Midtown,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Noodle House,Asian Restaurant,Shanghai Restaurant,Cuban Restaurant,Hunan Restaurant,Sushi Restaurant,Seafood Restaurant
16,Murray Hill,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Asian Restaurant,Noodle House,Dumpling Restaurant,Cuban Restaurant,Hakka Restaurant,Shanghai Restaurant,Sushi Restaurant
19,East Village,Chinese Restaurant,Taiwanese Restaurant,Dim Sum Restaurant,Asian Restaurant,Dumpling Restaurant,Vegetarian / Vegan Restaurant,Hotpot Restaurant,Hunan Restaurant,Szechuan Restaurant,Seafood Restaurant
21,Tribeca,Chinese Restaurant,Bakery,Cantonese Restaurant,Asian Restaurant,Street Food Gathering,Shanghai Restaurant,Cha Chaan Teng,Noodle House,Dessert Shop,Dim Sum Restaurant
27,Gramercy,Chinese Restaurant,Dim Sum Restaurant,Taiwanese Restaurant,Szechuan Restaurant,Dumpling Restaurant,Asian Restaurant,Hotpot Restaurant,Hakka Restaurant,Shanghai Restaurant,Sushi Restaurant
33,Midtown South,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Asian Restaurant,Shanghai Restaurant,Cuban Restaurant,Hakka Restaurant,Sushi Restaurant,Taiwanese Restaurant
37,Stuyvesant Town,Chinese Restaurant,Taiwanese Restaurant,Szechuan Restaurant,Asian Restaurant,Dim Sum Restaurant,Lounge,Dumpling Restaurant,Bakery,Bar,Bubble Tea Shop
38,Flatiron,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Noodle House,Asian Restaurant,Hakka Restaurant,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Bar


#### Cluster 5 (cluster label 4)

In [36]:
# rows with cluster label 4: neighborhood name (column 1) plus the ranked venue columns (5 onward)
cluster_rows = manhattan_merged['Cluster Labels'] == 4
cluster_columns = manhattan_merged.columns[[1] + list(range(5, manhattan_merged.shape[1]))]
manhattan_merged.loc[cluster_rows, cluster_columns]


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
28,Battery Park City,Chinese Restaurant,Food Truck,Xinjiang Restaurant,Street Food Gathering,Dim Sum Restaurant,Mexican Restaurant,Asian Restaurant,Shanghai Restaurant,Seafood Restaurant,Bakery
29,Financial District,Chinese Restaurant,Food Truck,Xinjiang Restaurant,Seafood Restaurant,Dim Sum Restaurant,Mexican Restaurant,Asian Restaurant,Street Food Gathering,Shanghai Restaurant,Bakery
