# Part 1:  Data Cleaning

### ***Import needed dependencies***

In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import random
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt

import seaborn as sns

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install folium 
import folium # map rendering library

!pip install selenium
from selenium import webdriver

!pip install beautifulsoup4
!pip install lxml

!pip install geopandas
import geopandas

print('Libraries imported!')

Libraries imported!


In [3]:
# Fetch the New York neighborhoods file and save it as newyork_data.json in the working directory.
# The confirmation below is printed unconditionally; wget's exit status is not checked.
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset
print('Data downloaded!')

Data downloaded!


### *Load & explore the data*

In [4]:
# Parse the downloaded file into a Python dictionary.
# The encoding is stated explicitly so decoding does not depend on the platform default.
with open('newyork_data.json', encoding='utf-8') as json_data:
    ny_data = json.load(json_data)

In [4]:
# Echo the parsed dictionary so its structure can be inspected in the cell output.
ny_data

{'type': 'FeatureCollection',
 'totalFeatures': 306,
 'features': [{'type': 'Feature',
   'id': 'nyu_2451_34572.1',
   'geometry': {'type': 'Point',
    'coordinates': [-73.84720052054902, 40.89470517661]},
   'geometry_name': 'geom',
   'properties': {'name': 'Wakefield',
    'stacked': 1,
    'annoline1': 'Wakefield',
    'annoline2': None,
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.84720052054902,
     40.89470517661,
     -73.84720052054902,
     40.89470517661]}},
  {'type': 'Feature',
   'id': 'nyu_2451_34572.2',
   'geometry': {'type': 'Point',
    'coordinates': [-73.82993910812398, 40.87429419303012]},
   'geometry_name': 'geom',
   'properties': {'name': 'Co-op City',
    'stacked': 2,
    'annoline1': 'Co-op',
    'annoline2': 'City',
    'annoline3': None,
    'annoangle': 0.0,
    'borough': 'Bronx',
    'bbox': [-73.82993910812398,
     40.87429419303012,
     -73.82993910812398,
     40.87429419303012]}},
  {'type': 'Feature',
 

The `features` key holds a list with one entry per neighborhood. We will assign that list to a new variable, `neighborhoods_data`.

In [5]:
# Keep only the list stored under the 'features' key.
neighborhoods_data = ny_data['features']

In [8]:
# Look at a single entry (index 10) to see the layout of one neighborhood record.
neighborhoods_data[10]

{'type': 'Feature',
 'id': 'nyu_2451_34572.11',
 'geometry': {'type': 'Point',
  'coordinates': [-73.83579759808117, 40.866858107252696]},
 'geometry_name': 'geom',
 'properties': {'name': 'Baychester',
  'stacked': 1,
  'annoline1': 'Baychester',
  'annoline2': None,
  'annoline3': None,
  'annoangle': 0.0,
  'borough': 'Bronx',
  'bbox': [-73.83579759808117,
   40.866858107252696,
   -73.83579759808117,
   40.866858107252696]}}

### *Transform into a pandas dataframe*

In [6]:
# Column layout of the neighborhoods table.
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Start from a frame that has the headers but no rows yet.
nyc_neighborhoods = pd.DataFrame(columns=column_names)

In [7]:
# Display the frame; at this point it holds only the column headers.
nyc_neighborhoods

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude


Loop through the data, adding one row per neighborhood with the columns defined above for nyc_neighborhoods.

In [8]:
# Collect one record per neighborhood and build the frame in a single call.
# DataFrame.append no longer exists in pandas 2.x, and appending row by row copies
# the whole frame on every iteration. Building from a list also makes this cell
# safe to re-run: it always produces exactly one row per input record.
neighborhood_records = []
for data in neighborhoods_data:
    properties = data['properties']
    # Coordinates are stored as [longitude, latitude].
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_records.append({'Borough': properties['borough'],
                                 'Neighborhood': properties['name'],
                                 'Latitude': neighborhood_latlon[1],
                                 'Longitude': neighborhood_latlon[0]})

nyc_neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

In [9]:
# Preview the first three rows of the filled table.
nyc_neighborhoods.head(3)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806


In [10]:
# Report the number of distinct boroughs and the number of neighborhood rows.
borough_count = len(nyc_neighborhoods['Borough'].unique())
neighborhood_count = nyc_neighborhoods.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, neighborhood_count))

The dataframe has 5 boroughs and 306 neighborhoods.


In [11]:
# Look up the coordinates used to centre the New York City map.
address = 'New York City, NY'

geolocator = Nominatim(user_agent="nyc_explorer")
location = geolocator.geocode(address)
# The lookup can come back as None (e.g. no match); stop with a clear message
# instead of failing with an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of New York City are 40.7127281, -74.0060152.


### *Use geopy library to get the latitude & longitude values for New York City*
*Create a map of New York City with neighborhoods superimposed above*

In [14]:
# create map of New York using latitude and longitude values
nyc_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# appearance shared by every neighborhood marker
marker_style = dict(color='purple',
                    fill=True,
                    fill_color='#3186cc',
                    fill_opacity=0.7,
                    parse_html=False)

# add one circle marker per neighborhood; the popup reads "<neighborhood>, <borough>"
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in zip(*(nyc_neighborhoods[col] for col in marker_columns)):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker([lat, lng], radius=5, popup=label, **marker_style)
    marker.add_to(nyc_map)

nyc_map

In [12]:
# Keep only the Manhattan rows and renumber them from 0.
is_manhattan = nyc_neighborhoods['Borough'] == 'Manhattan'
manhattan_data = nyc_neighborhoods[is_manhattan].reset_index(drop=True)
manhattan_data.head(3)

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369


In [13]:
# Look up the coordinates used to centre the Manhattan maps.
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="nyc_explorer")
location = geolocator.geocode(address)
# The lookup can come back as None (e.g. no match); stop with a clear message
# instead of failing with an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Manhattan are 40.7896239, -73.9598939.


### *Use geopy library to get the latitude & longitude values for Manhattan*
*Create a map of Manhattan, NYC, with neighborhoods superimposed above*

In [18]:
# create map of Manhattan using latitude and longitude values
manhattan_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# appearance shared by every neighborhood marker
marker_style = dict(color='purple',
                    fill=True,
                    fill_color='#3186cc',
                    fill_opacity=0.7,
                    parse_html=False)

# add one circle marker per neighborhood; the popup shows the neighborhood name
marker_columns = ['Latitude', 'Longitude', 'Neighborhood']
for lat, lng, neighborhood_name in zip(*(manhattan_data[col] for col in marker_columns)):
    label = folium.Popup(neighborhood_name, parse_html=True)
    marker = folium.CircleMarker([lat, lng], radius=5, popup=label, **marker_style)
    marker.add_to(manhattan_map)

manhattan_map

# Part 2:  Gather Foursquare API Credentials 

In [16]:
# Foursquare credentials are read from the environment so the secrets are neither
# stored in the notebook nor echoed into its output.
# Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
def _require_env(name):
    """Return the value of environment variable `name`; raise RuntimeError if it is unset or empty."""
    import os  # local import keeps this cell self-contained

    value = os.environ.get(name)
    if not value:
        raise RuntimeError('Environment variable {} is not set.'.format(name))
    return value


CLIENT_ID = _require_env('FOURSQUARE_CLIENT_ID') # your Foursquare ID
CLIENT_SECRET = _require_env('FOURSQUARE_CLIENT_SECRET') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value
radius=500

# Confirm that the credentials were found without displaying them.
print('Foursquare credentials loaded (values are not displayed).')

Your credentials:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


*Using the Foursquare API, we will retrieve up to 100 venues around each Manhattan neighborhood; the search radius defaults to 500 meters and can be overridden per call.*

In [17]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, categoryId=''):
    """Search Foursquare for venues around each location and tabulate the results.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Location name and coordinates; the three are iterated in parallel with zip.
    radius : int, optional
        Search radius sent to the API (default 500).
    categoryId : str, optional
        Category id used to filter the search. The default empty string sends
        no category filter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue that has at least one named category, with the
        columns 'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
        'Venue', 'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        When nothing is found the frame is empty but still has these columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and prints
    each location name as it is processed.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']
    search_url = 'https://api.foursquare.com/v2/venues/search'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string instead of formatting it by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        if categoryId != '':
            params['categoryId'] = categoryId

        # A timeout keeps a stalled connection from hanging the notebook, and an
        # HTTP error is reported as such rather than as a KeyError further down.
        response = requests.get(search_url, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()['response']['venues']

        # keep only venues that carry at least one named category
        for v in results:
            categories = v.get('categories') or []
            if not categories or 'name' not in categories[0]:
                continue
            venue_rows.append((
                name,
                lat,
                lng,
                v['name'],
                v['location']['lat'],
                v['location']['lng'],
                categories[0]['name']
            ))

    # Passing the column names here keeps the schema even when no venue was collected.
    return pd.DataFrame(venue_rows, columns=venue_columns)

*Using the category ID for vegan/vegetarian food offerings provided by Foursquare, we will make a list of the Manhattan neighborhoods that contain these cuisines.*

In [37]:
# Query every Manhattan neighborhood with a radius of 1000, restricted to a single category id.
veg_venues = getNearbyVenues(
    names=manhattan_data['Neighborhood'],
    latitudes=manhattan_data['Latitude'],
    longitudes=manhattan_data['Longitude'],
    radius=1000,
    categoryId='4bf58dd8d48988d1d3941735',
)

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


*Create a map of Manhattan, NYC, with neighborhoods containing vegan/vegetarian cuisines.*

In [22]:
# appearance shared by every venue marker
marker_style = dict(color='purple',
                    fill=True,
                    fill_color='#3186cc',
                    fill_opacity=0.7,
                    parse_html=False)

# Add one circle marker per venue to the existing manhattan_map; the popup reads
# "<venue>, <category>, <neighborhood>". The map is not re-created in this cell,
# so running the cell again adds the same markers a second time.
marker_columns = ['Venue Latitude', 'Venue Longitude', 'Venue', 'Venue Category', 'Neighborhood']
for lat, lng, venue, venue_cat, neighborhood in zip(*(veg_venues[col] for col in marker_columns)):
    label = folium.Popup('{}, {}, {}'.format(venue, venue_cat, neighborhood), parse_html=True)
    marker = folium.CircleMarker([lat, lng], radius=5, popup=label, **marker_style)
    marker.add_to(manhattan_map)

manhattan_map

In [35]:
# Print the (rows, columns) shape, then preview the first rows of the venue table.
print(veg_venues.shape)
veg_venues.head()

(951, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill,40.876551,-73.91066,Kingsbridge-Riverdale Farmers' Market,40.879973,-73.907295,Vegetarian / Vegan Restaurant
1,Chinatown,40.715618,-73.994279,Happy Veggie Restaurant,40.716511,-73.997922,Vegetarian / Vegan Restaurant
2,Chinatown,40.715618,-73.994279,Vegetarian Dim Sum House,40.714892,-73.998321,Vegetarian / Vegan Restaurant
3,Chinatown,40.715618,-73.994279,May Wah Vegetarian Market,40.718582,-73.999437,Grocery Store
4,Chinatown,40.715618,-73.994279,Juice Press,40.723439,-73.994877,Juice Bar


*Check the number of vegan/vegetarian venues returned for each neighborhood in Manhattan.*

In [30]:
# Count the non-null entries of every column within each neighborhood group.
veg_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Park City,13,13,13,13,13,13
Carnegie Hill,8,8,8,8,8,8
Central Harlem,6,6,6,6,6,6
Chelsea,34,34,34,34,34,34
Chinatown,45,45,45,45,45,45
Civic Center,35,35,35,35,35,35
Clinton,24,24,24,24,24,24
East Harlem,5,5,5,5,5,5
East Village,49,49,49,49,49,49
Financial District,13,13,13,13,13,13


In [31]:
# Report how many distinct venue categories were returned.
unique_categories = veg_venues['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(unique_categories)))

There are 37 uniques categories.


# Part 3:  Analyze Each Neighborhood

In [32]:
# one hot encoding: one indicator column per venue category, named after the category
manhattan_onehot = pd.get_dummies(veg_venues[['Venue Category']], prefix="", prefix_sep="")

# attach the neighborhood of each venue as an extra column
manhattan_onehot['Neighborhood'] = veg_venues['Neighborhood']

# reorder so that the last column comes first, followed by all the others
last_column = manhattan_onehot.columns[-1]
fixed_columns = [last_column, *manhattan_onehot.columns[:-1]]
manhattan_onehot = manhattan_onehot[fixed_columns]

manhattan_onehot

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Bagel Shop,Bakery,Burger Joint,Café,Coffee Shop,Deli / Bodega,Dessert Shop,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,Food Truck,Gluten-free Restaurant,Grocery Store,Health Food Store,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Juice Bar,Kosher Restaurant,Latin American Restaurant,Lounge,Mac & Cheese Joint,Market,Mediterranean Restaurant,Mexican Restaurant,New American Restaurant,Pizza Place,Restaurant,Salad Place,Sandwich Place,Sushi Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant
0,Marble Hill,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,Chinatown,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,Chinatown,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,Chinatown,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Chinatown,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Chinatown,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
6,Chinatown,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Chinatown,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
8,Chinatown,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
9,Chinatown,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [33]:
# (rows, columns) of the one-hot table.
manhattan_onehot.shape

(951, 38)

*Group rows by neighborhood and take the mean of the frequency of occurrence of each category.*

In [34]:
# Average every indicator column within each neighborhood, then turn the
# neighborhood index back into an ordinary column.
category_frequency = manhattan_onehot.groupby('Neighborhood').mean()
manhattan_grouped = category_frequency.reset_index()
manhattan_grouped

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Bagel Shop,Bakery,Burger Joint,Café,Coffee Shop,Deli / Bodega,Dessert Shop,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,Food Truck,Gluten-free Restaurant,Grocery Store,Health Food Store,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Juice Bar,Kosher Restaurant,Latin American Restaurant,Lounge,Mac & Cheese Joint,Market,Mediterranean Restaurant,Mexican Restaurant,New American Restaurant,Pizza Place,Restaurant,Salad Place,Sandwich Place,Sushi Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant
0,Battery Park City,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.0,0.461538
1,Carnegie Hill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.875
2,Central Harlem,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.833333
3,Chelsea,0.0,0.029412,0.0,0.029412,0.029412,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.088235,0.029412,0.0,0.029412,0.029412,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.029412,0.029412,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.5
4,Chinatown,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.022222,0.022222,0.0,0.0,0.0,0.0,0.022222,0.0,0.022222,0.0,0.0,0.0,0.0,0.022222,0.022222,0.0,0.022222,0.0,0.0,0.0,0.022222,0.022222,0.0,0.044444,0.0,0.022222,0.0,0.0,0.0,0.688889
5,Civic Center,0.0,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.028571,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.657143
6,Clinton,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.041667,0.041667,0.0,0.0,0.0,0.583333
7,East Harlem,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
8,East Village,0.0,0.0,0.0,0.0,0.040816,0.0,0.020408,0.020408,0.020408,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040816,0.0,0.020408,0.0,0.020408,0.0,0.0,0.0,0.0,0.040816,0.0,0.020408,0.0,0.0,0.020408,0.714286
9,Financial District,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.538462


In [30]:
# (rows, columns) of the per-neighborhood frequency table.
manhattan_grouped.shape

(38, 38)

In [28]:
#sort by descending order
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of `row`.

    The first entry of `row` is skipped; the remaining entries are sorted by
    value in descending order and the index labels of the first
    `num_top_venues` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

*Print each neighborhood along with the top 10 most common venues*

In [29]:
#new df w/ top 10 venues 

num_top_venues = 10

indicators = ['st', 'nd', 'rd']


def _ordinal(rank):
    """Return `rank` with its English ordinal suffix, e.g. 1 -> '1st', 11 -> '11th', 22 -> '22nd'."""
    last_digit = rank % 10
    # 11, 12 and 13 (and 111, 112, ...) take 'th' despite ending in 1, 2 or 3
    if last_digit in (1, 2, 3) and rank % 100 not in (11, 12, 13):
        return '{}{}'.format(rank, indicators[last_digit - 1])
    return '{}th'.format(rank)


# create columns according to number of top venues
# (the suffix is chosen by an explicit rule rather than by catching every exception)
columns = ['Neighborhood']
for rank in range(1, num_top_venues + 1):
    columns.append('{} Most Common Venue'.format(_ordinal(rank)))

# create a new dataframe
sorted_venues_neigh= pd.DataFrame(columns=columns)
sorted_venues_neigh['Neighborhood'] = manhattan_grouped['Neighborhood']

# fill the ranked category names row by row
for ind in np.arange(manhattan_grouped.shape[0]):
    sorted_venues_neigh.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

sorted_venues_neigh.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Vegetarian / Vegan Restaurant,Salad Place,Falafel Restaurant,Food Truck,Juice Bar,Burger Joint,Eastern European Restaurant,Gluten-free Restaurant,Filipino Restaurant,Fast Food Restaurant
1,Carnegie Hill,Vegetarian / Vegan Restaurant,Juice Bar,Eastern European Restaurant,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Dessert Shop
2,Central Harlem,Vegetarian / Vegan Restaurant,Market,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Eastern European Restaurant,Dessert Shop
3,Chelsea,Vegetarian / Vegan Restaurant,Food Truck,Salad Place,Juice Bar,Café,Mediterranean Restaurant,Ice Cream Shop,Gluten-free Restaurant,Health Food Store,Mexican Restaurant
4,Chinatown,Vegetarian / Vegan Restaurant,Pizza Place,Mediterranean Restaurant,Japanese Restaurant,Juice Bar,Dessert Shop,Latin American Restaurant,Deli / Bodega,Food Truck,Grocery Store


# Part 4:  K-Means Clustering Analysis

*Using K-Means, divide the neighborhoods into 5 clusters.*

In [32]:
# set number of clusters
kclusters = 5

# Cluster on the category-frequency columns only. The column is dropped by keyword
# because the positional form drop('Neighborhood', 1) is rejected by pandas 2.x.
manhattan_grouped_clustering = manhattan_grouped.drop(columns='Neighborhood')

# run k-means clustering
# n_init is given explicitly because the library default differs between
# scikit-learn versions (10 in older releases, 'auto' in newer ones).
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(manhattan_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 2, 2, 3, 1, 1, 3, 2, 1, 3], dtype=int32)

*Create a new dataframe that contains the 5 clusters and the top 10 venues from above.*

In [33]:
# add clustering labels
# insert() raises if the column already exists, so drop a column left behind by
# an earlier run of this cell before inserting it again.
if 'Cluster Labels' in sorted_venues_neigh.columns:
    sorted_venues_neigh = sorted_venues_neigh.drop(columns='Cluster Labels')
sorted_venues_neigh.insert(0, 'Cluster Labels', kmeans.labels_)

# Join the cluster label and ranked venues onto the coordinates in manhattan_data.
# join() returns a new frame, so manhattan_data itself is left untouched; rows whose
# neighborhood is missing from sorted_venues_neigh get NaN in the joined columns.
manhattan_merged = manhattan_data.join(sorted_venues_neigh.set_index('Neighborhood'), on='Neighborhood')

manhattan_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,2.0,Vegetarian / Vegan Restaurant,Ice Cream Shop,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Eastern European Restaurant,Dessert Shop
1,Manhattan,Chinatown,40.715618,-73.994279,1.0,Vegetarian / Vegan Restaurant,Pizza Place,Mediterranean Restaurant,Japanese Restaurant,Juice Bar,Dessert Shop,Latin American Restaurant,Deli / Bodega,Food Truck,Grocery Store
2,Manhattan,Washington Heights,40.851903,-73.9369,0.0,Lounge,Vegetarian / Vegan Restaurant,Eastern European Restaurant,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Dessert Shop
3,Manhattan,Inwood,40.867684,-73.92121,,,,,,,,,,,
4,Manhattan,Hamilton Heights,40.823604,-73.949688,2.0,Vegetarian / Vegan Restaurant,Ice Cream Shop,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Eastern European Restaurant,Dessert Shop


In [34]:
# Remove every row that contains a missing value, then summarise the remaining
# columns and their dtypes.
manhattan_merged=manhattan_merged.dropna()
manhattan_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 38 entries, 0 to 39
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Borough                 38 non-null     object 
 1   Neighborhood            38 non-null     object 
 2   Latitude                38 non-null     float64
 3   Longitude               38 non-null     float64
 4   Cluster Labels          38 non-null     float64
 5   1st Most Common Venue   38 non-null     object 
 6   2nd Most Common Venue   38 non-null     object 
 7   3rd Most Common Venue   38 non-null     object 
 8   4th Most Common Venue   38 non-null     object 
 9   5th Most Common Venue   38 non-null     object 
 10  6th Most Common Venue   38 non-null     object 
 11  7th Most Common Venue   38 non-null     object 
 12  8th Most Common Venue   38 non-null     object 
 13  9th Most Common Venue   38 non-null     object 
 14  10th Most Common Venue  38 non-null     object 

*Visualize clusters in a map*

In [35]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'], manhattan_merged['Longitude'], manhattan_merged['Neighborhood'], manhattan_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Label k uses rainbow[k - 1]; label 0 therefore takes the last colour through
    # negative indexing, which still gives every cluster a distinct colour.
    cluster_color = rainbow[int(cluster)-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

# Part 5:  Analyze Clusters

In [36]:
# Cluster 1 (label 0): column 1 plus every column from position 5 onwards
manhattan_merged[manhattan_merged['Cluster Labels'] == 0].iloc[:, [1] + list(range(5, manhattan_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Washington Heights,Lounge,Vegetarian / Vegan Restaurant,Eastern European Restaurant,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Dessert Shop


In [37]:
# Cluster 2 (label 1): column 1 plus every column from position 5 onwards
manhattan_merged[manhattan_merged['Cluster Labels'] == 1].iloc[:, [1] + list(range(5, manhattan_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Chinatown,Vegetarian / Vegan Restaurant,Pizza Place,Mediterranean Restaurant,Japanese Restaurant,Juice Bar,Dessert Shop,Latin American Restaurant,Deli / Bodega,Food Truck,Grocery Store
8,Upper East Side,Vegetarian / Vegan Restaurant,Salad Place,Juice Bar,Falafel Restaurant,Dessert Shop,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Eastern European Restaurant
10,Lenox Hill,Vegetarian / Vegan Restaurant,Salad Place,Bagel Shop,Juice Bar,Latin American Restaurant,Falafel Restaurant,Eastern European Restaurant,Gluten-free Restaurant,Food Truck,Filipino Restaurant
11,Roosevelt Island,Vegetarian / Vegan Restaurant,Salad Place,Dessert Shop,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Eastern European Restaurant
13,Lincoln Square,Vegetarian / Vegan Restaurant,Italian Restaurant,Salad Place,Dessert Shop,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Eastern European Restaurant
15,Midtown,Vegetarian / Vegan Restaurant,Juice Bar,Salad Place,New American Restaurant,American Restaurant,Restaurant,Kosher Restaurant,Fast Food Restaurant,Eastern European Restaurant,Japanese Restaurant
18,Greenwich Village,Vegetarian / Vegan Restaurant,Salad Place,Juice Bar,Bakery,Pizza Place,Grocery Store,Gluten-free Restaurant,Food Truck,Latin American Restaurant,Deli / Bodega
19,East Village,Vegetarian / Vegan Restaurant,Juice Bar,Pizza Place,Burger Joint,Coffee Shop,Thai Restaurant,Latin American Restaurant,Dessert Shop,Mac & Cheese Joint,Deli / Bodega
20,Lower East Side,Vegetarian / Vegan Restaurant,Burger Joint,Pizza Place,Mac & Cheese Joint,Filipino Restaurant,Japanese Restaurant,Latin American Restaurant,Dessert Shop,Deli / Bodega,Mexican Restaurant
22,Little Italy,Vegetarian / Vegan Restaurant,Juice Bar,Salad Place,Pizza Place,Asian Restaurant,Food Truck,Bakery,Mexican Restaurant,Grocery Store,Deli / Bodega


In [38]:
# Cluster 3 (label 2): column 1 plus every column from position 5 onwards
manhattan_merged[manhattan_merged['Cluster Labels'] == 2].iloc[:, [1] + list(range(5, manhattan_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Marble Hill,Vegetarian / Vegan Restaurant,Ice Cream Shop,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Eastern European Restaurant,Dessert Shop
4,Hamilton Heights,Vegetarian / Vegan Restaurant,Ice Cream Shop,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Eastern European Restaurant,Dessert Shop
5,Manhattanville,Vegetarian / Vegan Restaurant,Ice Cream Shop,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Eastern European Restaurant,Dessert Shop
6,Central Harlem,Vegetarian / Vegan Restaurant,Market,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Eastern European Restaurant,Dessert Shop
7,East Harlem,Vegetarian / Vegan Restaurant,Ice Cream Shop,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Eastern European Restaurant,Dessert Shop
9,Yorkville,Vegetarian / Vegan Restaurant,Juice Bar,Eastern European Restaurant,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Dessert Shop
30,Carnegie Hill,Vegetarian / Vegan Restaurant,Juice Bar,Eastern European Restaurant,Grocery Store,Gluten-free Restaurant,Food Truck,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Dessert Shop


In [39]:
# Cluster 4 (label 3): column 1 plus every column from position 5 onwards
manhattan_merged[manhattan_merged['Cluster Labels'] == 3].iloc[:, [1] + list(range(5, manhattan_merged.shape[1]))]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Upper West Side,Vegetarian / Vegan Restaurant,Sandwich Place,Salad Place,Food Truck,Juice Bar,Health Food Store,Eastern European Restaurant,Gluten-free Restaurant,Filipino Restaurant,Fast Food Restaurant
14,Clinton,Vegetarian / Vegan Restaurant,Juice Bar,American Restaurant,Salad Place,Deli / Bodega,Restaurant,Mexican Restaurant,Fast Food Restaurant,Food Truck,Filipino Restaurant
16,Murray Hill,Vegetarian / Vegan Restaurant,Salad Place,Juice Bar,Indian Restaurant,American Restaurant,Japanese Restaurant,Sushi Restaurant,Fast Food Restaurant,Grocery Store,Restaurant
17,Chelsea,Vegetarian / Vegan Restaurant,Food Truck,Salad Place,Juice Bar,Café,Mediterranean Restaurant,Ice Cream Shop,Gluten-free Restaurant,Health Food Store,Mexican Restaurant
21,Tribeca,Vegetarian / Vegan Restaurant,Juice Bar,Salad Place,Falafel Restaurant,Mediterranean Restaurant,Food Truck,Deli / Bodega,Burger Joint,Bakery,Asian Restaurant
24,West Village,Vegetarian / Vegan Restaurant,Juice Bar,Salad Place,Mediterranean Restaurant,Gluten-free Restaurant,Food Truck,Health Food Store,Deli / Bodega,Café,Burger Joint
28,Battery Park City,Vegetarian / Vegan Restaurant,Salad Place,Falafel Restaurant,Food Truck,Juice Bar,Burger Joint,Eastern European Restaurant,Gluten-free Restaurant,Filipino Restaurant,Fast Food Restaurant
29,Financial District,Vegetarian / Vegan Restaurant,Falafel Restaurant,Food Truck,Salad Place,Juice Bar,Burger Joint,Eastern European Restaurant,Gluten-free Restaurant,Filipino Restaurant,Fast Food Restaurant
33,Midtown South,Vegetarian / Vegan Restaurant,Juice Bar,Food Truck,Indian Restaurant,Salad Place,Japanese Restaurant,American Restaurant,Deli / Bodega,Thai Restaurant,Sushi Restaurant
36,Tudor City,Vegetarian / Vegan Restaurant,Indian Restaurant,New American Restaurant,Salad Place,Sushi Restaurant,Japanese Restaurant,Juice Bar,Food Truck,Bakery,Burger Joint
