# <center>Comparing_Philadelphia_NewYork_Neighborhoods</center>

In [1]:
#Import necessary libraries
from urllib.request import urlopen #library to open and read http requests
from bs4 import BeautifulSoup #library helpful to scrap the web pages

import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis

import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print('Libraries imported.')

Libraries imported.


<b>Assumptions</b>
<ul>
<li>The web page we are going to look at is 'https://en.wikipedia.org/wiki/List_of_Philadelphia_neighborhoods'.</li>
<li>It has the full list of Philadelphia neighborhoods and their boroughs.</li>
<li>We are going to use <b>BeautifulSoup</b> library in this notebook for scraping the table in the above Wikipedia page.</li>
</ul>

Open the wikipedia url using <b>urllib.urlopen<b> method

In [2]:
html = urlopen('https://en.wikipedia.org/wiki/List_of_Philadelphia_neighborhoods')
html

<http.client.HTTPResponse at 0x4d1e2b0>

Create an object of <b>BeautifulSoup</b> to read the html object

In [3]:
res = BeautifulSoup(html.read(), 'html5lib')

Print the title of Wikipedia page

In [4]:
print(res.title)

<title>List of Philadelphia neighborhoods - Wikipedia</title>


Create an empty dataframe with appropriate columns

In [5]:
column_names = ['Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
philadelphia_data = pd.DataFrame(columns=column_names)

Scrape the web content by using BeautifulSoup object 'res'

In [6]:
#Scrape Neighborhood, longitude and latitude values from the Wikipedia page
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
geolocator = Nominatim()

div = res.findAll('div', attrs = {'class':'div-col'}) 

for i in range(len(div)):
    
    rs = div[i].findAll('li')
    for result in rs:
        neighborhood_name = result.a.text.strip() #Get the name of neighborhood

        #Get the latitude & longitude values
        url = 'https://en.wikipedia.org' + result.a['href']
        if url.find('index.php') == -1:
            html = urlopen(url)
            response = BeautifulSoup(html.read(), 'html5lib')
            location = response.find('span', {'class':'geo'})
        else:
            location = None

        #Check if location has got lat & long values
        if location is None:
            try:
        #If none scraped, Use Nominatim object geolocator to retrieve latitude and longitude
                address = neighborhood_name + (', Philadelphia, US')
                location = geolocator.geocode(address)
            except Exception as error: #Handle timeout errors for inappropriate addresses
                neighborhood_lat is null
                neighborhood_lon is null
        else:
            neighborhood_lat = location.text.split(';')[0].strip()
            neighborhood_lon = location.text.split(';')[1].strip()


            #Append to the neighborhoods dataframe
        philadelphia_data = philadelphia_data.append({'Neighborhood': neighborhood_name,
                                                  'Latitude': neighborhood_lat,
                                                  'Longitude': neighborhood_lon}, ignore_index=True)

philadelphia_data.head()



Unnamed: 0,Neighborhood,Latitude,Longitude
0,Avenue of the Arts,39.9411,-75.1656
1,Callowhill,39.96,-75.158
2,Chinatown,39.9535,-75.1563
3,Elfreth's Alley,39.9527806,-75.1424556
4,French Quarter,39.9504,-75.1698


In [7]:
philadelphia_data.tail()

Unnamed: 0,Neighborhood,Latitude,Longitude
129,Pennypack,40.067,-75.05
130,Somerton,40.124,-75.008
131,Torresdale,40.05,-75.0
132,Upper Holmesburg,40.046,-75.009
133,Winchester Park,40.046,-75.019


Shape of the original DataFrame

In [8]:
philadelphia_data.shape

(134, 3)

In [9]:
#Check for duplicated values
philadelphia_data[philadelphia_data.Neighborhood.duplicated()]

Unnamed: 0,Neighborhood,Latitude,Longitude


Check of the presence of null values

In [10]:
philadelphia_data.isnull().sum()

Neighborhood    0
Latitude        0
Longitude       0
dtype: int64

In [11]:
print('The dataframe has {} neighborhoods.'.format(
        len(philadelphia_data['Neighborhood'].unique())
    )
)

The dataframe has 134 neighborhoods.


In [13]:
philadelphia_data.Latitude = philadelphia_data.Latitude.astype(float)
philadelphia_data.Longitude = philadelphia_data.Longitude.astype(float)

In [14]:
philadelphia_data.dtypes

Neighborhood     object
Latitude        float64
Longitude       float64
dtype: object

Save the dataframe to a csv file

In [15]:
philadelphia_data.to_csv('philadelphia_neighborhoods.csv', index=False)

In [16]:
newyork_neighborhoods = pd.read_csv('newyork_neighborhoods.csv')
newyork_neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [17]:
newyork_neighborhoods.drop('Borough', axis=1, inplace=True)

In [21]:
newyork_neighborhoods.shape

(306, 3)

In [18]:
philadelphia_neighborhoods = pd.read_csv('philadelphia_neighborhoods.csv')
philadelphia_neighborhoods.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Avenue of the Arts,39.9411,-75.1656
1,Callowhill,39.96,-75.158
2,Chinatown,39.9535,-75.1563
3,Elfreth's Alley,39.952781,-75.142456
4,French Quarter,39.9504,-75.1698


In [19]:
philadelphia_neighborhoods.shape

(130, 3)

In [54]:
ny_phil_data = pd.concat([newyork_neighborhoods,philadelphia_neighborhoods], ignore_index=True)
ny_phil_data.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Wakefield,40.894705,-73.847201
1,Co-op City,40.874294,-73.829939
2,Eastchester,40.887556,-73.827806
3,Fieldston,40.895437,-73.905643
4,Riverdale,40.890834,-73.912585


In [55]:
ny_phil_data.tail()

Unnamed: 0,Neighborhood,Latitude,Longitude
431,Pennypack,40.067,-75.05
432,Somerton,40.124,-75.008
433,Torresdale,40.05,-75.0
434,Upper Holmesburg,40.046,-75.009
435,Winchester Park,40.046,-75.019


In [56]:
ny_phil_data.shape

(436, 3)

In [57]:
#Check for duplicated values
ny_phil_data[ny_phil_data.Neighborhood.duplicated()]

Unnamed: 0,Neighborhood,Latitude,Longitude
180,Murray Hill,40.764126,-73.812763
220,Sunnyside,40.61276,-74.097126
235,Bay Terrace,40.553988,-74.139166
244,Chelsea,40.594726,-74.18956
308,Chinatown,39.9535,-75.1563


In [58]:
#Remove duplicte values based on column Neighborhood
ny_phil_data.drop_duplicates('Neighborhood', inplace=True)

In [59]:
ny_phil_data.shape

(431, 3)

In [60]:
ny_phil_data.isnull().sum()

Neighborhood    0
Latitude        0
Longitude       0
dtype: int64

In [61]:
len(ny_phil_data.Neighborhood.str.lower().unique())

431

#### Use geopy library to get the latitude and longitude values of New Brunswick City which is center to both New York and Philadelphia. 

In [103]:
address = 'New Brunswick, New Jersey, US'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New Brunswick City are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of New Brunswick City are 40.4862174, -74.4518173.


#### Create a map of New York & Philadelphia with neighborhoods superimposed on top.

In [31]:
# create map of New York and Philadelphia using latitude and longitude values
map_ny_phil = folium.Map(location=[latitude, longitude], zoom_start=9)

# add markers to map
for lat, lng, neighborhood in zip(ny_phil_data['Latitude'], ny_phil_data['Longitude'], ny_phil_data['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_ny_phil)  
    
map_ny_phil

Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.

#### Define Foursquare Credentials and Version

In [34]:
#Define FourSquare credentials and version
CLIENT_ID = 'YEP0IXWCN4ZGNGNO21BL2WUSF3F2ZPOXRZ13YOTBYLL4XJGN' # your Foursquare ID
CLIENT_SECRET = 'MZ2BZLDMC2FQ2GLZVQ4OVDZVMRDYRQXHLKXI1GRJL3IJT4AG' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: YEP0IXWCN4ZGNGNO21BL2WUSF3F2ZPOXRZ13YOTBYLL4XJGN
CLIENT_SECRET:MZ2BZLDMC2FQ2GLZVQ4OVDZVMRDYRQXHLKXI1GRJL3IJT4AG


#### Extract the category of the venue using get_category_type function

## Explore Neighborhoods in New York & Philadelphia

#### Let's create a function to extract venue details for all neighborhoods in New York & Philadelphia

In [32]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)    
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [35]:
# Now write the code to run the above function on each neighborhood and create a new dataframe called *ny_phil_venues*.
ny_phil_venues = getNearbyVenues(names=ny_phil_data['Neighborhood'],
                                   latitudes=ny_phil_data['Latitude'],
                                   longitudes=ny_phil_data['Longitude']
                                  )
ny_phil_venues.head()

Wakefield
Co-op City
Eastchester
Fieldston
Riverdale
Kingsbridge
Marble Hill
Woodlawn
Norwood
Williamsbridge
Baychester
Pelham Parkway
City Island
Bedford Park
University Heights
Morris Heights
Fordham
East Tremont
West Farms
High  Bridge
Melrose
Mott Haven
Port Morris
Longwood
Hunts Point
Morrisania
Soundview
Clason Point
Throgs Neck
Country Club
Parkchester
Westchester Square
Van Nest
Morris Park
Belmont
Spuyten Duyvil
North Riverdale
Pelham Bay
Schuylerville
Edgewater Park
Castle Hill
Olinville
Pelham Gardens
Concourse
Unionport
Edenwald
Bay Ridge
Bensonhurst
Sunset Park
Greenpoint
Gravesend
Brighton Beach
Sheepshead Bay
Manhattan Terrace
Flatbush
Crown Heights
East Flatbush
Kensington
Windsor Terrace
Prospect Heights
Brownsville
Williamsburg
Bushwick
Bedford Stuyvesant
Brooklyn Heights
Cobble Hill
Carroll Gardens
Red Hook
Gowanus
Fort Greene
Park Slope
Cypress Hills
East New York
Starrett City
Canarsie
Flatlands
Mill Island
Manhattan Beach
Coney Island
Bath Beach
Borough Park
Dyker

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Lollipops Gelato,40.894123,-73.845892,Dessert Shop
1,Wakefield,40.894705,-73.847201,Rite Aid,40.896521,-73.84468,Pharmacy
2,Wakefield,40.894705,-73.847201,Cooler Runnings Jamaican Restaurant Inc,40.898276,-73.850381,Caribbean Restaurant
3,Wakefield,40.894705,-73.847201,Carvel Ice Cream,40.890487,-73.848568,Ice Cream Shop
4,Wakefield,40.894705,-73.847201,Dunkin Donuts,40.890631,-73.849027,Donut Shop


In [36]:
#Let's check the size of the resulting dataframe
ny_phil_venues.shape

(13843, 7)

In [62]:
len(ny_phil_venues.Neighborhood.unique())

430

In [38]:
#Let's check how many venues were returned for each neighborhood
ny_phil_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Academy Gardens,4,4,4,4,4,4
Allegheny West,5,5,5,5,5,5
Allerton,26,26,26,26,26,26
Angora,10,10,10,10,10,10
Annadale,9,9,9,9,9,9
Arden Heights,5,5,5,5,5,5
Arlington,5,5,5,5,5,5
Arrochar,17,17,17,17,17,17
Arverne,15,15,15,15,15,15
Ashton-Woodenbridge,3,3,3,3,3,3


In [39]:
#Let's find out how many unique categories can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(ny_phil_venues['Venue Category'].unique())))

There are 454 uniques categories.


## Analyze Each Neighborhood

In [40]:
# one hot encoding
ny_phil_onehot = pd.get_dummies(ny_phil_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
ny_phil_onehot['Neighborhood'] = ny_phil_venues['Neighborhood']

# move neighborhood column to the first column
fixed_columns = [ny_phil_onehot.columns[-1]] + list(ny_phil_onehot.columns[:-1])
ny_phil_onehot = ny_phil_onehot[fixed_columns]

ny_phil_onehot.head()

Unnamed: 0,Zoo Exhibit,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,Airport Terminal,Airport Tram,American Restaurant,...,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [41]:
#And let's examine the new dataframe size.
ny_phil_onehot.shape

(13843, 454)

In [42]:
#Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
ny_phil_grouped = ny_phil_onehot.groupby('Neighborhood').mean().reset_index()
ny_phil_grouped.head()

Unnamed: 0,Neighborhood,Zoo Exhibit,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,Airport Terminal,Airport Tram,...,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,Academy Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Allegheny West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Allerton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Angora,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Annadale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
#Let's confirm the new size
ny_phil_grouped.shape

(430, 454)

In [44]:
#let's write a function to sort the venues in descending order.
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [45]:
#Now let's create the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ny_phil_grouped['Neighborhood']

for ind in np.arange(ny_phil_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_phil_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Academy Gardens,Golf Course,Baseball Field,Gym / Fitness Center,Zoo,Fish Market,Event Space,Exhibit,Factory,Falafel Restaurant,Farm
1,Allegheny West,Breakfast Spot,Football Stadium,Pizza Place,Beach,Grocery Store,Zoo,Fish & Chips Shop,Exhibit,Factory,Falafel Restaurant
2,Allerton,Pizza Place,Chinese Restaurant,Spa,Supermarket,Martial Arts Dojo,Discount Store,Electronics Store,Breakfast Spot,Pharmacy,Fast Food Restaurant
3,Angora,Intersection,Donut Shop,Shopping Plaza,Chinese Restaurant,Light Rail Station,Train Station,Convenience Store,Sandwich Place,Ethiopian Restaurant,Event Service
4,Annadale,Pizza Place,Liquor Store,Restaurant,Diner,Train Station,Sports Bar,American Restaurant,Falafel Restaurant,Fast Food Restaurant,Farmers Market


In [46]:
neighborhoods_venues_sorted.shape

(430, 11)

## Cluster Neighborhoods

Run k-means to cluster the neighborhood into 3 clusters.

In [47]:
# set number of clusters
kclusters = 3

ny_phil_grouped_clustering = ny_phil_grouped.drop(['Neighborhood'], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ny_phil_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 0, 0, 2, 0, 0, 2, 2, 2, 2])

In [48]:
len(kmeans.labels_)

430

In [66]:
#Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
ny_phil_merged = ny_phil_data

# merge ny_phil_grouped with ny_phil_data to add latitude/longitude for each neighborhood
ny_phil_merged = ny_phil_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood', how='right')

# add clustering labels
ny_phil_merged['Cluster Labels'] = kmeans.labels_

ny_phil_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,Wakefield,40.894705,-73.847201,Sandwich Place,Ice Cream Shop,Donut Shop,Pharmacy,Dessert Shop,Caribbean Restaurant,Food Truck,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant,2
1,Co-op City,40.874294,-73.829939,Bus Station,Ice Cream Shop,Gift Shop,Salon / Barbershop,Basketball Court,Discount Store,Grocery Store,Pizza Place,Pharmacy,Baseball Field,0
2,Eastchester,40.887556,-73.827806,Caribbean Restaurant,Diner,Metro Station,Donut Shop,Bakery,Automotive Shop,Fast Food Restaurant,Seafood Restaurant,Pizza Place,Business Service,0
3,Fieldston,40.895437,-73.905643,Plaza,River,Playground,Music Venue,Rock Club,Zoo,Falafel Restaurant,Fast Food Restaurant,Farmers Market,Farm,2
4,Riverdale,40.890834,-73.912585,Bank,Gym,Park,Home Service,Bus Station,Plaza,Zoo,Field,Event Space,Exhibit,0


In [73]:
address = 'New Brunswick, New Jersey, US'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New Brunswick City are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of New Brunswick City are 40.4862174, -74.4518173.


In [74]:
ny_phil_merged.dtypes

Neighborhood               object
Latitude                  float64
Longitude                 float64
1st Most Common Venue      object
2nd Most Common Venue      object
3rd Most Common Venue      object
4th Most Common Venue      object
5th Most Common Venue      object
6th Most Common Venue      object
7th Most Common Venue      object
8th Most Common Venue      object
9th Most Common Venue      object
10th Most Common Venue     object
Cluster Labels              int32
dtype: object

In [76]:
# Finally, let's visualize the resulting clusters on map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ny_phil_merged['Latitude'], ny_phil_merged['Longitude'], ny_phil_merged['Neighborhood'], ny_phil_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

#### Cluster 1

In [99]:
ny_phil_merged.loc[ny_phil_merged['Cluster Labels'] == 0, ny_phil_merged.columns[[0] + list(range(3, ny_phil_merged.shape[1]-1))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Co-op City,Bus Station,Ice Cream Shop,Gift Shop,Salon / Barbershop,Basketball Court,Discount Store,Grocery Store,Pizza Place,Pharmacy,Baseball Field
2,Eastchester,Caribbean Restaurant,Diner,Metro Station,Donut Shop,Bakery,Automotive Shop,Fast Food Restaurant,Seafood Restaurant,Pizza Place,Business Service
4,Riverdale,Bank,Gym,Park,Home Service,Bus Station,Plaza,Zoo,Field,Event Space,Exhibit
5,Kingsbridge,Pizza Place,Bar,Sandwich Place,Discount Store,Mexican Restaurant,Latin American Restaurant,Supermarket,Spanish Restaurant,Donut Shop,Fast Food Restaurant
28,Throgs Neck,Italian Restaurant,Deli / Bodega,Baseball Field,Pizza Place,Asian Restaurant,American Restaurant,Sports Bar,Bar,Juice Bar,Food Truck
30,Parkchester,Supermarket,Pizza Place,Deli / Bodega,Bank,Women's Store,Asian Restaurant,Restaurant,Chinese Restaurant,Shoe Store,Mobile Phone Shop
37,Pelham Bay,Italian Restaurant,Gym / Fitness Center,Convenience Store,Bank,Donut Shop,Sandwich Place,Fast Food Restaurant,Salon / Barbershop,BBQ Joint,Bagel Shop
42,Pelham Gardens,Chinese Restaurant,Spanish Restaurant,Donut Shop,Bus Station,Boat or Ferry,Bank,Grocery Store,BBQ Joint,Pharmacy,Liquor Store
46,Bay Ridge,Italian Restaurant,Pizza Place,Spa,American Restaurant,Bagel Shop,Greek Restaurant,Bar,Chinese Restaurant,Seafood Restaurant,Pharmacy
48,Sunset Park,Bakery,Latin American Restaurant,Mexican Restaurant,Bank,Pizza Place,Gym,Pharmacy,Stadium,Breakfast Spot,Supplement Shop


#### Cluster 2

In [100]:
ny_phil_merged.loc[ny_phil_merged['Cluster Labels'] == 1, ny_phil_merged.columns[[0] + list(range(3, ny_phil_merged.shape[1]-1))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Mott Haven,Gym,Grocery Store,Spanish Restaurant,Pizza Place,Donut Shop,Flower Shop,Baseball Field,Chinese Restaurant,Metro Station,Peruvian Restaurant
39,Edgewater Park,Italian Restaurant,Deli / Bodega,Coffee Shop,Pizza Place,Chinese Restaurant,Asian Restaurant,Spa,Bar,Bakery,Park
49,Greenpoint,Bar,Pizza Place,Cocktail Bar,Boutique,Yoga Studio,Coffee Shop,Café,Mexican Restaurant,Bakery,Record Shop
78,Coney Island,Caribbean Restaurant,Baseball Stadium,Monument / Landmark,Pharmacy,Music Venue,Deli / Bodega,Food Court,Gourmet Shop,Theme Park Ride / Attraction,Other Great Outdoors
149,College Point,Deli / Bodega,Pizza Place,Asian Restaurant,Latin American Restaurant,Pharmacy,Supermarket,Sandwich Place,Mexican Restaurant,Donut Shop,Café
210,Travis,Bowling Alley,Deli / Bodega,Hotel,Donut Shop,Baseball Field,Gym / Fitness Center,Gym,Sports Club,Café,Park
252,Shore Acres,Deli / Bodega,Italian Restaurant,Bus Stop,Intersection,Bar,Nail Salon,Pizza Place,Furniture / Home Store,Supermarket,Music Store
262,Mill Basin,Chinese Restaurant,Pizza Place,Bank,Japanese Restaurant,Bagel Shop,Burger Joint,Italian Restaurant,Peruvian Restaurant,Donut Shop,Middle Eastern Restaurant
272,Hunters Point,Café,Italian Restaurant,Burger Joint,Gym / Fitness Center,Brewery,Sushi Restaurant,Bar,Japanese Restaurant,American Restaurant,Comedy Club
330,Girard Estate,Pizza Place,Liquor Store,Shoe Store,Discount Store,Bakery,Video Game Store,Pharmacy,Donut Shop,Shopping Plaza,Gastropub


#### Cluster 3

In [102]:
ny_phil_merged.loc[ny_phil_merged['Cluster Labels'] == 2, ny_phil_merged.columns[[0] + list(range(3, ny_phil_merged.shape[1]-1))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Wakefield,Sandwich Place,Ice Cream Shop,Donut Shop,Pharmacy,Dessert Shop,Caribbean Restaurant,Food Truck,Food & Drink Shop,Fast Food Restaurant,Ethiopian Restaurant
3,Fieldston,Plaza,River,Playground,Music Venue,Rock Club,Zoo,Falafel Restaurant,Fast Food Restaurant,Farmers Market,Farm
6,Marble Hill,Sandwich Place,Discount Store,Pizza Place,Supplement Shop,Gym,Bank,Shoe Store,Pharmacy,Seafood Restaurant,Donut Shop
7,Woodlawn,Pizza Place,Playground,Pub,Deli / Bodega,Rental Car Location,Park,Bar,Food Truck,Donut Shop,Train Station
8,Norwood,Pizza Place,Park,Bank,Pharmacy,Convenience Store,Supermarket,Spanish Restaurant,Bus Station,Fast Food Restaurant,Mexican Restaurant
9,Williamsbridge,Caribbean Restaurant,Nightclub,Bar,Metro Station,Fast Food Restaurant,Soup Place,Fish & Chips Shop,Exhibit,Factory,Falafel Restaurant
10,Baychester,Discount Store,Mattress Store,American Restaurant,Pizza Place,Sandwich Place,Playground,Mexican Restaurant,Arcade,Pet Store,Fast Food Restaurant
11,Pelham Parkway,Italian Restaurant,Pizza Place,Chinese Restaurant,Metro Station,Smoke Shop,Liquor Store,Sandwich Place,Donut Shop,Home Service,Bus Station
12,City Island,Harbor / Marina,Seafood Restaurant,Thrift / Vintage Store,Spanish Restaurant,Park,Bar,Bank,Pharmacy,Liquor Store,Smoke Shop
13,Bedford Park,Pizza Place,Diner,Convenience Store,Mexican Restaurant,Sandwich Place,Supermarket,Pharmacy,Deli / Bodega,Chinese Restaurant,Fast Food Restaurant


### Conclusion
From the above map and clusters data, it seems like Philadelphia and New York cities are more or less similar to each other.