In [1]:
# import libraries
from bs4 import BeautifulSoup as bs
import requests
import numpy as np
import pandas as pd

In [2]:
# set the soup
wiki = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_page = requests.get(wiki).text
soup = bs(raw_page,'lxml')

In [3]:
# find the table
table = soup.find('table',{'class':'wikitable sortable'})

In [4]:
# set the empty list and organise the content
Postcode = []
Borough = []
Neighbourhood = []

for tr in table.findAll('tr')[1:]:
    tds = tr.findAll('td')
    
    Postcode_var = tds[0].get_text()
    Borough_var = tds[1].get_text()
    Neighbourhood_var = tds[2].get_text().strip('\n')

    if Borough_var == 'Not assigned':
        continue
    else:
        Postcode.append(Postcode_var)
        Borough.append(Borough_var)
        Neighbourhood.append(Neighbourhood_var)

In [5]:
# display the list into dataframe
df = pd.DataFrame()
df['Postcode'] = Postcode
df['Borough'] = Borough
df['Neighbourhood'] = Neighbourhood

In [6]:
# groupby the content by postcode or Borough
# convert the list to dataframe again and set the new organised table
df_u = df.groupby(['Postcode','Borough'])['Neighbourhood'].apply(list)
df_new = df_u.to_frame()
df_new.reset_index(level=['Postcode','Borough'],inplace=True)
df_new['Neighbourhood'] = df_new['Neighbourhood'].astype(str)
df_new['Neighbourhood'] = df_new['Neighbourhood'].apply(lambda x: x.replace('[',''))
df_new['Neighbourhood'] = df_new['Neighbourhood'].apply(lambda x: x.replace(']',''))
df_new

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"'Rouge', 'Malvern'"
1,M1C,Scarborough,"'Highland Creek', 'Rouge Hill', 'Port Union'"
2,M1E,Scarborough,"'Guildwood', 'Morningside', 'West Hill'"
3,M1G,Scarborough,'Woburn'
4,M1H,Scarborough,'Cedarbrae'
5,M1J,Scarborough,'Scarborough Village'
6,M1K,Scarborough,"'East Birchmount Park', 'Ionview', 'Kennedy Park'"
7,M1L,Scarborough,"'Clairlea', 'Golden Mile', 'Oakridge'"
8,M1M,Scarborough,"'Cliffcrest', 'Cliffside', 'Scarborough Villag..."
9,M1N,Scarborough,"'Birch Cliff', 'Cliffside West'"


In [7]:
# show the required dataframe and its size/shape
df_new.shape

(103, 3)

In [8]:
# input the latitude and longitude
geo_data = pd.read_csv('https://cocl.us/Geospatial_data')
full_data = pd.concat([df_new,geo_data],axis = 1)
full_data

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"'Rouge', 'Malvern'",M1B,43.806686,-79.194353
1,M1C,Scarborough,"'Highland Creek', 'Rouge Hill', 'Port Union'",M1C,43.784535,-79.160497
2,M1E,Scarborough,"'Guildwood', 'Morningside', 'West Hill'",M1E,43.763573,-79.188711
3,M1G,Scarborough,'Woburn',M1G,43.770992,-79.216917
4,M1H,Scarborough,'Cedarbrae',M1H,43.773136,-79.239476
5,M1J,Scarborough,'Scarborough Village',M1J,43.744734,-79.239476
6,M1K,Scarborough,"'East Birchmount Park', 'Ionview', 'Kennedy Park'",M1K,43.727929,-79.262029
7,M1L,Scarborough,"'Clairlea', 'Golden Mile', 'Oakridge'",M1L,43.711112,-79.284577
8,M1M,Scarborough,"'Cliffcrest', 'Cliffside', 'Scarborough Villag...",M1M,43.716316,-79.239476
9,M1N,Scarborough,"'Birch Cliff', 'Cliffside West'",M1N,43.692657,-79.264848


In [14]:
full_data.drop('Postal Code',axis=1, inplace=True)
full_data

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"'Rouge', 'Malvern'",43.806686,-79.194353
1,M1C,Scarborough,"'Highland Creek', 'Rouge Hill', 'Port Union'",43.784535,-79.160497
2,M1E,Scarborough,"'Guildwood', 'Morningside', 'West Hill'",43.763573,-79.188711
3,M1G,Scarborough,'Woburn',43.770992,-79.216917
4,M1H,Scarborough,'Cedarbrae',43.773136,-79.239476
5,M1J,Scarborough,'Scarborough Village',43.744734,-79.239476
6,M1K,Scarborough,"'East Birchmount Park', 'Ionview', 'Kennedy Park'",43.727929,-79.262029
7,M1L,Scarborough,"'Clairlea', 'Golden Mile', 'Oakridge'",43.711112,-79.284577
8,M1M,Scarborough,"'Cliffcrest', 'Cliffside', 'Scarborough Villag...",43.716316,-79.239476
9,M1N,Scarborough,"'Birch Cliff', 'Cliffside West'",43.692657,-79.264848


In [9]:
# import the relevant libraries
import geocoder
import folium

In [11]:
# create the map of Toronto
toronto_latitude = 43.6532; toronto_longitude = -79.3832
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10.7)

# add markers to the map
for lat, lng, borough, neighborhood in zip(full_data['Latitude'], full_data['Longitude'], full_data['Borough'], full_data['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    

map_toronto

In [12]:
# Create a new data frame with neighborhoods in North York

# import the foursquare data resources
CLIENT_ID = 'CR1TOIJLC1RMUIU1KPJ5K4AJH1YAVSAFLOMNYQEM5KY3ISQG' 
CLIENT_SECRET = 'ZU1FQJ0GFH4J5WC2W3TA3O4NEIGRPS3DRVJZ0NXLU50F5HS1'
VERSION = '20190603'

# display the North York data explicitly
NorthYork_data = full_data[full_data['Borough'] == 'North York'].reset_index(drop=True)
NorthYork_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M2H,North York,'Hillcrest Village',M2H,43.803762,-79.363452
1,M2J,North York,"'Fairview', 'Henry Farm', 'Oriole'",M2J,43.778517,-79.346556
2,M2K,North York,'Bayview Village',M2K,43.786947,-79.385975
3,M2L,North York,"'Silver Hills', 'York Mills'",M2L,43.75749,-79.374714
4,M2M,North York,"'Newtonbrook', 'Willowdale'",M2M,43.789053,-79.408493


In [17]:
address_ny = 'North York, Toronto'
latitude_ny = 43.803762
longitude_ny = -79.363452

map_ny = folium.Map(location=[latitude_ny, longitude_ny], zoom_start=12)

# add markers to map
for lat, lng, label in zip(NorthYork_data['Latitude'], NorthYork_data['Longitude'], NorthYork_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_ny)  
    
map_ny

In [18]:
# Get the top 100 venues in the neighborhood 'Hillcrest Village', from North York

neighborhood_latitude = NorthYork_data.loc[0, 'Latitude'] # neighbourhood latitude value
neighborhood_longitude = NorthYork_data.loc[0, 'Longitude'] # neighbourhood longitude value
neighborhood_name = NorthYork_data.loc[0, 'Neighbourhood'] # neighbourhood name

LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude_ny, longitude_ny, VERSION, radius, LIMIT)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5cf523396a60714939e92e02'},
 'response': {'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 43.8082620045, 'lng': -79.35722848539913},
   'sw': {'lat': 43.7992619955, 'lng': -79.36967551460086}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ad9dce6f964a520651b21e3',
       'name': "Eagle's Nest Golf Club",
       'location': {'address': '10000 Dufferin Rd',
        'lat': 43.805454826002794,
        'lng': -79.36418592243415,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.805454826002794,
          'lng': -79.36418592243415}],
        'distance': 197,
        'cc': 'CA',
        'city': 'Toronto',
    

In [19]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

    
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']  
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head(10)

Unnamed: 0,name,categories,lat,lng
0,Eagle's Nest Golf Club,Golf Course,43.805455,-79.364186
1,AY Jackson Pool,Pool,43.804515,-79.366138
2,Villa Madina,Mediterranean Restaurant,43.801685,-79.363938
3,Duncan Creek Park,Dog Run,43.805539,-79.360695
4,A.Y. Jackson Secondary School Track,Athletics & Sports,43.805068,-79.366677


In [20]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

5 venues were returned by Foursquare.


In [21]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [22]:
# Get venues for each neighborhood in North York
NorthYork_venues = getNearbyVenues(names=NorthYork_data['Neighbourhood'],
                                   latitudes=NorthYork_data['Latitude'],
                                   longitudes=NorthYork_data['Longitude']
                                  )
NorthYork_venues.head()

'Hillcrest Village'
'Fairview', 'Henry Farm', 'Oriole'
'Bayview Village'
'Silver Hills', 'York Mills'
'Newtonbrook', 'Willowdale'
'Willowdale South'
'York Mills West'
'Willowdale West'
'Parkwoods'
'Don Mills North'
'Flemingdon Park', 'Don Mills South'
'Bathurst Manor', 'Downsview North', 'Wilson Heights'
'Northwood Park', 'York University'
'CFB Toronto', 'Downsview East'
'Downsview West'
'Downsview Central'
'Downsview Northwest'
'Victoria Village'
'Bedford Park', 'Lawrence Manor East'
'Lawrence Heights', 'Lawrence Manor'
'Glencairn'
'Downsview', 'North Park', 'Upwood Park'
'Humber Summit'
'Emery', 'Humberlea'


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,'Hillcrest Village',43.803762,-79.363452,Eagle's Nest Golf Club,43.805455,-79.364186,Golf Course
1,'Hillcrest Village',43.803762,-79.363452,AY Jackson Pool,43.804515,-79.366138,Pool
2,'Hillcrest Village',43.803762,-79.363452,Villa Madina,43.801685,-79.363938,Mediterranean Restaurant
3,'Hillcrest Village',43.803762,-79.363452,Duncan Creek Park,43.805539,-79.360695,Dog Run
4,'Hillcrest Village',43.803762,-79.363452,A.Y. Jackson Secondary School Track,43.805068,-79.366677,Athletics & Sports


In [24]:
NorthYork_venues.groupby('Neighborhood').count()
print('There are {} uniques categories.'.format(len(NorthYork_venues['Venue Category'].unique())))

There are 106 uniques categories.


In [25]:
# one hot encoding
ny_onehot = pd.get_dummies(NorthYork_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
ny_onehot['Neighborhood'] = NorthYork_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [ny_onehot.columns[-1]] + list(ny_onehot.columns[:-1])
ny_onehot = ny_onehot[fixed_columns]

ny_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,'Hillcrest Village',0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,'Hillcrest Village',0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,'Hillcrest Village',0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,'Hillcrest Village',0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,'Hillcrest Village',0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
ny_onehot.shape

(240, 107)

In [27]:
ny_grouped = ny_onehot.groupby('Neighborhood').mean().reset_index()
ny_grouped.head(7)

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Video Store,Vietnamese Restaurant,Wings Joint,Women's Store
0,"'Bathurst Manor', 'Downsview North', 'Wilson H...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,...,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0
1,'Bayview Village',0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"'Bedford Park', 'Lawrence Manor East'",0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,...,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"'CFB Toronto', 'Downsview East'",0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,'Don Mills North',0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,'Downsview Central',0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,'Downsview Northwest',0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
# Get top 10 venues per neighborhood

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ny_grouped['Neighborhood']

for ind in np.arange(ny_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"'Bathurst Manor', 'Downsview North', 'Wilson H...",Coffee Shop,Sandwich Place,Supermarket,Pharmacy,Pizza Place,Deli / Bodega,Diner,Bridal Shop,Restaurant,Shopping Mall
1,'Bayview Village',Chinese Restaurant,Café,Bank,Japanese Restaurant,Women's Store,Event Space,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant
2,"'Bedford Park', 'Lawrence Manor East'",Coffee Shop,Fast Food Restaurant,Italian Restaurant,Sandwich Place,Indian Restaurant,Liquor Store,Café,Pharmacy,Pizza Place,Butcher
3,"'CFB Toronto', 'Downsview East'",Snack Place,Other Repair Shop,Park,Airport,Golf Course,Electronics Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
4,'Don Mills North',Gym / Fitness Center,Caribbean Restaurant,Café,Japanese Restaurant,Basketball Court,Event Space,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant
5,'Downsview Central',Home Service,Baseball Field,Food Truck,Korean Restaurant,Women's Store,Event Space,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant
6,'Downsview Northwest',Gym / Fitness Center,Athletics & Sports,Liquor Store,Discount Store,Grocery Store,Asian Restaurant,Arts & Crafts Store,Deli / Bodega,Department Store,Dim Sum Restaurant
7,'Downsview West',Grocery Store,Shopping Mall,Hotel,Bank,Park,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
8,"'Downsview', 'North Park', 'Upwood Park'",Park,Bakery,Basketball Court,Construction & Landscaping,Event Space,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant
9,"'Emery', 'Humberlea'",Baseball Field,Women's Store,Fast Food Restaurant,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant,Diner,Discount Store


In [32]:
# Run k-means to cluster the neighborhoods into 5 clusters

# import k-means from clustering stage
from sklearn.cluster import KMeans

ny_data = NorthYork_data.drop(16)
# set number of clusters
kclusters = 5

ny_grouped_clustering = ny_grouped.drop('Neighborhood', 1)


# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ny_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 
#len(kmeans.labels_)#=16
#scarborough_data.shape

array([1, 0, 1, 1, 0, 1, 1, 1, 4, 3], dtype=int32)

In [44]:
kmeans.labels_1 = np.append(kmeans.labels_,0)
kmeans.labels_1

array([1, 0, 1, 1, 0, 1, 1, 1, 4, 3, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 4,
       0])

In [46]:
ny_merged = ny_data

# add clustering labels
ny_merged['Cluster Labels'] = kmeans.labels_1

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
ny_merged = ny_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

ny_merged

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M2H,North York,'Hillcrest Village',M2H,43.803762,-79.363452,1,Golf Course,Pool,Athletics & Sports,Mediterranean Restaurant,Dog Run,Women's Store,Empanada Restaurant,Convenience Store,Cosmetics Shop,Deli / Bodega
1,M2J,North York,"'Fairview', 'Henry Farm', 'Oriole'",M2J,43.778517,-79.346556,0,Clothing Store,Fast Food Restaurant,Coffee Shop,Restaurant,Asian Restaurant,Food Court,Japanese Restaurant,Bakery,Women's Store,Tea Room
2,M2K,North York,'Bayview Village',M2K,43.786947,-79.385975,1,Chinese Restaurant,Café,Bank,Japanese Restaurant,Women's Store,Event Space,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant
3,M2L,North York,"'Silver Hills', 'York Mills'",M2L,43.75749,-79.374714,1,,,,,,,,,,
4,M2M,North York,"'Newtonbrook', 'Willowdale'",M2M,43.789053,-79.408493,0,,,,,,,,,,
5,M2N,North York,'Willowdale South',M2N,43.77012,-79.408493,1,Restaurant,Ramen Restaurant,Coffee Shop,Café,Japanese Restaurant,Sushi Restaurant,Sandwich Place,Ice Cream Shop,Lounge,Middle Eastern Restaurant
6,M2P,North York,'York Mills West',M2P,43.752758,-79.400049,1,Park,Convenience Store,Bank,Women's Store,Event Space,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant,Diner
7,M2R,North York,'Willowdale West',M2R,43.782736,-79.442259,1,Pharmacy,Butcher,Discount Store,Coffee Shop,Pizza Place,Grocery Store,Athletics & Sports,Bakery,Cosmetics Shop,Deli / Bodega
8,M3A,North York,'Parkwoods',M3A,43.753259,-79.329656,4,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Empanada Restaurant,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant
9,M3B,North York,'Don Mills North',M3B,43.745906,-79.352188,3,Gym / Fitness Center,Caribbean Restaurant,Café,Japanese Restaurant,Basketball Court,Event Space,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant


In [47]:
# Visualize the clusters in the map

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location = [latitude_ny, longitude_ny], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ny_merged['Latitude'], ny_merged['Longitude'], ny_merged['Neighbourhood'], ny_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [48]:
# Examine each of the five clusters

ny_merged.loc[ny_merged['Cluster Labels'] == 0, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

ny_merged.loc[ny_merged['Cluster Labels'] == 1, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

ny_merged.loc[ny_merged['Cluster Labels'] == 2, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

ny_merged.loc[ny_merged['Cluster Labels'] == 3, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

ny_merged.loc[ny_merged['Cluster Labels'] == 4, ny_merged.columns[[1] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,North York,-79.329656,4,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Empanada Restaurant,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant
22,North York,-79.565963,4,Pizza Place,Empanada Restaurant,Coffee Shop,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant,Diner
