In [1]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import folium 
from sklearn.cluster import KMeans

import matplotlib.cm as cm
import matplotlib.colors as colors

import requests
from pandas import json_normalize

In [2]:
df_original = pd.read_html('https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=1011037969')[0]
df_original

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [3]:
df_original.columns=['PostalCode', 'Borough', 'Neighborhood']
df_original.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [4]:
df_original.shape

(180, 3)

In [5]:
df_original.loc[df_original['Borough'] == 'Not assigned']

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
7,M8A,Not assigned,Not assigned
10,M2B,Not assigned,Not assigned
15,M7B,Not assigned,Not assigned
...,...,...,...
174,M4Z,Not assigned,Not assigned
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned


In [6]:
new_df = df_original.drop(df_original.loc[df_original['Borough'] == 'Not assigned'].index)

# reseting the index and dropping the "index" column
new_df.reset_index(drop=True, inplace=True)

new_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [7]:
new_df.shape

(103, 3)

In [8]:
toronto_df_new = new_df
toronto_df_new

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [9]:
coordinates_df = pd.read_csv('https://cocl.us/Geospatial_data')

# renaming the "Postal Code" column
coordinates_df.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)
coordinates_df.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
# merging the dataframes
df = pd.merge(toronto_df_new, coordinates_df, on='PostalCode')
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [12]:
print(f'New dataframe has {df.shape[0]} rows and {df.shape[1]} columns')

New dataframe has 103 rows and 5 columns


In [13]:
#check the Toronto's Borough
df['Borough'].unique()

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Toronto/York', 'Mississauga'], dtype=object)

In [14]:
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent='toronto_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print(f'The geographical coordinates of Toronto is {latitude}, {longitude}')

The geographical coordinates of Toronto is 43.6534817, -79.3839347


In [15]:
#create Toronto map with neighborhoods on
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

#put markers on map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = f'{neighborhood}, {borough}'
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    location=[lat,lng],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(toronto_map)

toronto_map

In [16]:
#define Foursquare Credentials and Version
CLIENT_ID = 'HGDOTKX5IGFDDDPZJSSH0MYYJ3HDT02PBPI4TDWEXPAVLSWG' # your Foursquare ID
CLIENT_SECRET = 'EP0ARLHFIV2SCICUGR5KRKS1MAGXMMW31NJ2RLK1HCYY2HY3' # your Foursquare Secret
ACCESS_TOKEN = 'BPM30FWDVO03BDC3JHDGIKHU4XQV1TTH2EVOFNNRZCFC5F0V' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: HGDOTKX5IGFDDDPZJSSH0MYYJ3HDT02PBPI4TDWEXPAVLSWG
CLIENT_SECRET:EP0ARLHFIV2SCICUGR5KRKS1MAGXMMW31NJ2RLK1HCYY2HY3


**Now, let's get the top 100 venues that are within a radius of 500 meters.**

In [19]:
radius = 500
LIMIT = 100

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)          

In [20]:
toronto_venues = getNearbyVenues(names=df['Neighborhood'],
                                latitudes=df['Latitude'],
                                longitudes=df['Longitude'])

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [21]:
toronto_venues.shape

(2124, 7)

In [22]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,649 Variety,43.754513,-79.331942,Convenience Store
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


In [23]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",7,7,7,7,7,7
"Bathurst Manor, Wilson Heights, Downsview North",23,23,23,23,23,23
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",23,23,23,23,23,23
...,...,...,...,...,...,...
"Willowdale, Willowdale East",34,34,34,34,34,34
"Willowdale, Willowdale West",5,5,5,5,5,5
Woburn,4,4,4,4,4,4
Woodbine Heights,8,8,8,8,8,8


In [24]:
print(f'{toronto_venues["Venue Category"].nunique()} are the categories')

270 are the categories


In [25]:
toronto_venues['Venue Category'].unique()[:100]

array(['Park', 'Convenience Store', 'Food & Drink Shop', 'Hockey Arena',
       'Portuguese Restaurant', 'Coffee Shop', 'French Restaurant',
       'Intersection', 'Pizza Place', 'Bakery', 'Distribution Center',
       'Spa', 'Restaurant', 'Pub', 'Breakfast Spot',
       'Gym / Fitness Center', 'Historic Site', 'Chocolate Shop',
       'Farmers Market', 'Performing Arts Venue', 'Greek Restaurant',
       'Café', 'Dessert Shop', 'Yoga Studio', 'Mexican Restaurant',
       'Theater', 'Event Space', 'Shoe Store', 'Art Gallery',
       'Electronics Store', 'Cosmetics Shop', 'Bank', 'Beer Store',
       'Antique Shop', 'Boutique', 'Furniture / Home Store',
       'Vietnamese Restaurant', 'Clothing Store', 'Accessories Store',
       'Gift Shop', 'Italian Restaurant', 'Creperie', 'Beer Bar',
       'Burrito Place', 'Diner', 'Sushi Restaurant',
       'Fried Chicken Joint', 'Japanese Restaurant', 'Smoothie Shop',
       'Sandwich Place', 'Gym', 'Bar', 'College Auditorium',
       'Music Venue

In [29]:
#check if results contain european restaurants
#check if the results contain "French Restaurants"
"French Restaurant" in toronto_venues['Venue Category'].unique()

True

In [30]:
#check if the results contain "Italian Restaurants"
"Italian Restaurant" in toronto_venues['Venue Category'].unique()

True

In [31]:
#check if the results contain "Spanish Restaurants"
"Spanish Restaurant" in toronto_venues['Venue Category'].unique()

False

In [32]:
#check if the results contain "Portuguese Restaurants"
"Portuguese Restaurant" in toronto_venues['Venue Category'].unique()

True

In [33]:
#check if the results contain "English Restaurants"
"English Restaurant" in toronto_venues['Venue Category'].unique()

False

In [34]:
#one hot encoding
toronto_one_hot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_one_hot.drop('Neighborhood', axis=1, inplace=True)

# add neighborhood column back to dataframe
toronto_one_hot['Neighborhood'] = toronto_venues['Neighborhood']

# move neighborhood column to the first column
fixed_columns = [toronto_one_hot.columns[-1]] + list(toronto_one_hot.columns[:-1])
toronto_one_hot = toronto_one_hot[fixed_columns]

toronto_one_hot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
toronto_one_hot.shape

(2124, 270)

In [36]:
#group by neighborhood
toronto_grouped = toronto_one_hot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
toronto_grouped.shape

(95, 270)

In [52]:
#find neighborhood along the first 5 venues
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("-----"+hood+"-----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue', 'freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

-----Agincourt-----
                       venue  freq
0                     Lounge  0.25
1  Latin American Restaurant  0.25
2             Breakfast Spot  0.25
3               Skating Rink  0.25
4              Metro Station  0.00


-----Alderwood, Long Branch-----
            venue  freq
0     Pizza Place  0.29
1             Pub  0.14
2             Gym  0.14
3  Sandwich Place  0.14
4    Skating Rink  0.14


-----Bathurst Manor, Wilson Heights, Downsview North-----
                       venue  freq
0                       Bank  0.09
1                Coffee Shop  0.09
2              Shopping Mall  0.04
3  Middle Eastern Restaurant  0.04
4             Sandwich Place  0.04


-----Bayview Village-----
                 venue  freq
0                 Café  0.25
1  Japanese Restaurant  0.25
2                 Bank  0.25
3   Chinese Restaurant  0.25
4                Motel  0.00


-----Bedford Park, Lawrence Manor East-----
                  venue  freq
0        Sandwich Place  0.08
1    Italian 

In [45]:
len(toronto_grouped[toronto_grouped["French Restaurant"] > 0])

11

In [46]:
len(toronto_grouped[toronto_grouped["Italian Restaurant"] > 0])

26

In [47]:
len(toronto_grouped[toronto_grouped["Portuguese Restaurant"] > 0])

3

In [50]:
#create a new dataframe with those european reastaurant
toronto_european = toronto_grouped[["Neighborhood","French Restaurant","Italian Restaurant","Portuguese Restaurant"]]

In [51]:
toronto_european.head()

Unnamed: 0,Neighborhood,French Restaurant,Italian Restaurant,Portuguese Restaurant
0,Agincourt,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.086957,0.0


In [52]:
toronto_european.shape

(95, 4)

In [53]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [54]:
#create a new dataframe with top venues for each neighborhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according the number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Latin American Restaurant,Breakfast Spot,Skating Rink,Dumpling Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
1,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Dance Studio,Pub,Sandwich Place,Gym,Electronics Store,Escape Room,Eastern European Restaurant,Dumpling Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Ice Cream Shop,Sandwich Place,Sushi Restaurant,Intersection,Middle Eastern Restaurant,Deli / Bodega,Mobile Phone Shop,Diner
3,Bayview Village,Café,Japanese Restaurant,Chinese Restaurant,Bank,Yoga Studio,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Sandwich Place,Italian Restaurant,Pharmacy,Butcher,Restaurant,Café,Pub,Pizza Place,Comfort Food Restaurant


In [55]:
#clustering
kclusters = 5

toronto_grouped_clustering = toronto_european.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 0, 1, 1, 4, 1, 1], dtype=int32)

In [56]:
toronto_merged = toronto_european.copy()

In [57]:
toronto_merged["Cluster Labels"] = kmeans.labels_

In [60]:
toronto_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
toronto_merged.head()

Unnamed: 0,Neighborhood,French Restaurant,Italian Restaurant,Portuguese Restaurant,Cluster Labels
0,Agincourt,0.0,0.0,0.0,1
1,"Alderwood, Long Branch",0.0,0.0,0.0,1
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,1
3,Bayview Village,0.0,0.0,0.0,1
4,"Bedford Park, Lawrence Manor East",0.0,0.086957,0.0,0


In [61]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(toronto_venues.set_index("Neighborhood"), on="Neighborhood")

print(toronto_merged.shape)
toronto_merged.head()

(2124, 11)


Unnamed: 0,Neighborhood,French Restaurant,Italian Restaurant,Portuguese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Agincourt,0.0,0.0,0.0,1,43.7942,-79.262029,Panagio's Breakfast & Lunch,43.79237,-79.260203,Breakfast Spot
0,Agincourt,0.0,0.0,0.0,1,43.7942,-79.262029,El Pulgarcito,43.792648,-79.259208,Latin American Restaurant
0,Agincourt,0.0,0.0,0.0,1,43.7942,-79.262029,Twilight,43.791999,-79.258584,Lounge
0,Agincourt,0.0,0.0,0.0,1,43.7942,-79.262029,Commander Arena,43.794867,-79.267989,Skating Rink
1,"Alderwood, Long Branch",0.0,0.0,0.0,1,43.602414,-79.543484,Il Paesano Pizzeria & Restaurant,43.60128,-79.545028,Pizza Place


In [62]:
print(toronto_merged.shape)
toronto_merged.sort_values(["Cluster Labels"], inplace=True)
toronto_merged

(2124, 11)


Unnamed: 0,Neighborhood,French Restaurant,Italian Restaurant,Portuguese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
16,"Clarks Corners, Tam O'Shanter, Sullivan",0.000000,0.066667,0.000000,0,43.781638,-79.304302,Rexall,43.780900,-79.298764,Pharmacy
80,"The Danforth West, Riverdale",0.000000,0.069767,0.000000,0,43.679557,-79.352188,Messini Authentic Gyros,43.677704,-79.350480,Greek Restaurant
80,"The Danforth West, Riverdale",0.000000,0.069767,0.000000,0,43.679557,-79.352188,Dolce Gelato,43.677773,-79.351187,Ice Cream Shop
80,"The Danforth West, Riverdale",0.000000,0.069767,0.000000,0,43.679557,-79.352188,La Diperie,43.677702,-79.352265,Ice Cream Shop
80,"The Danforth West, Riverdale",0.000000,0.069767,0.000000,0,43.679557,-79.352188,Cafe Fiorentina,43.677743,-79.350115,Italian Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
13,Central Bay Street,0.016393,0.049180,0.016393,4,43.657952,-79.387383,Midi Bistro,43.655871,-79.392091,French Restaurant
13,Central Bay Street,0.016393,0.049180,0.016393,4,43.657952,-79.387383,freshii,43.661286,-79.383964,Salad Place
13,Central Bay Street,0.016393,0.049180,0.016393,4,43.657952,-79.387383,Starbucks,43.659509,-79.382132,Coffee Shop
13,Central Bay Street,0.016393,0.049180,0.016393,4,43.657952,-79.387383,TD Canada Trust,43.660843,-79.384886,Bank


In [63]:
# import matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [65]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Neighborhood Latitude'], toronto_merged['Neighborhood Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster))
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [66]:
map_clusters.save('map_clusters.html')

In [74]:
#cluster examination
#cluster 0
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,French Restaurant,Italian Restaurant,Portuguese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
16,"Clarks Corners, Tam O'Shanter, Sullivan",0.0,0.066667,0.0,0,43.781638,-79.304302,Rexall,43.780900,-79.298764,Pharmacy
80,"The Danforth West, Riverdale",0.0,0.069767,0.0,0,43.679557,-79.352188,Messini Authentic Gyros,43.677704,-79.350480,Greek Restaurant
80,"The Danforth West, Riverdale",0.0,0.069767,0.0,0,43.679557,-79.352188,Dolce Gelato,43.677773,-79.351187,Ice Cream Shop
80,"The Danforth West, Riverdale",0.0,0.069767,0.0,0,43.679557,-79.352188,La Diperie,43.677702,-79.352265,Ice Cream Shop
80,"The Danforth West, Riverdale",0.0,0.069767,0.0,0,43.679557,-79.352188,Cafe Fiorentina,43.677743,-79.350115,Italian Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
41,"India Bazaar, The Beaches West",0.0,0.055556,0.0,0,43.668999,-79.315572,Casa di Giorgio,43.666645,-79.315204,Italian Restaurant
41,"India Bazaar, The Beaches West",0.0,0.055556,0.0,0,43.668999,-79.315572,Pet Valu,43.666979,-79.314665,Pet Store
41,"India Bazaar, The Beaches West",0.0,0.055556,0.0,0,43.668999,-79.315572,Murphy's Law,43.667319,-79.312656,Pub
41,"India Bazaar, The Beaches West",0.0,0.055556,0.0,0,43.668999,-79.315572,The Tulip Steakhouse,43.666348,-79.316854,Steakhouse


In [78]:
#cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,French Restaurant,Italian Restaurant,Portuguese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
53,"New Toronto, Mimico South, Humber Bay Shores",0.0,0.00,0.0,1,43.605647,-79.501321,New Toronto Fish & Chips,43.601849,-79.503281,Restaurant
53,"New Toronto, Mimico South, Humber Bay Shores",0.0,0.00,0.0,1,43.605647,-79.501321,Domino's Pizza,43.601583,-79.500905,Pizza Place
29,"First Canadian Place, Underground city",0.0,0.01,0.0,1,43.648429,-79.382280,Earls Kitchen & Bar,43.647946,-79.383706,Bar
30,"Forest Hill North & West, Forest Hill Road Park",0.0,0.00,0.0,1,43.696948,-79.411307,Kay Gardner Beltline Trail,43.698446,-79.406873,Trail
30,"Forest Hill North & West, Forest Hill Road Park",0.0,0.00,0.0,1,43.696948,-79.411307,Nikko Sushi Japenese Restaurant,43.700443,-79.407957,Sushi Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
28,"Fairview, Henry Farm, Oriole",0.0,0.00,0.0,1,43.778517,-79.346556,Little Burgundy,43.777960,-79.343816,Shoe Store
28,"Fairview, Henry Farm, Oriole",0.0,0.00,0.0,1,43.778517,-79.346556,Forever XXI,43.778290,-79.343302,Clothing Store
28,"Fairview, Henry Farm, Oriole",0.0,0.00,0.0,1,43.778517,-79.346556,Virgin Mobile,43.777768,-79.343594,Mobile Phone Shop
28,"Fairview, Henry Farm, Oriole",0.0,0.00,0.0,1,43.778517,-79.346556,Bentley,43.778241,-79.343539,Luggage Store


In [82]:
#cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,French Restaurant,Italian Restaurant,Portuguese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
85,Victoria Village,0.166667,0.0,0.166667,2,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
85,Victoria Village,0.166667,0.0,0.166667,2,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
85,Victoria Village,0.166667,0.0,0.166667,2,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
85,Victoria Village,0.166667,0.0,0.166667,2,43.725882,-79.315572,The Frig,43.727051,-79.317418,French Restaurant
85,Victoria Village,0.166667,0.0,0.166667,2,43.725882,-79.315572,Pizza Nova,43.725824,-79.31286,Pizza Place
85,Victoria Village,0.166667,0.0,0.166667,2,43.725882,-79.315572,Eglinton Ave E & Sloane Ave/Bermondsey Rd,43.726086,-79.31362,Intersection


In [70]:
#cluster 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3]

Unnamed: 0,Neighborhood,French Restaurant,Italian Restaurant,Portuguese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
32,Glencairn,0.0,0.25,0.0,3,43.709577,-79.445073,"R Bakery - Delicious Cakes, Breads",43.70742,-79.443126,Bakery
32,Glencairn,0.0,0.25,0.0,3,43.709577,-79.445073,Miyako Sushi Restaurant,43.709111,-79.44393,Japanese Restaurant
32,Glencairn,0.0,0.25,0.0,3,43.709577,-79.445073,Domino's Pizza,43.70717,-79.442658,Pizza Place
32,Glencairn,0.0,0.25,0.0,3,43.709577,-79.445073,Bigabaldi's Pizzera,43.706021,-79.442403,Italian Restaurant


In [71]:
#cluster 4
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4]

Unnamed: 0,Neighborhood,French Restaurant,Italian Restaurant,Portuguese Restaurant,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
75,Stn A PO Boxes,0.010101,0.030303,0.000000,4,43.646435,-79.374846,Loblaws,43.645427,-79.369789,Grocery Store
75,Stn A PO Boxes,0.010101,0.030303,0.000000,4,43.646435,-79.374846,HI-Toronto Hostel,43.650623,-79.375190,Hostel
75,Stn A PO Boxes,0.010101,0.030303,0.000000,4,43.646435,-79.374846,Starbucks,43.647193,-79.380957,Coffee Shop
75,Stn A PO Boxes,0.010101,0.030303,0.000000,4,43.646435,-79.374846,Bindia Indian Bistro,43.648559,-79.371816,Indian Restaurant
75,Stn A PO Boxes,0.010101,0.030303,0.000000,4,43.646435,-79.374846,Joe Fresh,43.644285,-79.369771,Clothing Store
...,...,...,...,...,...,...,...,...,...,...,...
13,Central Bay Street,0.016393,0.049180,0.016393,4,43.657952,-79.387383,Midi Bistro,43.655871,-79.392091,French Restaurant
13,Central Bay Street,0.016393,0.049180,0.016393,4,43.657952,-79.387383,freshii,43.661286,-79.383964,Salad Place
13,Central Bay Street,0.016393,0.049180,0.016393,4,43.657952,-79.387383,Starbucks,43.659509,-79.382132,Coffee Shop
13,Central Bay Street,0.016393,0.049180,0.016393,4,43.657952,-79.387383,TD Canada Trust,43.660843,-79.384886,Bank
