# Segmenting and Clustering Neighborhoods in Toronto

Overview: Implemention to explore, segment, and cluster the neighborhoods in the city of Toronto
    Toronto neighborhood data will be collected from Wikipedia and the neighborhood info will be collected thru' the Foursquare API

## Part 1 -  Data Wrangling

### Install the required packages

In [1]:
#!conda install -c conda-forge beautifulsoup4 --yes 
#!conda install -c conda-forge lxml --yes 
#!conda install -c conda-forge requests --yes 
#!conda install -c conda-forge geocoder --yes 
#!conda install -c conda-forge folium --yes 

### Import all the required packages

In [2]:
from bs4 import BeautifulSoup
import requests
import urllib.request
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

### import the data from the URL. Parse it using Beautifulsoup to get the table

In [3]:
fp = urllib.request.urlopen("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
mybytes = fp.read()

mystr = mybytes.decode("utf8")
fp.close()

In [4]:
soup = BeautifulSoup(mystr,'lxml')
match=soup.find('table', class_='wikitable sortable')

### Create a data frame from the table

In [5]:
dataFrameList = pd.read_html(str(match))
neighborhoods = dataFrameList[0]
neighborhoods.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [6]:
#Cleanup step 1: Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

assignedBoroughs = neighborhoods['Borough']!='Not assigned'
neighborhoods = neighborhoods[assignedBoroughs]

In [7]:
#cleanup step 2: Combine rows with the same neighborhood into one row with the neighborhoods separated with a comma

neighborhoods = neighborhoods.groupby(['Postcode', 'Borough'], as_index=False, sort=False).agg(','.join)

In [8]:
#cleanup step 3:  If a cell has a borough but a Not assigned neighborhood, then assign borough to neighbourhood

neighborhoods['Neighbourhood'] = np.where(neighborhoods['Neighbourhood'] == 'Not assigned', neighborhoods['Borough'], neighborhoods['Neighbourhood'])

#Rename neighbourhood column to be consistent
neighborhoods.rename(columns={"Neighbourhood": "Neighborhood"}, inplace = True)

In [9]:
neighborhoods

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Queen's Park
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


In [10]:
neighborhoods.shape

(103, 3)

## Part 2 - Adding Geocoding

In [11]:
#import geocoder

# initialize your variable to None
#lat_lng_coords = None

# loop until you get the coordinates
#while(lat_lng_coords is None):

#  g = geocoder.google('{}, Toronto, Ontario'.format('M5G'))
#  lat_lng_coords = g.latlng

#latitude = lat_lng_coords[0]
#longitude = lat_lng_coords[1]
#print (latitude)
#print (longitude)

### Geocoder failing to return the coordinates after multiple retries. Continuing implementation the csv file

In [12]:
long_lat_df = pd.read_csv('http://cocl.us/Geospatial_data')
#long_lat_df.rename(columns={"Postal Code": "Postcode"}, inplace = True)
long_lat_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [13]:
neighborhoods = neighborhoods.merge(long_lat_df, left_on='Postcode', right_on='Postal Code')
neighborhoods.drop(['Postal Code'], axis=1)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.654260,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M9A,Queen's Park,Queen's Park,43.667856,-79.532242
6,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937


In [14]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, ON are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, ON are 43.653963, -79.387207.


### Create a map of Toronto with neighbourhoods superimposed on top

In [15]:
import folium

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Initialize foursquare api credentials in a hidden cell

In [16]:
# The code was removed by Watson Studio for sharing.

In [17]:
LIMIT = 10 # limit of number of venues returned by Foursquare API
VERSION = '20180605' # Foursquare API version



In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [19]:
toronto_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

Parkwoods
Victoria Village
Harbourfront
Lawrence Heights,Lawrence Manor
Queen's Park
Queen's Park
Rouge,Malvern
Don Mills North
Woodbine Gardens,Parkview Hill
Ryerson,Garden District
Glencairn
Cloverdale,Islington,Martin Grove,Princess Gardens,West Deane Park
Highland Creek,Rouge Hill,Port Union
Flemingdon Park,Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Bloordale Gardens,Eringate,Markland Wood,Old Burnhamthorpe
Guildwood,Morningside,West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor,Downsview North,Wilson Heights
Thorncliffe Park
Adelaide,King,Richmond
Dovercourt Village,Dufferin
Scarborough Village
Fairview,Henry Farm,Oriole
Northwood Park,York University
East Toronto
Harbourfront East,Toronto Islands,Union Station
Little Portugal,Trinity
East Birchmount Park,Ionview,Kennedy Park
Bayview Village
CFB Toronto,Downsview East
The Danforth West,Riverdale
Design Exchange,Toronto 

In [20]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.753259,-79.329656,TTC stop - 44 Valley Woods,43.755402,-79.333741,Bus Stop
3,Parkwoods,43.753259,-79.329656,Corrosion Service Company Limited,43.752432,-79.334661,Construction & Landscaping
4,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena


In [21]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",10,10,10,10,10,10
Agincourt,4,4,4,4,4,4
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",2,2,2,2,2,2
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",10,10,10,10,10,10
"Alderwood,Long Branch",9,9,9,9,9,9
"Bathurst Manor,Downsview North,Wilson Heights",10,10,10,10,10,10
Bayview Village,4,4,4,4,4,4
"Bedford Park,Lawrence Manor East",10,10,10,10,10,10
Berczy Park,10,10,10,10,10,10
"Birch Cliff,Cliffside West",4,4,4,4,4,4


## Analyze each neighborhood

In [22]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Terminal,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Toy / Game Store,Trail,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [23]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Terminal,American Restaurant,Arts & Crafts Store,Asian Restaurant,...,Toy / Game Store,Trail,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,"Adelaide,King,Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.1,0.000000,0.0,0.0,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.100000,0.0,0.0,0.0,0.0,0.0
4,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
5,"Bathurst Manor,Downsview North,Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
6,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
7,"Bedford Park,Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
8,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.1,0.000000,0.0,0.0,0.0,0.0,0.0
9,"Birch Cliff,Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0


### print each neighborhood along with the top 5 most common venues

In [24]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
          venue  freq
0    Steakhouse   0.2
1  Concert Hall   0.1
2     Speakeasy   0.1
3         Hotel   0.1
4         Plaza   0.1


----Agincourt----
                       venue  freq
0             Breakfast Spot  0.25
1                     Lounge  0.25
2  Latin American Restaurant  0.25
3               Skating Rink  0.25
4             Medical Center  0.00


----Agincourt North,L'Amoreaux East,Milliken,Steeles East----
            venue  freq
0      Playground   0.5
1            Park   0.5
2     Yoga Studio   0.0
3           Motel   0.0
4  Medical Center   0.0


----Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown----
           venue  freq
0  Grocery Store   0.2
1    Pizza Place   0.1
2    Coffee Shop   0.1
3       Pharmacy   0.1
4     Beer Store   0.1


----Alderwood,Long Branch----
          venue  freq
0   Pizza Place  0.22
1      Pharmacy  0.11
2           Gym  0.11
3  Skating Rink  0.11
4   Co

In [25]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [26]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Steakhouse,Coffee Shop,Vegetarian / Vegan Restaurant,Speakeasy,Hotel,Plaza,Café,Opera House,Concert Hall,Cosmetics Shop
1,Agincourt,Breakfast Spot,Latin American Restaurant,Skating Rink,Lounge,Diner,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Park,Playground,Women's Store,Deli / Bodega,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Pharmacy,Coffee Shop,Video Store,Pizza Place,Fast Food Restaurant,Beer Store,Sandwich Place,Fried Chicken Joint,Department Store
4,"Alderwood,Long Branch",Pizza Place,Pharmacy,Sandwich Place,Pub,Athletics & Sports,Skating Rink,Gym,Coffee Shop,Construction & Landscaping,Department Store


In [27]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
print (kmeans.labels_[0:10])
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

[0 1 3 0 0 0 1 0 1 1]


In [28]:
# add clustering labels

print (neighborhoods_venues_sorted['Cluster Labels'])
toronto_merged = neighborhoods

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood', how='right')
print (toronto_merged['Cluster Labels'])
toronto_merged.head()

0     0
1     1
2     3
3     0
4     0
5     0
6     1
7     0
8     1
9     1
10    0
11    0
12    1
13    3
14    0
15    1
16    3
17    0
18    1
19    0
20    0
21    0
22    1
23    1
24    1
25    1
26    0
27    0
28    1
29    0
     ..
70    0
71    0
72    3
73    0
74    3
75    1
76    1
77    0
78    1
79    1
80    1
81    0
82    1
83    0
84    0
85    3
86    1
87    1
88    1
89    1
90    1
91    0
92    0
93    3
94    0
95    0
96    0
97    1
98    1
99    3
Name: Cluster Labels, Length: 100, dtype: int32
0      3
1      0
2      1
3      0
4      0
5      0
6      1
7      1
8      1
9      1
10     1
12     2
13     1
14     1
15     0
16     3
17     0
18     1
19     3
20     1
21     3
22     0
23     0
24     0
25     0
26     1
27     1
28     0
29     1
30     0
      ..
72     0
73     1
74     0
75     0
76     0
77     1
78     1
79     0
80     1
81     0
82     1
83     1
84     0
85     3
86     0
87     0
88     0
89     0
90     0
91     3
92   

Unnamed: 0,Postcode,Borough,Neighborhood,Postal Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656,3,Bus Stop,Construction & Landscaping,Park,Food & Drink Shop,Women's Store,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572,0,Intersection,Pizza Place,Coffee Shop,Hockey Arena,Portuguese Restaurant,Women's Store,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
2,M5A,Downtown Toronto,Harbourfront,M5A,43.65426,-79.360636,1,Breakfast Spot,Historic Site,Park,Spa,Restaurant,Bakery,Pub,Gym / Fitness Center,Coffee Shop,Creperie
3,M6A,North York,"Lawrence Heights,Lawrence Manor",M6A,43.718518,-79.464763,0,Furniture / Home Store,Accessories Store,Coffee Shop,Miscellaneous Shop,Event Space,Clothing Store,Boutique,Vietnamese Restaurant,Creperie,Diner
4,M7A,Downtown Toronto,Queen's Park,M7A,43.662301,-79.389494,0,Coffee Shop,Yoga Studio,Burrito Place,Italian Restaurant,Creperie,Park,Portuguese Restaurant,Gym,Arts & Crafts Store,Field


In [29]:
neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,"Adelaide,King,Richmond",Steakhouse,Coffee Shop,Vegetarian / Vegan Restaurant,Speakeasy,Hotel,Plaza,Café,Opera House,Concert Hall,Cosmetics Shop
1,1,Agincourt,Breakfast Spot,Latin American Restaurant,Skating Rink,Lounge,Diner,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
2,3,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Park,Playground,Women's Store,Deli / Bodega,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
3,0,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Pharmacy,Coffee Shop,Video Store,Pizza Place,Fast Food Restaurant,Beer Store,Sandwich Place,Fried Chicken Joint,Department Store
4,0,"Alderwood,Long Branch",Pizza Place,Pharmacy,Sandwich Place,Pub,Athletics & Sports,Skating Rink,Gym,Coffee Shop,Construction & Landscaping,Department Store


In [30]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [31]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,-79.315572,0,Intersection,Pizza Place,Coffee Shop,Hockey Arena,Portuguese Restaurant,Women's Store,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
3,North York,-79.464763,0,Furniture / Home Store,Accessories Store,Coffee Shop,Miscellaneous Shop,Event Space,Clothing Store,Boutique,Vietnamese Restaurant,Creperie,Diner
4,Downtown Toronto,-79.389494,0,Coffee Shop,Yoga Studio,Burrito Place,Italian Restaurant,Creperie,Park,Portuguese Restaurant,Gym,Arts & Crafts Store,Field
5,Queen's Park,-79.532242,0,Coffee Shop,Yoga Studio,Burrito Place,Italian Restaurant,Creperie,Park,Portuguese Restaurant,Gym,Arts & Crafts Store,Field
15,Downtown Toronto,-79.375418,0,Gym,Italian Restaurant,Cosmetics Shop,Coffee Shop,Creperie,Gastropub,Japanese Restaurant,BBQ Joint,Restaurant,Food Truck
17,Etobicoke,-79.577201,0,Liquor Store,Convenience Store,Pizza Place,Cosmetics Shop,Coffee Shop,Beer Store,Café,Diner,Empanada Restaurant,Electronics Store
22,Scarborough,-79.216917,0,Coffee Shop,Indian Restaurant,Korean Restaurant,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
23,East York,-79.363452,0,Coffee Shop,Supermarket,Gym,Fish & Chips Shop,Liquor Store,Restaurant,Bike Shop,Sporting Goods Shop,Sports Bar,Grocery Store
24,Downtown Toronto,-79.387383,0,Coffee Shop,Park,Seafood Restaurant,Gastropub,Japanese Restaurant,Italian Restaurant,Modern European Restaurant,Sushi Restaurant,Department Store,Drugstore
25,Downtown Toronto,-79.422564,0,Café,Grocery Store,Restaurant,Italian Restaurant,Coffee Shop,Candy Store,Diner,Electronics Store,Eastern European Restaurant,Drugstore


In [32]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,-79.360636,1,Breakfast Spot,Historic Site,Park,Spa,Restaurant,Bakery,Pub,Gym / Fitness Center,Coffee Shop,Creperie
6,Scarborough,-79.194353,1,Fast Food Restaurant,Wings Joint,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
7,North York,-79.352188,1,Gym / Fitness Center,Caribbean Restaurant,Japanese Restaurant,Café,Cosmetics Shop,Construction & Landscaping,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
8,East York,-79.309937,1,Pizza Place,Gym / Fitness Center,Bank,Gastropub,Intersection,Fast Food Restaurant,Pet Store,Bus Line,Pharmacy,Asian Restaurant
9,Downtown Toronto,-79.378937,1,Café,Comic Shop,Music Venue,Clothing Store,Pizza Place,Plaza,Burrito Place,Theater,Tea Room,Deli / Bodega
10,North York,-79.445073,1,Japanese Restaurant,Sushi Restaurant,Asian Restaurant,Pub,Women's Store,Department Store,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
13,North York,-79.340923,1,Gym,Discount Store,Concert Hall,Bike Shop,Sporting Goods Shop,Beer Store,Japanese Restaurant,Italian Restaurant,Restaurant,Clothing Store
14,East York,-79.318389,1,Pharmacy,Beer Store,Cosmetics Shop,Curling Ice,Skating Rink,Park,Video Store,Gastropub,Electronics Store,Drugstore
18,Scarborough,-79.188711,1,Rental Car Location,Breakfast Spot,Medical Center,Mexican Restaurant,Intersection,Electronics Store,Pizza Place,Creperie,Diner,Eastern European Restaurant
20,Downtown Toronto,-79.373306,1,Liquor Store,Concert Hall,Farmers Market,Museum,Vegetarian / Vegan Restaurant,Park,Beer Bar,French Restaurant,Steakhouse,Tea Room


In [33]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Scarborough,-79.160497,2,Bar,Women's Store,Fast Food Restaurant,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store


In [34]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,-79.329656,3,Bus Stop,Construction & Landscaping,Park,Food & Drink Shop,Women's Store,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
16,York,-79.428191,3,Park,Field,Hockey Arena,Trail,Women's Store,Department Store,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
19,East Toronto,-79.293031,3,Park,Health Food Store,Trail,Pub,Women's Store,Deli / Bodega,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
21,York,-79.453512,3,Park,Women's Store,Market,Fast Food Restaurant,Convenience Store,Cosmetics Shop,Construction & Landscaping,Empanada Restaurant,Electronics Store,Eastern European Restaurant
35,East York,-79.338106,3,Park,Convenience Store,Women's Store,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
40,North York,-79.464763,3,Park,Playground,Bus Stop,Snack Place,Airport,Construction & Landscaping,Concert Hall,Electronics Store,Eastern European Restaurant,Drugstore
49,North York,-79.490074,3,Construction & Landscaping,Park,Basketball Court,Bakery,Women's Store,Diner,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
61,Central Toronto,-79.38879,3,Park,Swim School,Bus Line,Women's Store,Dessert Shop,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
64,York,-79.518188,3,Park,Convenience Store,Women's Store,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
66,North York,-79.400049,3,Park,Convenience Store,Bar,Bank,Women's Store,Diner,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore


In [35]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,North York,-79.532242,4,Baseball Field,Women's Store,Fast Food Restaurant,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
101,Etobicoke,-79.498509,4,Breakfast Spot,Baseball Field,Diner,Farmers Market,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
