# _Part 1: Scraping data from Wikipedia_

In [402]:
from bs4 import BeautifulSoup
import requests

# Download the Wikipedia page that lists the Toronto ("M") postal codes.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of letting an error
# page be handed to the parser in the following cells.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
raw_data = response.text

In [403]:
# Turn the downloaded HTML into a BeautifulSoup tree, using the lxml parser.
parsed_soup = BeautifulSoup(raw_data, "lxml")

In [404]:
# Start from an empty pandas DataFrame that only declares the target columns.
import pandas as pd

columns_name = ['PostalCode', 'Borough', 'Neighborhood']
toronto_df = pd.DataFrame(columns=columns_name)


In [405]:
# Parse the postal-code table of the page into the DataFrame.
#
# Every <tr> yields exactly one record built from its first three <td> cells
# (postal code, borough, neighbourhood). The header row has no <td> cells, so
# it yields an all-empty record; the "Remove the first row" cell takes care of it.
#
# Records are collected in a list and the frame is built once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# it copied the whole frame on every call. Building the frame from scratch also
# makes the cell safe to re-run (no duplicated rows).
table = parsed_soup.find('table', class_='wikitable sortable')
if table is None:
    raise ValueError("Could not find a 'wikitable sortable' table in the downloaded page")

records = []
for tr in table.find_all('tr'):
    cells = [td.text.strip('\n') for td in tr.find_all('td')[:3]]
    # Pad short rows with empty strings so a row with fewer than three cells
    # can never inherit values left over from the previous row.
    cells += [''] * (3 - len(cells))
    postalCode, borough, neighborhood = cells
    records.append({
        'PostalCode': postalCode,
        'Borough': borough,
        # The page separates several neighbourhoods with '/'; use ',' instead.
        'Neighborhood': neighborhood.replace('/', ','),
    })

toronto_df = pd.DataFrame(records, columns=['PostalCode', 'Borough', 'Neighborhood'])
        
    


In [406]:
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,,,
1,M1A,Not assigned,
2,M2A,Not assigned,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [407]:
# Remove the empty record that comes from the table's header row.
# Filtering on content (instead of dropping whatever sits at position 0, in
# place) keeps the cell idempotent: re-running it can never discard a real
# postal code. Index labels of the remaining rows are left unchanged.
toronto_df = toronto_df[toronto_df['PostalCode'] != '']
toronto_df.head()


Unnamed: 0,PostalCode,Borough,Neighborhood
1,M1A,Not assigned,
2,M2A,Not assigned,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park , Harbourfront"


In [408]:
# Keep only postal codes whose borough is assigned, then renumber rows from 0.
has_borough = toronto_df['Borough'] != 'Not assigned'
toronto_df = toronto_df.loc[has_borough].reset_index(drop=True)
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


In [409]:
# Where the borough is known but the neighbourhood reads 'Not assigned',
# fall back to the borough name as the neighbourhood.
unnamed = toronto_df['Neighborhood'] == 'Not assigned'
toronto_df.loc[unnamed, 'Neighborhood'] = toronto_df.loc[unnamed, 'Borough']
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


In [410]:
# More than one neighbourhood can exist in one postal code area. Such rows are
# combined into a single row with the neighbourhoods separated by a comma.
# 'Borough' is part of the grouping key rather than an aggregated column:
# joining it like the neighbourhoods would produce values such as
# "North York, North York" whenever a postal code spans several rows.
toronto_df = (
    toronto_df
    .groupby(['PostalCode', 'Borough'], sort=False)['Neighborhood']
    .agg(', '.join)
    .reset_index()
)
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


In [411]:
toronto_df.shape

(103, 3)

# _Part2: GeoEncoder_

In [412]:
# Download the postal code -> latitude/longitude table and save it locally as
# Toronto_lat_data.csv.
# NOTE: `-q` silences wget and the message below is printed unconditionally,
# so seeing it does not by itself confirm that the download succeeded.
!wget -q -O Toronto_lat_data.csv https://cocl.us/Geospatial_data
print('Download Complete')

Download Complete


In [413]:
# Load the downloaded coordinates file (the merge with toronto_df is done in a later cell).
toronto_lat_lon = pd.read_csv('Toronto_lat_data.csv')
toronto_lat_lon.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [414]:
# Rename the postal-code column so it matches the join key used by toronto_df.
# Renaming by label (instead of overwriting the whole column list by position)
# cannot mislabel a column if the CSV's column order ever changes, and it is a
# harmless no-op when the cell is re-run.
toronto_lat_lon = toronto_lat_lon.rename(columns={'Postal Code': 'PostalCode'})
toronto_lat_lon.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [415]:
# Attach latitude/longitude to every postal code (inner join on 'PostalCode').
toronto_df = toronto_df.merge(toronto_lat_lon, on='PostalCode')
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494


In [416]:
toronto_df.shape

(103, 5)

# _Part 3: Analyzing the neighbourhoods of Toronto_

In [417]:
# Keep only the boroughs whose name contains "Toronto", renumbering rows from 0.
in_toronto = toronto_df['Borough'].str.contains("Toronto")
toronto_df_new = toronto_df[in_toronto].reset_index(drop=True)

In [418]:
toronto_df_new

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond , Adelaide , King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin , Dovercourt Village",43.669005,-79.442259


In [419]:
import json # library to handle JSON files
import folium # map rendering library
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [420]:
# Look up the latitude and longitude of Toronto; the maps below are centred on it.
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude_toronto = location.latitude
longitude_toronto = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude_toronto, longitude_toronto))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [421]:
# Base map centred on Toronto.
map_toronto = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=10)

# One blue circle marker per neighbourhood, with a "<neighbourhood>, <borough>" popup.
marker_fields = toronto_df_new[['Latitude', 'Longitude', 'Borough', 'Neighborhood']]
for lat, longt, borough, neighbourhood in marker_fields.itertuples(index=False, name=None):
    popup = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, longt],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)


In [422]:
map_toronto

In [1]:
# Foursquare API credentials.
# They are read from the environment so that real secrets never have to be
# written into the notebook; the 'removed' placeholders remain as fallbacks.
import os

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'removed') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'removed') # your Foursquare Secret
# The version is a date in YYYYMMDD form. The previous value '20202004' had 20
# in the month position (i.e. it was written as YYYYDDMM).
VERSION = '20200420' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# A real secret is never echoed into the saved notebook output.
print('CLIENT_SECRET:' + ('removed' if CLIENT_SECRET == 'removed' else '<hidden>'))

Your credentails:
CLIENT_ID: removed
CLIENT_SECRET:removed


**_Function to fetch nearby venues for all neighbourhoods_**

In [424]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100):
    """Query the Foursquare "explore" endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences (consumed with ``zip``): a label and the coordinates
        of each location to search around.
    radius : int, default 500
        Sent as the API's ``radius`` parameter. The argument is now honoured;
        it used to be overwritten by a hard-coded 500 inside the function.
    limit : int, default 100
        Sent as the API's ``limit`` parameter (previously a fixed local constant).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string; the timeout prevents
        # one stalled request from blocking the whole loop.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
            },
            timeout=30,
        )
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # Keep only the relevant information for each nearby venue.
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue with an empty category list gets None instead of
                # raising IndexError.
                categories[0]['name'] if categories else None,
            ))

    # Passing the columns to the constructor also works for zero rows, where
    # assigning .columns on an empty frame would raise a length-mismatch error.
    return pd.DataFrame(rows, columns=columns)

In [425]:
toronto_venues=getNearbyVenues(names=toronto_df_new['Neighborhood'],
                                   latitudes=toronto_df_new['Latitude'],
                                   longitudes=toronto_df_new['Longitude']
                                  )

Regent Park , Harbourfront
Queen's Park , Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond , Adelaide , King
Dufferin , Dovercourt Village
Harbourfront East , Union Station , Toronto Islands
Little Portugal , Trinity
The Danforth West , Riverdale
Toronto Dominion Centre , Design Exchange
Brockton , Parkdale Village , Exhibition Place
India Bazaar , The Beaches West
Commerce Court , Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West
High Park , The Junction South
North Toronto West
The Annex , North Midtown , Yorkville
Parkdale , Roncesvalles
Davisville
University of Toronto , Harbord
Runnymede , Swansea
Moore Park , Summerhill East
Kensington Market , Chinatown , Grange Park
Summerhill West , Rathnelly , South Hill , Forest Hill SE , Deer Park
CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport
Roseda

In [426]:
print(toronto_venues.shape)
toronto_venues.head()

(1609, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park , Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park , Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park , Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park , Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park , Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [427]:
#Checking how many venues are returned for each neighbourhood
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,55,55,55,55,55,55
"Brockton , Parkdale Village , Exhibition Place",23,23,23,23,23,23
Business reply mail Processing CentrE,17,17,17,17,17,17
"CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport",15,15,15,15,15,15
Central Bay Street,62,62,62,62,62,62
Christie,18,18,18,18,18,18
Church and Wellesley,71,71,71,71,71,71
"Commerce Court , Victoria Hotel",100,100,100,100,100,100
Davisville,36,36,36,36,36,36
Davisville North,7,7,7,7,7,7


In [428]:
print('There are '+str(len(toronto_venues['Venue Category'].unique()))+ ' unique categories')

There are 231 unique categories


In [429]:
# One-hot encode the venue categories (one indicator column per category).
category_dummies = pd.get_dummies(toronto_venues['Venue Category'], prefix="", prefix_sep="")

# The neighbourhood column is named 'NeighborhoodP' because 'Neighborhood' is
# itself returned as a venue category and would clash with it.
category_dummies['NeighborhoodP'] = toronto_venues['Neighborhood']

# Reorder so that the neighbourhood column (currently last) comes first.
all_columns = list(category_dummies.columns)
fc = all_columns[-1:] + all_columns[:-1]
toronto_onehot = category_dummies[fc]
toronto_onehot.head()

Unnamed: 0,NeighborhoodP,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [430]:
toronto_onehot.shape

(1609, 232)

**_Let's group the rows by neighborhood and take the mean of each one-hot column, which gives the relative frequency of each venue category within a neighbourhood_**

In [431]:
# Mean of the one-hot columns per neighbourhood, with the neighbourhood kept as
# an ordinary column.
toronto_grouped = toronto_onehot.groupby('NeighborhoodP', as_index=False).mean()
toronto_grouped

Unnamed: 0,NeighborhoodP,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,...,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0
1,"Brockton , Parkdale Village , Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing CentrE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
3,"CN Tower , King and Spadina , Railway Lands , ...",0.066667,0.066667,0.066667,0.133333,0.2,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.016129
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,...,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028169
7,"Commerce Court , Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.01,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [432]:
toronto_grouped.shape

(39, 232)

**_Let's print each neighborhood along with the top 5 most common venues_**

In [433]:
top_venue_limit = 5

for area in toronto_grouped['NeighborhoodP']:
    print("-----" + area + "-----")
    # Transpose the single row of this area into a two-column (venue, freq) table.
    area_row = toronto_grouped[toronto_grouped['NeighborhoodP'] == area]
    freq_table = area_row.T.reset_index()
    freq_table.columns = ['venue', 'freq']
    # The first record holds the area name itself, not a venue category.
    freq_table = freq_table.iloc[1:]
    # Cast to float so the values can be rounded and sorted numerically.
    freq_table = freq_table.assign(freq=freq_table['freq'].astype(float)).round({'freq': 2})
    ranked = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(top_venue_limit))

-----Berczy Park-----
            venue  freq
0     Coffee Shop  0.07
1      Restaurant  0.04
2        Beer Bar  0.04
3            Café  0.04
4  Farmers Market  0.04
-----Brockton , Parkdale Village , Exhibition Place-----
               venue  freq
0               Café  0.13
1          Nightclub  0.09
2        Coffee Shop  0.09
3     Breakfast Spot  0.09
4  Convenience Store  0.04
-----Business reply mail Processing CentrE-----
                venue  freq
0  Light Rail Station  0.12
1         Yoga Studio  0.06
2       Garden Center  0.06
3         Pizza Place  0.06
4          Comic Shop  0.06
-----CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport-----
              venue  freq
0   Airport Service  0.20
1    Airport Lounge  0.13
2           Airport  0.07
3               Bar  0.07
4  Sculpture Garden  0.07
-----Central Bay Street-----
                venue  freq
0         Coffee Shop  0.21
1  Italian Restaurant  0.08
2      

**_Let's put all of this data into one pandas DataFrame_**

In [434]:
# Helper that ranks the venue categories of one neighbourhood row.
def return_top_venues(row, limit):
    """Return the labels of the `limit` largest entries of `row`.

    The first element of `row` is skipped (it is not a venue frequency); the
    remaining values are sorted in descending order and the index labels of the
    leading `limit` entries are returned.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index[:limit]

In [436]:
import numpy as np

venue_limit = 10

# Column labels: 'Neighborhood' followed by one ranked label per venue slot.
# All labels share the same wording; the first three used to be spelled
# "commonn", unlike the remaining ones.
ordinal_suffixes = ['st', 'nd', 'rd']
columns = ['Neighborhood']
for k in range(venue_limit):
    suffix = ordinal_suffixes[k] if k < len(ordinal_suffixes) else 'th'
    columns.append('{}{} most common venue'.format(k + 1, suffix))

# Build every row first and create the frame in a single step, instead of
# filling an empty frame one row at a time. The first element of each grouped
# row is the neighbourhood name; return_top_venues ranks the remaining entries.
ranked_rows = [
    [grouped_row.iloc[0], *return_top_venues(grouped_row, venue_limit)]
    for _, grouped_row in toronto_grouped.iterrows()
]
toronto_neighbour_sorted = pd.DataFrame(ranked_rows, columns=columns)

toronto_neighbour_sorted

Unnamed: 0,Neighborhood,1st most commonn venue,2nd most commonn venue,3rd most commonn venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Restaurant,Beer Bar,Bakery,Seafood Restaurant,Farmers Market,Cheese Shop,Italian Restaurant,Café
1,"Brockton , Parkdale Village , Exhibition Place",Café,Breakfast Spot,Nightclub,Coffee Shop,Bakery,Performing Arts Venue,Pet Store,Climbing Gym,Restaurant,Burrito Place
2,Business reply mail Processing CentrE,Light Rail Station,Yoga Studio,Spa,Garden Center,Garden,Fast Food Restaurant,Farmers Market,Comic Shop,Pizza Place,Restaurant
3,"CN Tower , King and Spadina , Railway Lands , ...",Airport Service,Airport Lounge,Harbor / Marina,Sculpture Garden,Airport Food Court,Airport Gate,Bar,Boat or Ferry,Boutique,Coffee Shop
4,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Ice Cream Shop,Japanese Restaurant,Salad Place,Burger Joint,Bubble Tea Shop,Fried Chicken Joint
5,Christie,Grocery Store,Café,Park,Gas Station,Candy Store,Restaurant,Diner,Baby Store,Athletics & Sports,Italian Restaurant
6,Church and Wellesley,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Yoga Studio,Hotel,Mediterranean Restaurant,Burger Joint,Men's Store,Pub
7,"Commerce Court , Victoria Hotel",Coffee Shop,Café,Restaurant,Hotel,American Restaurant,Gym,Seafood Restaurant,Japanese Restaurant,Italian Restaurant,Deli / Bodega
8,Davisville,Pizza Place,Sandwich Place,Dessert Shop,Gym,Café,Sushi Restaurant,Italian Restaurant,Coffee Shop,Indian Restaurant,Bar
9,Davisville North,Park,Hotel,Food & Drink Shop,Sandwich Place,Department Store,Breakfast Spot,Gym,Coworking Space,Doner Restaurant,Farmers Market


In [437]:
toronto_neighbour_sorted.shape

(39, 11)

**_Let's cluster our data_**

In [438]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 3

# Cluster on the category frequencies only; the neighbourhood name is not a feature.
toronto_grouped_clustering = toronto_grouped.drop('NeighborhoodP', axis=1)

# run k-means clustering
# n_init is pinned to 10, the long-standing default, because newer
# scikit-learn releases changed the default to 'auto'; pinning it keeps the
# cluster assignment reproducible across library versions.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
print(kmeans.labels_[:])

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 2 0 0 0 0 0 0 0 0 0
 0 0]


In [439]:
# add clustering labels
# DataFrame.insert raises if the column already exists, so when the cell is
# re-run the existing column is overwritten instead of inserted a second time.
if 'Cluster Labels' in toronto_neighbour_sorted.columns:
    toronto_neighbour_sorted['Cluster Labels'] = kmeans.labels_
else:
    toronto_neighbour_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_df_new


In [440]:
# Add the cluster label and ranked venues to each Toronto neighbourhood.
# The join starts from toronto_df_new (which toronto_merged was just set to)
# rather than from toronto_merged itself, so re-running this cell does not fail
# on columns that a previous run already added.
toronto_merged = toronto_df_new.join(toronto_neighbour_sorted.set_index('Neighborhood'), on='Neighborhood')

In [441]:
toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st most commonn venue,2nd most commonn venue,3rd most commonn venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,0,Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Theater,Café,Restaurant,Yoga Studio,Event Space
1,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Sushi Restaurant,Diner,Yoga Studio,Burger Joint,Fried Chicken Joint,Beer Bar,Spa,Italian Restaurant,Juice Bar
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Café,Japanese Restaurant,Cosmetics Shop,Restaurant,Middle Eastern Restaurant,Bubble Tea Shop,Thai Restaurant,Fast Food Restaurant
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Hotel,Cocktail Bar,Gastropub,Italian Restaurant,American Restaurant,Gym,Restaurant,Farmers Market
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Health Food Store,Pub,Trail,Neighborhood,Yoga Studio,Donut Shop,Distribution Center,Dog Run,Doner Restaurant,Eastern European Restaurant


In [443]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(c) for c in colors_array]

# add markers to the map
# Rows without a cluster label (NaN after the join) cannot be coloured and are
# skipped instead of crashing the colour lookup.
clustered = toronto_merged.dropna(subset=['Cluster Labels'])
for lat, lon, poi, cluster in zip(clustered['Latitude'], clustered['Longitude'], clustered['Neighborhood'], clustered['Cluster Labels']):
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Same colour assignment as before (cluster 0 takes the last colour), with
    # the wrap-around written out instead of relying on a negative list index.
    marker_color = rainbow[(cluster - 1) % kclusters]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [446]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st most commonn venue,2nd most commonn venue,3rd most commonn venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,0,Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Theater,Café,Restaurant,Yoga Studio,Event Space
1,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Sushi Restaurant,Diner,Yoga Studio,Burger Joint,Fried Chicken Joint,Beer Bar,Spa,Italian Restaurant,Juice Bar
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Café,Japanese Restaurant,Cosmetics Shop,Restaurant,Middle Eastern Restaurant,Bubble Tea Shop,Thai Restaurant,Fast Food Restaurant
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Hotel,Cocktail Bar,Gastropub,Italian Restaurant,American Restaurant,Gym,Restaurant,Farmers Market
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Health Food Store,Pub,Trail,Neighborhood,Yoga Studio,Donut Shop,Distribution Center,Dog Run,Doner Restaurant,Eastern European Restaurant
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Restaurant,Beer Bar,Bakery,Seafood Restaurant,Farmers Market,Cheese Shop,Italian Restaurant,Café
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Ice Cream Shop,Japanese Restaurant,Salad Place,Burger Joint,Bubble Tea Shop,Fried Chicken Joint
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,0,Grocery Store,Café,Park,Gas Station,Candy Store,Restaurant,Diner,Baby Store,Athletics & Sports,Italian Restaurant
8,M5H,Downtown Toronto,"Richmond , Adelaide , King",43.650571,-79.384568,0,Coffee Shop,Café,Restaurant,Hotel,Gym,American Restaurant,Deli / Bodega,Thai Restaurant,Clothing Store,Breakfast Spot
9,M6H,West Toronto,"Dufferin , Dovercourt Village",43.669005,-79.442259,0,Bakery,Pharmacy,Park,Gym / Fitness Center,Middle Eastern Restaurant,Music Venue,Recording Studio,Café,Brewery,Brazilian Restaurant


In [447]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st most commonn venue,2nd most commonn venue,3rd most commonn venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
18,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,1,Park,Lawyer,Bus Line,Swim School,Distribution Center,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
29,M4T,Central Toronto,"Moore Park , Summerhill East",43.689574,-79.38316,1,Park,Playground,Summer Camp,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
33,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,1,Park,Playground,Trail,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


In [448]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st most commonn venue,2nd most commonn venue,3rd most commonn venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
19,M5N,Central Toronto,Roselawn,43.711695,-79.416936,2,Garden,Pool,Diner,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
