In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

In [3]:
#get html from wiki page and create soup object
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(source.text, 'lxml')

#using soup object, iterate the .wikitable to get the data from the HTML page and store it into a list
data = []
columns = []
table = soup.find(class_='wikitable')
for index, tr in enumerate(table.find_all('tr')):
    section = []
    for td in tr.find_all(['th','td']):
        section.append(td.text.rstrip())
    
    #First row of data is the header
    if (index == 0):
        columns = section
    else:
        data.append(section)

#convert list into Pandas DataFrame
canada_df = pd.DataFrame(data = data,columns = columns)
canada_df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


### Here is a full list with all postal codes
<br>
**Then we will remove the Not assigned and N/A parts**

In [4]:
canada_df = canada_df[canada_df['Borough'] != 'Not assigned']
canada_df.head(10)

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern / Rouge
11,M3B,North York,Don Mills
12,M4B,East York,Parkview Hill / Woodbine Gardens
13,M5B,Downtown Toronto,"Garden District, Ryerson"


instructions: More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. These two rows will be combined into one row with the neighborhoods separated with a comma as shown in row 11 in the above table.

In [5]:
#check
canada_df.loc[canada_df['Postal code']=='M5A']

Unnamed: 0,Postal code,Borough,Neighborhood
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [6]:
canada_df.loc[canada_df['Postal code']=='M9V']

Unnamed: 0,Postal code,Borough,Neighborhood
143,M9V,Etobicoke,South Steeles / Silverstone / Humbergate / Jam...


**So here we gonna replace the / sign with , to match the desired table**

In [15]:
canada_df['Neighborhood']=canada_df['Neighborhood'].str.replace('/',',')
canada_df.head(10)

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park , Harbourfront"
5,M6A,North York,"Lawrence Manor , Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,"Malvern , Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill , Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [16]:
canada_df.reset_index(drop=True)

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern , Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill , Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [18]:
np.shape(canada_df)

(103, 3)

In [20]:
from geopy.geocoders import Nominatim

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

---
### Part 2

In [21]:
!wget -q -O "toronto_coordinates.csv" http://cocl.us/Geospatial_data
print('Coordinates downloaded!')
coors = pd.read_csv('toronto_coordinates.csv')

Coordinates downloaded!


In [22]:
coors.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [24]:
canada_df_temp = canada_df.set_index('Postal code')
coors_temp = coors.set_index('Postal Code')
toronto_df_coors = pd.concat([canada_df_temp, coors_temp], axis=1, join='inner')

toronto_df_coors.index.name = 'PostalCode'
toronto_df_coors.reset_index(inplace=True)

print(toronto_df_coors.shape)
toronto_df_coors.head(10)

(103, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill , Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


---
### Part 3

In [25]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="tl-toronto-neigh")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


**Use the technique previous in python visualization**

In [26]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, long, post, borough, neigh in zip(toronto_df_coors['Latitude'], toronto_df_coors['Longitude'], toronto_df_coors['PostalCode'], toronto_df_coors['Borough'], toronto_df_coors['Neighborhood']):
    label = "{} ({}): {}".format(borough, post, neigh)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto

**Here is all the datapoints, we should reduce the number adjusted our data**

In [28]:
toronto_boroughs = ['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']
toronto_central_df = toronto_df_coors[toronto_df_coors['Borough'].isin(toronto_boroughs)].reset_index(drop=True)
#print(toronto_central_df.shape)
toronto_central_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [29]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

for lat, long, post, borough, neigh in zip(toronto_central_df['Latitude'], toronto_central_df['Longitude'], toronto_central_df['PostalCode'], toronto_central_df['Borough'], toronto_central_df['Neighborhood']):
    label = "{} ({}): {}".format(borough, post, neigh)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto

---
## Final Project Part

### Description <br>
**Here we will try to find out the best area in Toronto to live for young people, old people, and the best area for business.<br> This is very useful for some real estate agencies to develop their business and consulting companies to help their clients.**

In [30]:
# First we will use the Foursquare API
CLIENT_ID = '5LCOWRSOOBERWFQWQOF11VSTPQQU12NQK5R4ETGVES1C3ELA' # your Foursquare ID
CLIENT_SECRET = 'B0OC10NHX3IYERVXXKK3UXSN0PPODFWRT1GU2DRD4P2UULTK' # your Foursquare Secret
VERSION = '20190330' # Foursquare API version
radius = 500
LIMIT = 100

venues = []

for lat, long, post, borough, neighborhood in zip(toronto_central_df['Latitude'], toronto_central_df['Longitude'], toronto_central_df['PostalCode'], toronto_central_df['Borough'], toronto_central_df['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [31]:
venues_df = pd.DataFrame(venues)
venues_df.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
print(venues_df.shape)
venues_df.head(10)

(1624, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
5,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
6,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,Corktown Common,43.655618,-79.356211,Park
7,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,The Extension Room,43.653313,-79.359725,Gym / Fitness Center
8,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,The Distillery Historic District,43.650244,-79.359323,Historic Site
9,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot


In [32]:
# Now we can check the type of venues with its counts here
venues_df.groupby(['PostalCode', 'Borough', 'Neighborhood'])['VenueName'].count()

PostalCode  Borough           Neighborhood                                                                                                    
M4E         East Toronto      The Beaches                                                                                                           5
M4K         East Toronto      The Danforth West , Riverdale                                                                                        43
M4L         East Toronto      India Bazaar , The Beaches West                                                                                      20
M4M         East Toronto      Studio District                                                                                                      40
M4N         Central Toronto   Lawrence Park                                                                                                         3
M4P         Central Toronto   Davisville North                                                             

**The above shows us, with a zip code, in each area, the number of venues**

In [33]:
print("there are ",len(venues_df['VenueCategory'].unique())," types of venues in Toronto")

there are  232  types of venues in Toronto


### Now we will create the matrix for data mining

In [34]:
# introduce dummy variables
toronto_central_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add postal, borough and neighborhood column back to dataframe
toronto_central_onehot['PostalCode'] = venues_df['PostalCode'] 
toronto_central_onehot['Borough'] = venues_df['Borough'] 
toronto_central_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move postal, borough and neighborhood column to the first column
fixed_columns = list(toronto_central_onehot.columns[-3:]) + list(toronto_central_onehot.columns[:-3])
toronto_central_onehot = toronto_central_onehot[fixed_columns]

print(toronto_central_onehot.shape)
toronto_central_onehot.head(10)

(1624, 235)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M5A,Downtown Toronto,"Regent Park , Harbourfront",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M5A,Downtown Toronto,"Regent Park , Harbourfront",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M5A,Downtown Toronto,"Regent Park , Harbourfront",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,M5A,Downtown Toronto,"Regent Park , Harbourfront",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,M5A,Downtown Toronto,"Regent Park , Harbourfront",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,M5A,Downtown Toronto,"Regent Park , Harbourfront",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,M5A,Downtown Toronto,"Regent Park , Harbourfront",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,M5A,Downtown Toronto,"Regent Park , Harbourfront",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


**Here the 235 cols are 232 types of venues plus post code, borough and neighborhood**

In [35]:
# to avoid sparse matrix, we will use the mean freq instead of occurance
toronto_central_venues_freq = toronto_central_onehot.groupby(['PostalCode', 'Borough', 'Neighborhoods']).mean().reset_index()
print(toronto_central_venues_freq.shape)
toronto_central_venues_freq.head()

(39, 235)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,M4E,East Toronto,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,East Toronto,"The Danforth West , Riverdale",0.0,0.0,0.0,0.0,0.0,0.0,0.023256,...,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.023256
2,M4L,East Toronto,"India Bazaar , The Beaches West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,East Toronto,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.025
4,M4N,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
num_top_venues = 15

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['PostalCode', 'Borough', 'Neighborhoods']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns
# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_central_venues_freq['PostalCode']
neighborhoods_venues_sorted['Borough'] = toronto_central_venues_freq['Borough']
neighborhoods_venues_sorted['Neighborhoods'] = toronto_central_venues_freq['Neighborhoods']

for ind in np.arange(toronto_central_venues_freq.shape[0]):
    row_categories = toronto_central_venues_freq.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
neighborhoods_venues_sorted

Unnamed: 0,PostalCode,Borough,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
27,M5V,Downtown Toronto,"CN Tower , King and Spadina , Railway Lands , ...",Airport Service,Airport Lounge,Airport Terminal,Airport,Coffee Shop,Boutique,Boat or Ferry,Bar,Plane,Sculpture Garden,Harbor / Marina,Airport Food Court,Airport Gate,Costume Shop,Distribution Center
32,M6J,West Toronto,"Little Portugal , Trinity",Bar,Restaurant,Men's Store,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Café,Asian Restaurant,Miscellaneous Shop,Brewery,Record Shop,Pizza Place,Park,Cocktail Bar,New American Restaurant,Yoga Studio
25,M5S,Downtown Toronto,"University of Toronto , Harbord",Café,Bakery,Bar,Italian Restaurant,Japanese Restaurant,Restaurant,Bookstore,Chinese Restaurant,Dessert Shop,College Arts Building,Pub,College Gym,Sandwich Place,Noodle House,Nightclub
33,M6K,West Toronto,"Brockton , Parkdale Village , Exhibition Place",Café,Breakfast Spot,Coffee Shop,Yoga Studio,Gym,Pet Store,Performing Arts Venue,Nightclub,Italian Restaurant,Intersection,Convenience Store,Furniture / Home Store,Stadium,Climbing Gym,Burrito Place
15,M5C,Downtown Toronto,St. James Town,Café,Coffee Shop,Cocktail Bar,Italian Restaurant,Gastropub,American Restaurant,Seafood Restaurant,Clothing Store,Moroccan Restaurant,Cosmetics Shop,Creperie,Lingerie Store,Department Store,Restaurant,Hotel
3,M4M,East Toronto,Studio District,Café,Coffee Shop,Gastropub,Bakery,Brewery,American Restaurant,Neighborhood,Sandwich Place,Cheese Shop,Pet Store,Clothing Store,Park,Comfort Food Restaurant,Convenience Store,Seafood Restaurant
26,M5T,Downtown Toronto,"Kensington Market , Chinatown , Grange Park",Café,Coffee Shop,Mexican Restaurant,Dessert Shop,Vietnamese Restaurant,Bakery,Bar,Grocery Store,Vegetarian / Vegan Restaurant,Gaming Cafe,Cheese Shop,Caribbean Restaurant,Pharmacy,Park,Cocktail Bar
14,M5B,Downtown Toronto,"Garden District, Ryerson",Clothing Store,Coffee Shop,Café,Restaurant,Middle Eastern Restaurant,Bubble Tea Shop,Cosmetics Shop,Japanese Restaurant,Hotel,Bookstore,Fast Food Restaurant,Tea Room,Theater,Italian Restaurant,Lingerie Store
6,M4R,Central Toronto,North Toronto West,Clothing Store,Coffee Shop,Park,Salon / Barbershop,Restaurant,Rental Car Location,Café,Chinese Restaurant,Miscellaneous Shop,Sporting Goods Shop,Mexican Restaurant,Dessert Shop,Diner,Fast Food Restaurant,Ice Cream Shop
19,M5J,Downtown Toronto,"Harbourfront East , Union Station , Toronto Is...",Coffee Shop,Aquarium,Hotel,Café,Scenic Lookout,Brewery,Sporting Goods Shop,Restaurant,Fried Chicken Joint,Italian Restaurant,Bar,Park,Music Venue,Pizza Place,History Museum


In [47]:
kclusters = 7

toronto_central_venues_freq_clustering = toronto_central_venues_freq.drop(['PostalCode', 'Borough', 'Neighborhoods'], 1)

kmeans = KMeans(n_clusters=kclusters, random_state=100).fit(toronto_central_venues_freq_clustering)

toronto_central_clustered_df = toronto_central_df
toronto_central_clustered_df['Cluster'] = kmeans.labels_

toronto_central_clustered_df = toronto_central_clustered_df.join(neighborhoods_venues_sorted.drop(['Borough', 'Neighborhoods'], 1).set_index('PostalCode'), on='PostalCode')
toronto_central_clustered_df.sort_values(['Cluster'] + freqColumns, inplace=True)
toronto_central_clustered_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
32,M5V,Downtown Toronto,"CN Tower , King and Spadina , Railway Lands , ...",43.628947,-79.39442,0,Airport Service,Airport Lounge,Airport Terminal,Airport,...,Boutique,Boat or Ferry,Bar,Plane,Sculpture Garden,Harbor / Marina,Airport Food Court,Airport Gate,Costume Shop,Distribution Center
11,M6J,West Toronto,"Little Portugal , Trinity",43.647927,-79.41975,0,Bar,Restaurant,Men's Store,Vietnamese Restaurant,...,Café,Asian Restaurant,Miscellaneous Shop,Brewery,Record Shop,Pizza Place,Park,Cocktail Bar,New American Restaurant,Yoga Studio
27,M5S,Downtown Toronto,"University of Toronto , Harbord",43.662696,-79.400049,0,Café,Bakery,Bar,Italian Restaurant,...,Restaurant,Bookstore,Chinese Restaurant,Dessert Shop,College Arts Building,Pub,College Gym,Sandwich Place,Noodle House,Nightclub
14,M6K,West Toronto,"Brockton , Parkdale Village , Exhibition Place",43.636847,-79.428191,0,Café,Breakfast Spot,Coffee Shop,Yoga Studio,...,Pet Store,Performing Arts Venue,Nightclub,Italian Restaurant,Intersection,Convenience Store,Furniture / Home Store,Stadium,Climbing Gym,Burrito Place
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Café,Coffee Shop,Cocktail Bar,Italian Restaurant,...,American Restaurant,Seafood Restaurant,Clothing Store,Moroccan Restaurant,Cosmetics Shop,Creperie,Lingerie Store,Department Store,Restaurant,Hotel
17,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Gastropub,Bakery,...,American Restaurant,Neighborhood,Sandwich Place,Cheese Shop,Pet Store,Clothing Store,Park,Comfort Food Restaurant,Convenience Store,Seafood Restaurant
30,M5T,Downtown Toronto,"Kensington Market , Chinatown , Grange Park",43.653206,-79.400049,0,Café,Coffee Shop,Mexican Restaurant,Dessert Shop,...,Bakery,Bar,Grocery Store,Vegetarian / Vegan Restaurant,Gaming Cafe,Cheese Shop,Caribbean Restaurant,Pharmacy,Park,Cocktail Bar
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Café,Restaurant,...,Bubble Tea Shop,Cosmetics Shop,Japanese Restaurant,Hotel,Bookstore,Fast Food Restaurant,Tea Room,Theater,Italian Restaurant,Lingerie Store
36,M5X,Downtown Toronto,"First Canadian Place , Underground city",43.648429,-79.38228,0,Coffee Shop,Café,Hotel,Japanese Restaurant,...,Restaurant,Deli / Bodega,Salad Place,Steakhouse,Asian Restaurant,Seafood Restaurant,American Restaurant,Concert Hall,Bar,Sushi Restaurant
16,M5L,Downtown Toronto,"Commerce Court , Victoria Hotel",43.648198,-79.379817,0,Coffee Shop,Café,Restaurant,Hotel,...,American Restaurant,Italian Restaurant,Japanese Restaurant,Seafood Restaurant,Deli / Bodega,Beer Bar,Cocktail Bar,Breakfast Spot,Gastropub,Tea Room


In [48]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, post, bor, poi, cluster in zip(toronto_central_clustered_df['Latitude'], toronto_central_clustered_df['Longitude'], toronto_central_clustered_df['PostalCode'], toronto_central_clustered_df['Borough'], toronto_central_clustered_df['Neighborhood'], toronto_central_clustered_df['Cluster']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [55]:
toronto_central_clustered_df[toronto_central_clustered_df['Cluster']==1]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,Pub,Coffee Shop,Neighborhood,Trail,...,Dumpling Restaurant,Donut Shop,Eastern European Restaurant,Deli / Bodega,Doner Restaurant,Dog Run,Electronics Store,Distribution Center,Discount Store,Diner


### Conclusion:
<br>
#### Here I create 7 clusters based on the 15 most important features of each area.<br>
- For the red points, they are the area that are similar which have **no unique** features from other area.<br>
- We can find for the orange points, there are more art facilities(Music venue, theater, performing arts venue...) and people seems doing more exercise here (more Athletics & Sports, parks...). So the orange area should be the place for people who want to live a healthy life with parks and art venues around, for example, people retired.<br>
- For cluster 3 which is the light blue point, the top 1-14 are all restaurant related venues. In other words, this area is a really diversified foodies area. For people who want to open a fancy restaurant, this is the place he should go to.<br>
- For cluster 2 which are the darker blue points, this is a tourists place with lots of hotels, restaurant , gyms. So for business owners who are targeting on tourists, this is definetly the best area to go.
- For cluster 5 which is the yellow point. This area has many bars, pubs, Italian restaurants, etc. So this should be an area that young people love to hang out and date. Young people who concerns night life important or people who are trying to find a best dating place should look into this area.
- For cluster 1 which is the purple, this is a typical living area for people not that rich since the top venues including the trail and discount store.