Step 1 - import required libraries 

In [1]:
#import required libraries
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation
from bs4 import BeautifulSoup #import the Beautiful soup functions to parse the data returned from the website
from geopy.geocoders import Nominatim # import Nominatim for lan, lat

Step 2 - Using requests and BeautifulSoap libraries to scrap the data from wiki page and identified the table for data extract

In [2]:
# wiki URL to get Toronta neighborhood 
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
results = requests.get(url)
soup = BeautifulSoup(results.text,'html.parser')
all_tables=soup.find_all('table')
# assign the right table to object for data parsing
right_table = all_tables[0]

Step 3- Extract the table data from webpage and store in panda dataframe

In [3]:
# to get all rows and cols from this table
rows = right_table.find_all("tr")
rows_count = len(rows)
# define list to store the data 
postcode = []
borough = []
neighbourhood = []

for i in range(1,rows_count):
    cells = rows[i].findAll('td')
    postcode.append(cells[0].text)
    borough.append(cells[1].text)
    neighbourhood.append(cells[2].text.strip('\n'))

toronto_df = pd.DataFrame({'postcode':postcode,
                           'borough':borough,
                           'neighbourhood':neighbourhood})
toronto_df.head()

Unnamed: 0,postcode,borough,neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [4]:
# print the shape of the dataframe
toronto_df.shape

(287, 3)

Step 4 - Clean dataframe by removing 'Not assigned' rows 

In [5]:
toronto_df_clean = toronto_df.copy()
toronto_df_clean.drop(toronto_df_clean[toronto_df_clean['borough'] =='Not assigned'].index,inplace=True)
toronto_df_clean.head()

Unnamed: 0,postcode,borough,neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [6]:
# to find out that Not assigned neighourhood. it will be same as borough
toronto_df_clean[toronto_df_clean['neighbourhood']=='Not assigned']

Unnamed: 0,postcode,borough,neighbourhood
7,M7A,Queen's Park,Not assigned


In [7]:
# to change Not assigned value in neighbourhood value to borough value
for index, row in toronto_df_clean.iterrows():
    if row['neighbourhood']=='Not assigned':
        toronto_df_clean.at[index,'neighbourhood']=toronto_df_clean.at[index,'borough']



In [8]:
# to check the values are updated in the dataframe
toronto_df_clean[toronto_df_clean['neighbourhood']==toronto_df_clean['borough']]

Unnamed: 0,postcode,borough,neighbourhood
7,M7A,Queen's Park,Queen's Park


Step 5 - Group the neighbourhood by Postcode and borough. More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. These two rows will be combined into one row with the neighborhoods separated with a comma as shown in row 11 in the above table

In [9]:
toronto_df_grouped = toronto_df_clean.groupby(['postcode','borough']).agg(neighbourhood=('neighbourhood', ', '.join)).reset_index()
toronto_df_grouped.head()

Unnamed: 0,postcode,borough,neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


Step 6 - print the shape of the final dataframe

In [10]:
toronto_df_grouped.shape

(103, 3)

Step 7 - mapping geographical coordinates of a given postal code. API is not working properly it shows time out error most of time. So mapping with CSV file


In [11]:
# to open the geo file
geo_df = pd.read_csv(r'C:\DataScience\IBM Data Science\Final_project\Geospatial_Coordinates.csv')
geo_df.columns =['postcode','Latitude','Longitude']
geo_df.head()

Unnamed: 0,postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
# to check the shape of the geo dataframe
geo_df.shape


(103, 3)

In [13]:
#print the new dataframe
toronto_geo = pd.merge(toronto_df_grouped, geo_df, on='postcode')
toronto_geo

Unnamed: 0,postcode,borough,neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437


Step 8 -Use geopy library to get the latitude and longitude values of New York City.

In [15]:
from geopy.geocoders import Nominatim
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Ontario are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, Ontario are 43.653963, -79.387207.


Step 9 -Create a map of Toronto with neighborhoods using latitude and longitude values

In [16]:
# create map of New York using latitude and longitude values
import folium
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_geo['Latitude'], toronto_geo['Longitude'], toronto_geo['borough'], toronto_geo['neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Step 10 - Define Foursquare Credentials and Version

In [17]:
CLIENT_ID = '####################################' # your Foursquare ID
CLIENT_SECRET = '################################' # your Foursquare Secret
VERSION = '20191109' # Foursquare API version



In [18]:
toronto_geo['borough'].unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       "Queen's Park", 'Mississauga', 'Etobicoke'], dtype=object)

In [45]:
# filter data where boroughs is Toronto 
toronto_neigh = toronto_geo[toronto_geo['borough'].str.contains('Toronto', regex=False)].reset_index(drop=True)
toronto_neigh.head()

Unnamed: 0,postcode,borough,neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [46]:
print(toronto_neigh.shape)

(39, 5)


In [47]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# function to get neighbouring venues in and around Tronoto
def getNearbyVenues(names, latitudes, longitudes,radius=800):
    
    venues_list=[]
    LIMIT = 100
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
                
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [48]:
# to get venues details around toronto neighbourhood
toronto_venues = getNearbyVenues(names=toronto_neigh['neighbourhood'],
                                   latitudes=toronto_neigh['Latitude'],
                                   longitudes=toronto_neigh['Longitude'])


The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The Junction Sout

lets check the data and size of toronto_venues

In [49]:
print(toronto_venues.shape)
toronto_venues[toronto_venues['Neighborhood']== "Queen's Park"]

(2727, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
2718,Queen's Park,43.667856,-79.532242,Java Joe's Village Cafe,43.662461,-79.532054,Café
2719,Queen's Park,43.667856,-79.532242,TD Canada Trust,43.662658,-79.53186,Bank
2720,Queen's Park,43.667856,-79.532242,Shoppers Drug Mart,43.663067,-79.531753,Pharmacy
2721,Queen's Park,43.667856,-79.532242,Thorncrest Drug Store,43.662988,-79.531817,Pharmacy
2722,Queen's Park,43.667856,-79.532242,Foodland - Toronto,43.662724,-79.531984,Grocery Store
2723,Queen's Park,43.667856,-79.532242,Thorncrest Plaza,43.66262,-79.532146,Shopping Mall
2724,Queen's Park,43.667856,-79.532242,Princess Margaret Park,43.667835,-79.539934,Playground
2725,Queen's Park,43.667856,-79.532242,Humber Valley Park,43.664825,-79.524999,Park
2726,Queen's Park,43.667856,-79.532242,Humber Valley Rink,43.664826,-79.524873,Skating Rink


Let's find out how many unique categories can be curated from all the returned venues

In [50]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 283 uniques categories.


### Analyze Each Neighborhood

In [51]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

cl = toronto_onehot.columns.tolist()
cl.remove('Neighborhood')
cl.insert(0,'Neighborhood')
toronto_onehot = toronto_onehot[cl]
toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Tunnel,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [52]:
# let's examine the new dataframe size.
toronto_onehot.shape

(2727, 283)

In [53]:
toronto_onehot['Neighborhood'].value_counts()

The Danforth West, Riverdale                                                                                  100
Adelaide, King, Richmond                                                                                      100
Stn A PO Boxes 25 The Esplanade                                                                               100
St. James Town                                                                                                100
Harbourfront East, Toronto Islands, Union Station                                                             100
Central Bay Street                                                                                            100
Harbord, University of Toronto                                                                                100
Little Portugal, Trinity                                                                                      100
Design Exchange, Toronto Dominion Centre                                                

In [55]:
#let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Tunnel,Udon Restaurant,University,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wings Joint,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,...,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.038462,0.038462,0.038462,0.076923,0.076923,0.076923,0.0,...,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [56]:
#lets confirm the last size of the dataframe
toronto_grouped.shape

(39, 283)

In [57]:
#function to sort the venues in descending order.
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [58]:
#new dataframe and display the top 10 venues for each neighborhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Hotel,Steakhouse,Sushi Restaurant,Gastropub,Restaurant,Theater,Bar,Bakery
1,Berczy Park,Coffee Shop,Café,Restaurant,Hotel,Beer Bar,Japanese Restaurant,Italian Restaurant,Bakery,Cheese Shop,Steakhouse
2,"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,Café,Furniture / Home Store,Bakery,Restaurant,Arts & Crafts Store,Bar,Nightclub,Theme Park Ride / Attraction,Italian Restaurant
3,Business Reply Mail Processing Centre 969 Eastern,Fast Food Restaurant,Light Rail Station,Park,Burrito Place,Grocery Store,Bar,Brewery,Coffee Shop,Harbor / Marina,Pizza Place
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Harbor / Marina,Boat or Ferry,Airport Lounge,Sculpture Garden,Airport Terminal,Airport Service,Rental Car Location,Airport Gate,Coffee Shop,Bar


Cluster Neighborhoods

In [59]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [62]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted.head()


(39, 12)


Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,"Adelaide, King, Richmond",Coffee Shop,Café,Hotel,Steakhouse,Sushi Restaurant,Gastropub,Restaurant,Theater,Bar,Bakery
1,0,Berczy Park,Coffee Shop,Café,Restaurant,Hotel,Beer Bar,Japanese Restaurant,Italian Restaurant,Bakery,Cheese Shop,Steakhouse
2,0,"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,Café,Furniture / Home Store,Bakery,Restaurant,Arts & Crafts Store,Bar,Nightclub,Theme Park Ride / Attraction,Italian Restaurant
3,0,Business Reply Mail Processing Centre 969 Eastern,Fast Food Restaurant,Light Rail Station,Park,Burrito Place,Grocery Store,Bar,Brewery,Coffee Shop,Harbor / Marina,Pizza Place
4,0,"CN Tower, Bathurst Quay, Island airport, Harbo...",Harbor / Marina,Boat or Ferry,Airport Lounge,Sculpture Garden,Airport Terminal,Airport Service,Rental Car Location,Airport Gate,Coffee Shop,Bar


In [63]:
neighborhoods_venues_sorted['Cluster Labels'].unique()

array([0, 4, 2, 1, 3], dtype=int64)

In [64]:
toronto_merged = toronto_neigh
toronto_merged.columns = ['Postcode','Borough','Neighborhood','Latitude','Longitude']
toronto_merged.head()
toronto_merged.shape

(39, 5)

In [65]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
#toronto_merged['Cluster Labels'].astype(int)
toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Pub,Bar,Breakfast Spot,Japanese Restaurant,Bakery,Sandwich Place,Caribbean Restaurant,Indian Restaurant,Bagel Shop,Bank
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Pub,Café,Restaurant,Ice Cream Shop,Fast Food Restaurant,Italian Restaurant,Yoga Studio,Discount Store
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,0,Indian Restaurant,Park,Bus Stop,Café,Fast Food Restaurant,Brewery,Burger Joint,Sandwich Place,Italian Restaurant,Coffee Shop
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Bar,Bakery,Diner,Sandwich Place,American Restaurant,Pizza Place,Sushi Restaurant,Italian Restaurant
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,2,Gym / Fitness Center,Park,Bookstore,Bus Line,Restaurant,Café,Coffee Shop,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


visualize the Toronto neighborhood based on the clusters

In [66]:
# create map
import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Conclusion - based on clustering , There are around 38 neighbhorhood categorised into cluster 0, those neighbhors are East Toronto, West Toronto and Downtwon Toronto

In [67]:
toronto_merged[toronto_merged['Cluster Labels'] ==0]

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Pub,Bar,Breakfast Spot,Japanese Restaurant,Bakery,Sandwich Place,Caribbean Restaurant,Indian Restaurant,Bagel Shop,Bank
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Pub,Café,Restaurant,Ice Cream Shop,Fast Food Restaurant,Italian Restaurant,Yoga Studio,Discount Store
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,0,Indian Restaurant,Park,Bus Stop,Café,Fast Food Restaurant,Brewery,Burger Joint,Sandwich Place,Italian Restaurant,Coffee Shop
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Bar,Bakery,Diner,Sandwich Place,American Restaurant,Pizza Place,Sushi Restaurant,Italian Restaurant
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Pizza Place,Italian Restaurant,Coffee Shop,Café,Pharmacy,Dessert Shop,Sporting Goods Shop,Ramen Restaurant,Park,Deli / Bodega
7,M4S,Central Toronto,Davisville,43.704324,-79.38879,0,Italian Restaurant,Sushi Restaurant,Coffee Shop,Pizza Place,Café,Gym,Dessert Shop,Sandwich Place,Yoga Studio,Hotel
9,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049,0,Coffee Shop,Sushi Restaurant,Italian Restaurant,Thai Restaurant,Bagel Shop,Spa,Grocery Store,Gym,French Restaurant,Pub
11,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675,0,Coffee Shop,Restaurant,Café,Pizza Place,Breakfast Spot,Beer Store,Italian Restaurant,Bakery,Japanese Restaurant,Thai Restaurant
12,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,0,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Café,Restaurant,Men's Store,Gay Bar,Diner,Park,Grocery Store
13,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,0,Coffee Shop,Park,Restaurant,Italian Restaurant,Pub,Bakery,Café,Theater,Dance Studio,Thai Restaurant


In [68]:
toronto_merged[toronto_merged['Cluster Labels'] ==1]

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,1,Grocery Store,Park,Thai Restaurant,Sandwich Place,Gym,Playground,Café,Candy Store,Japanese Restaurant,Diner
10,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,1,Trail,Park,Playground,Grocery Store,Bank,Candy Store,Comic Shop,Design Studio,Event Space,Ethiopian Restaurant
38,M9A,Downtown Toronto,Queen's Park,43.667856,-79.532242,1,Pharmacy,Café,Skating Rink,Playground,Grocery Store,Bank,Park,Shopping Mall,Design Studio,Dessert Shop


In [69]:
toronto_merged[toronto_merged['Cluster Labels'] ==2]

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,2,Gym / Fitness Center,Park,Bookstore,Bus Line,Restaurant,Café,Coffee Shop,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


In [70]:
toronto_merged[toronto_merged['Cluster Labels'] ==3]

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,M5N,Central Toronto,Roselawn,43.711695,-79.416936,3,Playground,Health & Beauty Service,IT Services,Garden,Comedy Club,Comfort Food Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store


In [71]:
toronto_merged[toronto_merged['Cluster Labels'] ==4]

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,4,Coffee Shop,Sporting Goods Shop,Café,Diner,Skating Rink,Italian Restaurant,Rental Car Location,Restaurant,Bakery,Salon / Barbershop
23,M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307,4,Coffee Shop,Italian Restaurant,Park,Gastropub,Trail,Bakery,Bagel Shop,Japanese Restaurant,Deli / Bodega,Café
