In [2]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

## Part 1: Scrape the Wikipedia page and gathering data into a Pandas dataframe

In [3]:
# Importing necessary packages

import pandas as pd 
import numpy as np
import requests
from bs4 import BeautifulSoup

print('Libraries imported.')

Libraries imported.


In [4]:
# Scraping using the BeautifulSoup package

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
r = requests.get(url)
soup = BeautifulSoup(r.content, 'html.parser')

Table = soup.find('table', class_='wikitable')
Postalcodes = {'PostalCode':[], 'Borough':[], 'Neighborhood':[]}
if Table:
    rows = Table.findAll('tr')
    for tr in rows[1:]:
        tds = tr.findAll('td')
        code = tds[0].text.strip()
        borough = tds[1].text.strip()
        nhood = tds[2].text.strip()
        if borough != 'Not assigned':
            Postalcodes['PostalCode'].append(code)
            Postalcodes['Borough'].append(borough)
            if nhood == 'Not assigned': nhood = borough
            Postalcodes['Neighborhood'].append(nhood)

# Convert the dictionary into a dataframe
Postalcodes_df = pd.DataFrame(Postalcodes)


# Combining rows with the same postal code into a single row with the neighborhoods separated with a comma
Postalcodes_df =  Postalcodes_df.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(lambda x: ', '.join(x)).reset_index()



In [5]:
#Have a look at the dataframe created

print(Postalcodes_df.shape)

Postalcodes_df.head()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## Part 2:Fetching the coordinates of each neighborhood in Toronto

In [6]:
# Using the csv file provided to fetch the coordinates

geo_coordinates = pd.read_csv('https://cocl.us/Geospatial_data')
geo_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [7]:
# Combining the two dataframes and adding coordinate data to the original dataframe

Toronto_Postalcodes = Postalcodes_df.join(geo_coordinates.set_index('Postal Code'), on='PostalCode')

print('The dataframe has {} boroughs and {} postal code areas.'.format(
        len(Toronto_Postalcodes['Borough'].unique()),
        Toronto_Postalcodes.shape[0] ))
Toronto_Postalcodes.head()

The dataframe has 11 boroughs and 103 postal code areas.


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Part 3 Exploring and clustering the neighborhoods in Toronto

### 1. Fetch the geographical coordinates of Toronto city and creating a map

In [8]:
#Filter the data to boroughs containing the word 'Toronto'

Toronto_df = Toronto_Postalcodes[Toronto_Postalcodes['Borough'].str.contains('Toronto')]
Toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [9]:
# Fetch the coordinates of the first row of our Toronto Boroughs data
latitude= Toronto_df["Latitude"].iloc[0]
longitude= Toronto_df["Longitude"].iloc[0]

In [10]:
# Create map of Toronto using latitude and longitude values
Toronto_Map = folium.Map(location=(latitude,longitude),zoom_start=10)

# Add markers to map
for lat,lng,borough,neighborhood in zip(Toronto_df['Latitude'], Toronto_df['Longitude'], Toronto_df['Borough'],Toronto_df['Neighborhood']):
    label = '{},{}'.format(neighborhood,borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(Toronto_Map)  
    
Toronto_Map

Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.

In [11]:
#Define Foursquare Credentials and Version
CLIENT_ID = 'I02PTHQVKY2CPBCNFCHPZFRV52ICY0PI2A1AWKGJBDU3U25K' # your Foursquare ID
CLIENT_SECRET = 'YFC0LQNCNJMJIEJO4QLGYCNLJHL5IBOFXEXSPIKWVCVJYLFT' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: I02PTHQVKY2CPBCNFCHPZFRV52ICY0PI2A1AWKGJBDU3U25K
CLIENT_SECRET:YFC0LQNCNJMJIEJO4QLGYCNLJHL5IBOFXEXSPIKWVCVJYLFT


In [12]:
#Exploring the first neighborhood in our dataframe

Neighborhood1 = Toronto_df['Neighborhood'].iloc[0]


# Get the neighborhood's latitude and longitude values.

Neighborhood1_latitude = latitude
Neighborhood1_longitude = longitude

print('Latitude and longitude values of {} are {}, {}.'.format(Neighborhood1, 
                                                               Neighborhood1_latitude, 
                                                               Neighborhood1_longitude))

Latitude and longitude values of The Beaches are 43.67635739999999, -79.2930312.


### 2.Now, let's get the top 100 venues that are in The Beaches within a radius of 500 meters.

In [13]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    Neighborhood1_latitude, 
    Neighborhood1_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=I02PTHQVKY2CPBCNFCHPZFRV52ICY0PI2A1AWKGJBDU3U25K&client_secret=YFC0LQNCNJMJIEJO4QLGYCNLJHL5IBOFXEXSPIKWVCVJYLFT&v=20180605&ll=43.67635739999999,-79.2930312&radius=500&limit=100'

Send the GET request and examine the results

In [14]:
Neighborhood_json = requests.get(url).json()
Neighborhood_json

{'meta': {'code': 200, 'requestId': '5da81372d69ed0002c5b8b09'},
 'response': {'headerLocation': 'The Beaches',
  'headerFullLocation': 'The Beaches, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.680857404499996,
    'lng': -79.28682091449052},
   'sw': {'lat': 43.67185739549999, 'lng': -79.29924148550948}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd461bc77b29c74a07d9282',
       'name': 'Glen Manor Ravine',
       'location': {'address': 'Glen Manor',
        'crossStreet': 'Queen St.',
        'lat': 43.67682094413784,
        'lng': -79.29394208780985,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.67682094413784,
          'lng': -79.29394208780985}],
        'distanc

In [17]:
# Function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [18]:
# Now we are ready to clean the json and structure it into a pandas dataframe

import json
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

Venues = Neighborhood_json['response']['groups'][0]['items']
    
Nearby_Venues = json_normalize(Venues) # flatten JSON

# Filter Columns
Filtered_Columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
Nearby_Venues =Nearby_Venues.loc[:, Filtered_Columns]

# Filter the category for each row
Nearby_Venues['venue.categories'] = Nearby_Venues.apply(get_category_type, axis=1)

# Clean columns
Nearby_Venues.columns = [col.split(".")[-1] for col in Nearby_Venues.columns]

Nearby_Venues

Unnamed: 0,name,categories,lat,lng
0,Glen Manor Ravine,Trail,43.676821,-79.293942
1,The Big Carrot Natural Food Market,Health Food Store,43.678879,-79.297734
2,Grover Pub and Grub,Pub,43.679181,-79.297215
3,Upper Beaches,Neighborhood,43.680563,-79.292869


In [19]:
print('{} venues returned by Foursquare.'.format(Nearby_Venues.shape[0]))

4 venues returned by Foursquare.


###  3.Explore Neighborhoods in Toronto

In [20]:
# Create a function to repeat the same process for all neighborhoods in Toronto

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [21]:
# Run the above function on each neighborhood and create a new dataframe called Toronto_Venues

Toronto_Venues = getNearbyVenues(names=Toronto_df['Neighborhood'],
                                   latitudes=Toronto_df['Latitude'],
                                   longitudes=Toronto_df['Longitude']
                                  )


The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The 

In [22]:
# Check the size of the resulting Dataframe
print(Toronto_Venues.shape)
Toronto_Venues.head()

(1709, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [24]:
# Number of venues returned for each neighborhood
Toronto_Venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,55,55,55,55,55,55
"Brockton, Exhibition Place, Parkdale Village",27,27,27,27,27,27
Business Reply Mail Processing Centre 969 Eastern,16,16,16,16,16,16
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",17,17,17,17,17,17
"Cabbagetown, St. James Town",46,46,46,46,46,46
Central Bay Street,86,86,86,86,86,86
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,15,15,15,15,15,15
Church and Wellesley,87,87,87,87,87,87


In [25]:
# Number of unique categories that can be curated from all the returned venues

print('There are {} uniques categories.'.format(len(Toronto_Venues['Venue Category'].unique())))

There are 241 uniques categories.


### 4.Analyze each Neighborhood

In [26]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_Venues[['Venue Category']], prefix="", prefix_sep="")


# add neighborhood column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_Venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
Toronto_onehot.shape

(1709, 241)

In [28]:
# Now let us group rows by neighborhood and by taking the mean of frequency of the occurence of each category

Toronto_Grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_Grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.074074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,...,0.0,0.0,0.0,0.011628,0.0,0.0,0.011628,0.0,0.0,0.0
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.05,0.0,0.04,0.01,0.0,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.022989,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,...,0.0,0.0,0.0,0.0,0.011494,0.011494,0.0,0.0,0.011494,0.0


In [29]:
#Lets confirm the new size
Toronto_Grouped.shape

(38, 241)

In [30]:
# Lets print each neighborhood with top 5 most common venues

num_top_venues = 5

for hood in Toronto_Grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Toronto_Grouped[Toronto_Grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
             venue  freq
0      Coffee Shop  0.07
1             Café  0.05
2       Steakhouse  0.04
3              Bar  0.04
4  Thai Restaurant  0.04


----Berczy Park----
            venue  freq
0     Coffee Shop  0.07
1    Cocktail Bar  0.05
2        Beer Bar  0.04
3          Bakery  0.04
4  Farmers Market  0.04


----Brockton, Exhibition Place, Parkdale Village----
                   venue  freq
0                   Café  0.11
1            Yoga Studio  0.07
2         Breakfast Spot  0.07
3  Performing Arts Venue  0.07
4            Coffee Shop  0.07


----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0  Light Rail Station  0.12
1         Yoga Studio  0.06
2       Auto Workshop  0.06
3          Comic Shop  0.06
4                Park  0.06


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0   Airport Service  0.18
1    Airport 

In [31]:
# Lets write a function to sort the venues in descending order

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [32]:
# Now let's create the new dataframe and display the top 10 venues for each neighborhood.

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Neighborhoods_Venues_Sorted = pd.DataFrame(columns=columns)
Neighborhoods_Venues_Sorted['Neighborhood'] = Toronto_Grouped['Neighborhood']

for ind in np.arange(Toronto_Grouped.shape[0]):
    Neighborhoods_Venues_Sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_Grouped.iloc[ind, :], num_top_venues)

Neighborhoods_Venues_Sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Thai Restaurant,Bar,Steakhouse,Hotel,Restaurant,Sushi Restaurant,Asian Restaurant,American Restaurant
1,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Bakery,Steakhouse,Seafood Restaurant,Café,Cheese Shop,Farmers Market,Clothing Store
2,"Brockton, Exhibition Place, Parkdale Village",Café,Yoga Studio,Performing Arts Venue,Breakfast Spot,Coffee Shop,Italian Restaurant,Nightclub,Convenience Store,Intersection,Pet Store
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Garden Center,Pizza Place,Recording Studio,Restaurant,Burrito Place,Brewery,Skate Park,Farmers Market
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Terminal,Airport Lounge,Coffee Shop,Boat or Ferry,Boutique,Sculpture Garden,Bar,Plane,Airport Gate


### 5.Cluster the Neighborhoods

In [33]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 11

Toronto_Grouped_Clustering = Toronto_Grouped.drop('Neighborhood', 1)

# run k-means clustering

kmeans=KMeans(init = "k-means++", n_clusters = kclusters, n_init = 12,random_state=0).fit(Toronto_Grouped_Clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:100]

array([ 1,  1,  5,  9, 10,  5,  1,  5,  5,  1,  1,  5,  8,  1,  1,  5,  1,
        6,  5,  1,  1,  5,  0,  1,  3,  1,  5,  7,  2,  1,  1,  1,  1,  1,
        1,  4,  5,  1], dtype=int32)

In [49]:
#Neighborhoods_Venues_Sorted.drop(['Cluster Labels'],axis=1,inplace=True)
#neighborhoods_venues_sorted.head()

In [35]:
#Add cluster labels
Neighborhoods_Venues_Sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Toronto_Merged = Toronto_df

# Merge Neighborhood venue data with Toronto data to add latitude/longitude for each neighborhood
Toronto_Merged = Toronto_Merged.join(Neighborhoods_Venues_Sorted.set_index('Neighborhood'), on='Neighborhood')

Toronto_Merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Trail,Health Food Store,Pub,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Women's Store
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,1,Greek Restaurant,Coffee Shop,Ice Cream Shop,Italian Restaurant,Furniture / Home Store,Pizza Place,Bookstore,Brewery,Bubble Tea Shop,Café
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,5,Park,Gym,Pub,Liquor Store,Board Shop,Fast Food Restaurant,Burger Joint,Fish & Chips Shop,Sandwich Place,Burrito Place
43,M4M,East Toronto,Studio District,43.659526,-79.340923,1,Café,Coffee Shop,Bakery,Italian Restaurant,American Restaurant,Yoga Studio,Comfort Food Restaurant,Seafood Restaurant,Brewery,Sandwich Place
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,Park,Swim School,Bus Line,Women's Store,Discount Store,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant


### Visualizing the resulting clusters

In [36]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# Create the Cluster Map
Map_Clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# Set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_Merged['Latitude'], Toronto_Merged['Longitude'], Toronto_Merged['Neighborhood'], Toronto_Merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(Map_Clusters)
       
Map_Clusters

### Examine the Clusters

### Cluster 1

In [51]:
Toronto_Merged.loc[Toronto_Merged['Cluster Labels'] == 0, Toronto_Merged.columns[[2]+[1] + list(range(5, Toronto_Merged.shape[1]))]]

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
44,Lawrence Park,Central Toronto,0,Park,Swim School,Bus Line,Women's Store,Discount Store,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant


### Cluster 2

In [52]:
Toronto_Merged.loc[Toronto_Merged['Cluster Labels'] == 1, Toronto_Merged.columns[[2]+[1] + list(range(5, Toronto_Merged.shape[1]))]]

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
41,"The Danforth West, Riverdale",East Toronto,1,Greek Restaurant,Coffee Shop,Ice Cream Shop,Italian Restaurant,Furniture / Home Store,Pizza Place,Bookstore,Brewery,Bubble Tea Shop,Café
43,Studio District,East Toronto,1,Café,Coffee Shop,Bakery,Italian Restaurant,American Restaurant,Yoga Studio,Comfort Food Restaurant,Seafood Restaurant,Brewery,Sandwich Place
46,North Toronto West,Central Toronto,1,Coffee Shop,Sporting Goods Shop,Clothing Store,Burger Joint,Salon / Barbershop,Café,Restaurant,Rental Car Location,Chinese Restaurant,Yoga Studio
49,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Central Toronto,1,Coffee Shop,Pub,Pizza Place,American Restaurant,Light Rail Station,Sports Bar,Restaurant,Supermarket,Sushi Restaurant,Bagel Shop
52,Church and Wellesley,Downtown Toronto,1,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Gay Bar,Hotel,Pub,Bubble Tea Shop,Burger Joint,Café
53,"Harbourfront, Regent Park",Downtown Toronto,1,Coffee Shop,Park,Pub,Bakery,Café,Theater,Mexican Restaurant,Breakfast Spot,Historic Site,Greek Restaurant
54,"Ryerson, Garden District",Downtown Toronto,1,Clothing Store,Coffee Shop,Cosmetics Shop,Middle Eastern Restaurant,Italian Restaurant,Café,Bakery,Lingerie Store,Ramen Restaurant,Restaurant
55,St. James Town,Downtown Toronto,1,Coffee Shop,Café,Hotel,Restaurant,Clothing Store,Gastropub,Breakfast Spot,Cocktail Bar,Cosmetics Shop,Italian Restaurant
56,Berczy Park,Downtown Toronto,1,Coffee Shop,Cocktail Bar,Beer Bar,Bakery,Steakhouse,Seafood Restaurant,Café,Cheese Shop,Farmers Market,Clothing Store
57,Central Bay Street,Downtown Toronto,1,Coffee Shop,Italian Restaurant,Café,Ice Cream Shop,Sandwich Place,Middle Eastern Restaurant,Burger Joint,Bar,Juice Bar,Japanese Restaurant


### Cluster 3

In [54]:
Toronto_Merged.loc[Toronto_Merged['Cluster Labels'] == 2, Toronto_Merged.columns[[2]+[1] + list(range(5, Toronto_Merged.shape[1]))]]

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,Roselawn,Central Toronto,2,Home Service,Garden,Women's Store,Diner,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store


### Cluster 4

In [55]:
Toronto_Merged.loc[Toronto_Merged['Cluster Labels'] == 3, Toronto_Merged.columns[[2]+[1] + list(range(5, Toronto_Merged.shape[1]))]]

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
48,"Moore Park, Summerhill East",Central Toronto,3,Playground,Park,Diner,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


### Cluster 5

In [56]:
Toronto_Merged.loc[Toronto_Merged['Cluster Labels'] == 4, Toronto_Merged.columns[[2]+[1] + list(range(5, Toronto_Merged.shape[1]))]]

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,The Beaches,East Toronto,4,Trail,Health Food Store,Pub,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Women's Store


### Cluster 6

In [57]:
Toronto_Merged.loc[Toronto_Merged['Cluster Labels'] == 5, Toronto_Merged.columns[[2]+[1] + list(range(5, Toronto_Merged.shape[1]))]]

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
42,"The Beaches West, India Bazaar",East Toronto,5,Park,Gym,Pub,Liquor Store,Board Shop,Fast Food Restaurant,Burger Joint,Fish & Chips Shop,Sandwich Place,Burrito Place
47,Davisville,Central Toronto,5,Sandwich Place,Pizza Place,Dessert Shop,Gym,Café,Italian Restaurant,Coffee Shop,Sushi Restaurant,Costume Shop,Brewery
51,"Cabbagetown, St. James Town",Downtown Toronto,5,Coffee Shop,Market,Pizza Place,Italian Restaurant,Chinese Restaurant,Pub,Restaurant,Café,Grocery Store,Bakery
66,"Harbord, University of Toronto",Downtown Toronto,5,Café,Bakery,Bar,Italian Restaurant,Japanese Restaurant,Bookstore,Restaurant,Pub,Beer Bar,Beer Store
67,"Chinatown, Grange Park, Kensington Market",Downtown Toronto,5,Café,Vegetarian / Vegan Restaurant,Bar,Chinese Restaurant,Mexican Restaurant,Vietnamese Restaurant,Dumpling Restaurant,Bakery,Coffee Shop,Farmers Market
75,Christie,Downtown Toronto,5,Café,Grocery Store,Park,Convenience Store,Nightclub,Diner,Baby Store,Italian Restaurant,Restaurant,Coffee Shop
76,"Dovercourt Village, Dufferin",West Toronto,5,Supermarket,Bakery,Pharmacy,Bank,Bar,Middle Eastern Restaurant,Café,Music Venue,Art Gallery,Park
78,"Brockton, Exhibition Place, Parkdale Village",West Toronto,5,Café,Yoga Studio,Performing Arts Venue,Breakfast Spot,Coffee Shop,Italian Restaurant,Nightclub,Convenience Store,Intersection,Pet Store
82,"High Park, The Junction South",West Toronto,5,Mexican Restaurant,Café,Thai Restaurant,Flea Market,Diner,Fast Food Restaurant,Bookstore,Park,Speakeasy,Cajun / Creole Restaurant
83,"Parkdale, Roncesvalles",West Toronto,5,Breakfast Spot,Gift Shop,Restaurant,Dessert Shop,Eastern European Restaurant,Bar,Bank,Dog Run,Movie Theater,Italian Restaurant


### Cluster 7

In [58]:
Toronto_Merged.loc[Toronto_Merged['Cluster Labels'] == 6, Toronto_Merged.columns[[2]+[1] + list(range(5, Toronto_Merged.shape[1]))]]

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
64,"Forest Hill North, Forest Hill West",Central Toronto,6,Trail,Park,Sushi Restaurant,Jewelry Store,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Women's Store


### Cluster 8

In [59]:
Toronto_Merged.loc[Toronto_Merged['Cluster Labels'] == 7, Toronto_Merged.columns[[2]+[1] + list(range(5, Toronto_Merged.shape[1]))]]

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,Rosedale,Downtown Toronto,7,Park,Playground,Trail,Building,Diner,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store


### Cluster 9

In [60]:
Toronto_Merged.loc[Toronto_Merged['Cluster Labels'] == 8, Toronto_Merged.columns[[2]+[1] + list(range(5, Toronto_Merged.shape[1]))]]

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,Davisville North,Central Toronto,8,Clothing Store,Food & Drink Shop,Convenience Store,Sandwich Place,Park,Breakfast Spot,Hotel,Doner Restaurant,Donut Shop,Dumpling Restaurant


### Cluster 10

In [61]:
Toronto_Merged.loc[Toronto_Merged['Cluster Labels'] == 9, Toronto_Merged.columns[[2]+[1] + list(range(5, Toronto_Merged.shape[1]))]]

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
87,Business Reply Mail Processing Centre 969 Eastern,East Toronto,9,Light Rail Station,Yoga Studio,Garden Center,Pizza Place,Recording Studio,Restaurant,Burrito Place,Brewery,Skate Park,Farmers Market


### Cluster 11

In [62]:
Toronto_Merged.loc[Toronto_Merged['Cluster Labels'] == 10, Toronto_Merged.columns[[2]+[1] + list(range(5, Toronto_Merged.shape[1]))]]

Unnamed: 0,Neighborhood,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
68,"CN Tower, Bathurst Quay, Island airport, Harbo...",Downtown Toronto,10,Airport Service,Airport Terminal,Airport Lounge,Coffee Shop,Boat or Ferry,Boutique,Sculpture Garden,Bar,Plane,Airport Gate
