# Segmenting and Clustering Neighborhoods In Toronto

# I. Extract the dataframe from wikipedia

## 1. Load Data From URL

In [1]:
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
from urllib.request import urlopen
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html = urllib.request.urlopen(url) 
soup = BeautifulSoup(html, 'lxml')

In [2]:
table=soup.find('table', class_='wikitable sortable')
table

<table class="wikitable sortable">
<tbody><tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighborhood
</th></tr>
<tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>
<tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>
<tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>
<tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>
<tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>
<tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue, Humber Valley Village
</td></tr>
<tr>
<td>M1B
</td>
<td>Scarborough
</td>
<td>Malvern, Rouge
</td></tr>
<tr>
<td>M2B
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3B
</td>
<td>

In [3]:
A=[]
B=[]
C=[]

for row in table.findAll('tr'):
    cells=row.findAll('td')
    if len(cells)==3:
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))

## 2. Import Pandas to convert the data into dataframe

In [4]:
import pandas as pd
df=pd.DataFrame(A,columns=['Postal Code'])
df['Borough']=B
df['Neighborhood']=C
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


## 3. Clean up the table by dropping the "Not assigned" values

In [5]:
df1=df[~df.Borough.str.contains("Not assigned")]
df1.shape

(103, 3)

In [6]:
df1['Neighborhood'].str.contains('Not assigned').value_counts()

False    103
Name: Neighborhood, dtype: int64

In [7]:
df1.reset_index(drop=True, inplace=True)
df1

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [68]:
df1.shape

(103, 3)

# II. Create the second dataframe with coordinates

## 1. Use csv file to add Latitude and Longitude to dataframe

In [8]:
urll='http://cocl.us/Geospatial_data'
ll=pd.read_csv(urll)

In [9]:
ll.sort_values(by=['Postal Code'], inplace=True)
ll

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [37]:
df1['Postal Code']=[line.strip('\n') for line in df1['Postal Code']]
df1['Borough']=[line.strip('\n') for line in df1['Borough']]
df1['Neighborhood']=[line.strip('\n') for line in df1['Neighborhood']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


## 2. Merge two dataframes together

In [38]:
df3=pd.merge(df1, ll, on=['Postal Code'])
df3

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [69]:
df3.shape

(103, 5)

# III. Explore neighborhoods in Toronto

## 1. import libraries

In [12]:
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium 

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    ------------------------------------------------------------
                       

In [13]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.50-py_0           conda-forge
    geopy:         1.22.0-pyh9f0ad1d_0 conda-forge


Downloading and Extracting Packages
geopy-1.22.0         | 63 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done

In [39]:
print(df3['Borough'].unique())
print('The dataframe has {} Boroughs.'.format(len(df3['Borough'].unique())))


['North York' 'Downtown Toronto' 'Etobicoke' 'Scarborough' 'East York'
 'York' 'East Toronto' 'West Toronto' 'Central Toronto' 'Mississauga']
The dataframe has 10 Boroughs.


## 2. Use geopy library to get coordinates of Toronto

In [40]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="trt_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Canada are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, Canada are 43.6534817, -79.3839347.


## 3. Create a map of Toronto with neighborhoods superimposed on top

In [41]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(df3['Latitude'], df3['Longitude'], df3['Borough'], df3['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## 4. Define Foursquare credentials and version

In [18]:
CLIENT_ID = 'PQFAY0KMBXRO3HANFRMOB0RRY002OP2O2NJ25TV0SZGNXNHE' 
CLIENT_SECRET = 'NNRDSP0TZJCCQUPB4ZS01G3VLCLJSWISYN4R5UKK043KH2J0' 
VERSION = '20180605' 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PQFAY0KMBXRO3HANFRMOB0RRY002OP2O2NJ25TV0SZGNXNHE
CLIENT_SECRET:NNRDSP0TZJCCQUPB4ZS01G3VLCLJSWISYN4R5UKK043KH2J0


## 5. Explore Neighborhoods in Toronto

In [42]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng)
            
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [43]:
toronto_venues = getNearbyVenues(names=df3['Neighborhood'],
                                   latitudes=df3['Latitude'],
                                   longitudes=df3['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [44]:
print(toronto_venues.shape)
toronto_venues.head()

(3090, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.753259,-79.329656,Donalda Golf & Country Club,43.752816,-79.342741,Golf Course
2,Parkwoods,43.753259,-79.329656,Galleria Supermarket,43.75352,-79.349518,Supermarket
3,Parkwoods,43.753259,-79.329656,Island Foods,43.745866,-79.346035,Caribbean Restaurant
4,Parkwoods,43.753259,-79.329656,Graydon Hall Manor,43.763923,-79.342961,Event Space


In [45]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,30,30,30,30,30,30
"Alderwood, Long Branch",30,30,30,30,30,30
"Bathurst Manor, Wilson Heights, Downsview North",30,30,30,30,30,30
Bayview Village,30,30,30,30,30,30
"Bedford Park, Lawrence Manor East",30,30,30,30,30,30
Berczy Park,30,30,30,30,30,30
"Birch Cliff, Cliffside West",30,30,30,30,30,30
"Brockton, Parkdale Village, Exhibition Place",30,30,30,30,30,30
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",30,30,30,30,30,30
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",30,30,30,30,30,30


In [46]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 258 uniques categories.


## 6. Analyze each neighborhood

In [47]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Zoo Exhibit,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Amphitheater,Antique Shop,Aquarium,Art Gallery,Art Museum,...,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
toronto_onehot.shape

(3090, 258)

### Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [49]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Zoo Exhibit,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Amphitheater,Antique Shop,Aquarium,Art Gallery,...,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo
0,Agincourt,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,"Alderwood, Long Branch",0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.000000,0.000000,0.033333,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.033333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,Bayview Village,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,"Bedford Park, Lawrence Manor East",0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.033333,0.000000,0.000000,0.000000,0.000000
5,Berczy Park,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.033333,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6,"Birch Cliff, Cliffside West",0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,"Brockton, Parkdale Village, Exhibition Place",0.0,0.000000,0.000000,0.000000,0.033333,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,"Business reply mail Processing Centre, South C...",0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.000000,0.000000,0.033333,0.000000,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.033333,0.000000


In [50]:
toronto_grouped.shape

(99, 258)

### Print each neighborhood along with the top 5 most common venues

In [51]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                  venue  freq
0     Indian Restaurant  0.13
1    Chinese Restaurant  0.10
2  Caribbean Restaurant  0.10
3        Breakfast Spot  0.07
4  Gym / Fitness Center  0.07


----Alderwood, Long Branch----
            venue  freq
0     Coffee Shop  0.10
1            Café  0.07
2             Pub  0.03
3  Breakfast Spot  0.03
4             Gym  0.03


----Bathurst Manor, Wilson Heights, Downsview North----
                       venue  freq
0              Deli / Bodega  0.07
1                Coffee Shop  0.07
2  Middle Eastern Restaurant  0.07
3          French Restaurant  0.07
4         Seafood Restaurant  0.03


----Bayview Village----
                    venue  freq
0      Chinese Restaurant  0.10
1             Supermarket  0.07
2  Furniture / Home Store  0.07
3         Thai Restaurant  0.07
4         Bubble Tea Shop  0.07


----Bedford Park, Lawrence Manor East----
                venue  freq
0              Bakery  0.07
1          Bagel Shop  0.07
2  Italian 

               venue  freq
0            Brewery  0.13
1               Park  0.13
2  Indian Restaurant  0.07
3              Beach  0.07
4               Café  0.07


----Islington Avenue, Humber Valley Village----
                venue  freq
0              Bakery  0.13
1         Golf Course  0.07
2        Dessert Shop  0.07
3    Tapas Restaurant  0.07
4  Italian Restaurant  0.07


----Kennedy Park, Ionview, East Birchmount Park----
               venue  freq
0               Park  0.13
1       Burger Joint  0.07
2       Liquor Store  0.07
3        Coffee Shop  0.07
4  Indian Restaurant  0.07


----Kensington Market, Chinatown, Grange Park----
                           venue  freq
0                           Café  0.10
1          Vietnamese Restaurant  0.07
2                         Bakery  0.07
3             Mexican Restaurant  0.07
4  Vegetarian / Vegan Restaurant  0.07


----Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens----
                venue  freq
0       

            venue  freq
0           Beach  0.13
1  Breakfast Spot  0.10
2             Pub  0.07
3     Coffee Shop  0.07
4       BBQ Joint  0.07


----The Danforth West, Riverdale----
                venue  freq
0    Greek Restaurant  0.13
1      Ice Cream Shop  0.07
2  Italian Restaurant  0.07
3                Park  0.07
4              Bakery  0.07


----The Kingsway, Montgomery Road, Old Mill North----
                venue  freq
0        Burger Joint  0.07
1  Italian Restaurant  0.07
2    Tapas Restaurant  0.07
3              Bakery  0.07
4   French Restaurant  0.07


----Thorncliffe Park----
            venue  freq
0          Bakery  0.13
1   Grocery Store  0.07
2         Dog Run  0.03
3   Historic Site  0.03
4  Sandwich Place  0.03


----Toronto Dominion Centre, Design Exchange----
         venue  freq
0  Coffee Shop  0.17
1         Café  0.13
2   Restaurant  0.07
3       Bakery  0.07
4        Hotel  0.03


----University of Toronto, Harbord----
         venue  freq
0         Café 

### Create a dataframe with above data

In [52]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [53]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Indian Restaurant,Caribbean Restaurant,Chinese Restaurant,Breakfast Spot,Gym / Fitness Center,Supermarket,Sporting Goods Shop,Restaurant,Cantonese Restaurant,Sri Lankan Restaurant
1,"Alderwood, Long Branch",Coffee Shop,Café,Pub,Seafood Restaurant,Breakfast Spot,Pizza Place,Fast Food Restaurant,South American Restaurant,Burger Joint,Burrito Place
2,"Bathurst Manor, Wilson Heights, Downsview North",Deli / Bodega,French Restaurant,Middle Eastern Restaurant,Coffee Shop,Airport,Bagel Shop,Liquor Store,Sushi Restaurant,Climbing Gym,Jewelry Store
3,Bayview Village,Chinese Restaurant,Thai Restaurant,Supermarket,Furniture / Home Store,Bubble Tea Shop,Creperie,Szechuan Restaurant,Sporting Goods Shop,Shopping Mall,Seafood Restaurant
4,"Bedford Park, Lawrence Manor East",Bakery,Bagel Shop,Breakfast Spot,Toy / Game Store,Sushi Restaurant,Sandwich Place,Sports Club,Restaurant,Garden,Deli / Bodega


## 7. Cluster Neighborhoods

In [54]:
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

kmeans.labels_[0:10] 

array([1, 2, 4, 4, 1, 2, 3, 2, 3, 3], dtype=int32)

In [55]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df3

toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,4,Middle Eastern Restaurant,Mediterranean Restaurant,Gym / Fitness Center,Caribbean Restaurant,Café,American Restaurant,Italian Restaurant,Movie Theater,Burrito Place,Mexican Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,4,Middle Eastern Restaurant,Grocery Store,Coffee Shop,Indian Restaurant,Mexican Restaurant,Mediterranean Restaurant,Shopping Mall,Liquor Store,Supermarket,New American Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2,Coffee Shop,Park,Café,Bakery,Pub,Italian Restaurant,Spa,Breakfast Spot,Mediterranean Restaurant,Farmers Market
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1,Clothing Store,Liquor Store,Fried Chicken Joint,Furniture / Home Store,Restaurant,Athletics & Sports,Men's Store,Shopping Mall,Brewery,Boutique
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3,Coffee Shop,Park,Sushi Restaurant,Burger Joint,Hobby Shop,Burrito Place,Beer Bar,Mexican Restaurant,Bubble Tea Shop,Sandwich Place


In [56]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine each cluster

### Cluster 1

In [57]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,0,Zoo Exhibit,Burger Joint,Zoo,Breakfast Spot,National Park,Fried Chicken Joint,Liquor Store,Bakery,Sushi Restaurant,Italian Restaurant
95,Scarborough,0,Zoo Exhibit,Supermarket,Hakka Restaurant,Zoo,Burger Joint,Farm,Sandwich Place,National Park,Bakery,Liquor Store


In [59]:
print('Cluster 1 is popular for Zoo and National Park.')

Cluster 1 is popular for Zoo and National Park.


### Cluster 2

In [60]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,North York,1,Clothing Store,Liquor Store,Fried Chicken Joint,Furniture / Home Store,Restaurant,Athletics & Sports,Men's Store,Shopping Mall,Brewery,Boutique
8,East York,1,Sandwich Place,Gastropub,Park,Indian Restaurant,Coffee Shop,Brewery,Rock Climbing Spot,Restaurant,Gaming Cafe,Playground
9,Downtown Toronto,1,Ramen Restaurant,Theater,Café,Clothing Store,Coffee Shop,College Rec Center,Sporting Goods Shop,Burger Joint,Burrito Place,Shopping Mall
10,North York,1,Clothing Store,Restaurant,Furniture / Home Store,Coffee Shop,Chocolate Shop,Sandwich Place,Fried Chicken Joint,Bike Shop,Sushi Restaurant,Men's Store
17,Etobicoke,1,Grocery Store,Golf Course,Coffee Shop,Sporting Goods Shop,Eastern European Restaurant,Liquor Store,Park,Restaurant,Sandwich Place,Racetrack
26,Scarborough,1,Caribbean Restaurant,Gym / Fitness Center,Indian Restaurant,Clothing Store,Coffee Shop,Hotel,Restaurant,Bookstore,Sri Lankan Restaurant,Breakfast Spot
27,North York,1,Bakery,Chinese Restaurant,Japanese Restaurant,Caribbean Restaurant,Coffee Shop,Hotel,Bagel Shop,Cantonese Restaurant,Food,Bubble Tea Shop
29,East York,1,Bakery,Grocery Store,Historic Site,Furniture / Home Store,Supermarket,Sporting Goods Shop,Brewery,Fish & Chips Shop,Farmers Market,Science Museum
36,Downtown Toronto,1,Hotel,Plaza,Park,Bubble Tea Shop,Basketball Stadium,Bistro,Lake,Supermarket,Lounge,Brewery
37,West Toronto,1,Bar,Men's Store,Vietnamese Restaurant,Asian Restaurant,Ice Cream Shop,Pizza Place,French Restaurant,New American Restaurant,Korean Restaurant,Cocktail Bar


In [61]:
print('Cluster 2 is popular for shopping and dining.')

Cluster 2 is popular for shopping and dining.


### Cluster 3

In [62]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,2,Coffee Shop,Park,Café,Bakery,Pub,Italian Restaurant,Spa,Breakfast Spot,Mediterranean Restaurant,Farmers Market
5,Etobicoke,2,Bakery,Dessert Shop,Tapas Restaurant,Italian Restaurant,Golf Course,Liquor Store,Gastropub,Garden,French Restaurant,Sushi Restaurant
11,Etobicoke,2,Coffee Shop,Café,Grocery Store,Liquor Store,Park,Bakery,Sporting Goods Shop,Pizza Place,Seafood Restaurant,Eastern European Restaurant
14,East York,2,Coffee Shop,Gastropub,Café,Park,Breakfast Spot,Gaming Cafe,Cocktail Bar,Sandwich Place,Ethiopian Restaurant,Brewery
15,Downtown Toronto,2,Gastropub,Restaurant,Coffee Shop,Café,Hotel,Farmers Market,Thai Restaurant,Japanese Restaurant,Food Truck,Middle Eastern Restaurant
16,York,2,Café,Italian Restaurant,Middle Eastern Restaurant,Coffee Shop,Ice Cream Shop,Bagel Shop,Trail,BBQ Joint,Thai Restaurant,Caribbean Restaurant
20,Downtown Toronto,2,Coffee Shop,Seafood Restaurant,Cocktail Bar,Food Truck,Liquor Store,Jazz Club,Basketball Stadium,Tailor Shop,Bistro,Lounge
21,York,2,Italian Restaurant,Coffee Shop,Mexican Restaurant,Sushi Restaurant,Brazilian Restaurant,Bike Shop,Furniture / Home Store,Café,Thai Restaurant,Bar
24,Downtown Toronto,2,Coffee Shop,Café,Japanese Restaurant,Italian Restaurant,Hotel,Bubble Tea Shop,Middle Eastern Restaurant,Steakhouse,Sushi Restaurant,Sandwich Place
25,Downtown Toronto,2,Café,Korean Restaurant,Cocktail Bar,Indian Restaurant,Coffee Shop,South American Restaurant,Spa,Playground,Music Store,Taco Place


In [63]:
print('Cluster 3 is popular for drinks and social networking.')

Cluster 3 is popular for drinks and social networking.


### Cluster 4

In [64]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Downtown Toronto,3,Coffee Shop,Park,Sushi Restaurant,Burger Joint,Hobby Shop,Burrito Place,Beer Bar,Mexican Restaurant,Bubble Tea Shop,Sandwich Place
12,Scarborough,3,Park,Zoo Exhibit,Pub,Breakfast Spot,Burger Joint,Grocery Store,Zoo,BBQ Joint,Pharmacy,Food & Drink Shop
18,Scarborough,3,Park,Indian Restaurant,Coffee Shop,Breakfast Spot,Burger Joint,Gym,Food & Drink Shop,Fried Chicken Joint,Bakery,Italian Restaurant
19,East Toronto,3,Beach,Breakfast Spot,Pub,Coffee Shop,BBQ Joint,Toy / Game Store,Middle Eastern Restaurant,Mexican Restaurant,Skating Rink,Nail Salon
22,Scarborough,3,Caribbean Restaurant,Indian Restaurant,Park,Pub,Coffee Shop,Hotel,Restaurant,Fish & Chips Shop,Burger Joint,Food & Drink Shop
23,East York,3,Park,Café,Grocery Store,Bakery,Sports Bar,Indian Restaurant,Brewery,Sporting Goods Shop,Seafood Restaurant,Supermarket
32,Scarborough,3,Coffee Shop,Park,Indian Restaurant,Pub,Burger Joint,Gym,Ice Cream Shop,Butcher,Bookstore,Beach
38,Scarborough,3,Park,Coffee Shop,Burger Joint,Liquor Store,Indian Restaurant,Sports Bar,Ice Cream Shop,Pizza Place,Pet Store,Discount Store
44,Scarborough,3,Park,Thai Restaurant,Gastropub,Liquor Store,Coffee Shop,Café,Rock Climbing Spot,Sandwich Place,Restaurant,Ice Cream Shop
47,East Toronto,3,Park,Brewery,Café,Indian Restaurant,Beach,Middle Eastern Restaurant,Fish & Chips Shop,Soccer Field,Snack Place,Fast Food Restaurant


In [65]:
print('Cluster 4 is popular for indoor and outdoor exercising.')

Cluster 4 is popular for indoor and outdoor exercising.


### Cluster 5

In [66]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,4,Middle Eastern Restaurant,Mediterranean Restaurant,Gym / Fitness Center,Caribbean Restaurant,Café,American Restaurant,Italian Restaurant,Movie Theater,Burrito Place,Mexican Restaurant
1,North York,4,Middle Eastern Restaurant,Grocery Store,Coffee Shop,Indian Restaurant,Mexican Restaurant,Mediterranean Restaurant,Shopping Mall,Liquor Store,Supermarket,New American Restaurant
7,North York,4,Middle Eastern Restaurant,Bakery,Café,Shopping Mall,Other Great Outdoors,Italian Restaurant,Supermarket,New American Restaurant,Restaurant,Bank
13,North York,4,Middle Eastern Restaurant,Bakery,Café,Shopping Mall,Other Great Outdoors,Italian Restaurant,Supermarket,New American Restaurant,Restaurant,Bank
28,North York,4,Deli / Bodega,French Restaurant,Middle Eastern Restaurant,Coffee Shop,Airport,Bagel Shop,Liquor Store,Sushi Restaurant,Climbing Gym,Jewelry Store
33,North York,4,Bakery,Middle Eastern Restaurant,Chinese Restaurant,Caribbean Restaurant,Hotel,Burrito Place,Furniture / Home Store,Breakfast Spot,Bubble Tea Shop,Burger Joint
34,North York,4,Coffee Shop,Middle Eastern Restaurant,Supermarket,Mediterranean Restaurant,Restaurant,Caribbean Restaurant,Pizza Place,Massage Studio,Music Store,Caucasian Restaurant
39,North York,4,Chinese Restaurant,Thai Restaurant,Supermarket,Furniture / Home Store,Bubble Tea Shop,Creperie,Szechuan Restaurant,Sporting Goods Shop,Shopping Mall,Seafood Restaurant
45,North York,4,Furniture / Home Store,Supermarket,Japanese Restaurant,Hotel,French Restaurant,Liquor Store,Botanical Garden,Fish Market,Mediterranean Restaurant,Middle Eastern Restaurant
52,North York,4,Korean Restaurant,Middle Eastern Restaurant,Sushi Restaurant,Bubble Tea Shop,Café,Creperie,Supermarket,Burger Joint,Seafood Restaurant,Coffee Shop


In [67]:
print('Cluster 5 is popular for international cuisine.')

Cluster 5 is popular for international cuisine.
