## Finding the best place to open a Italian Restaurant in Toronto

### a. Prepraring the dataframe with Toronto's neighborhoods and its coordinates

1. Scraping the table of wikipedia page

In [341]:
import pandas as pd
import numpy as np
link = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
tables = pd.read_html(link)
df = tables[0].iloc[0:]
df.columns = ['Postcode','Borough','Neighborhood']
df.head()


Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


2. Droping rows with Borough='Not assigned'

In [342]:
df=df[df.loc[:,'Borough']!='Not assigned'].reset_index(drop=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


3. Changing the 'Not assigned' in Neighbourhoold cell by the Borough value

In [343]:
for i in range(df.shape[0]):
    if df.iloc[i,2]=='Not assigned':
        df.iloc[i,2]=df.iloc[i,1]
#Checking that there is no 'Not assigned' value on Neighbourhood column
(df['Neighborhood']=='Not assigned').value_counts()

False    211
Name: Neighborhood, dtype: int64

4. Merging Neighbourhoods separated by commas with common Postcode

In [344]:
from pandas import Series 
def f(x):
     return Series(dict(Neighbourhood = "%s" % ', '.join(x['Neighborhood'])))
df=df.groupby(['Postcode','Borough']).apply(f)
df.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Neighbourhood
Postcode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge, Malvern"
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [345]:
df.reset_index(inplace=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [346]:
df.shape

(103, 3)

5. Adding latitude and longitude

In [347]:
df_latlon=pd.read_csv('http://cocl.us/Geospatial_data')
df_latlon.rename(columns={'Postal Code':'Postcode'},inplace=True)
df_latlon.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [348]:
df_merged=pd.merge(df,df_latlon, on='Postcode')
df_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [349]:
df_toronto=df_merged

### Finding top venues as restaurants in Toronto

#### 1. Download and Explore Dataset

In [350]:
#Make the appropiate imports
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


usage: conda-script.py [-h] [-V] command ...
conda-script.py: error: unrecognized arguments: # uncomment this line if you haven't completed the Foursquare API lab


In [351]:
# create map of Toronto using latitude and longitude values
#Coordinates of Toronto
latitude= 43.651070
longitude= -79.347015
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto_filtered['Latitude'], df_toronto_filtered['Longitude'], df_toronto_filtered['Borough'], df_toronto_filtered['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [352]:
#My Foursquare api credentials
CLIENT_ID = 'XTQAPH10RWKRAHYMCLY511LLPMUGMH1524KK3MHJOXIJ4VKY' # your Foursquare ID
CLIENT_SECRET = 'MPTM4BZH0UK20QAJIHRSZ4Z4T3IXWB42J2TOO0MH4510UGTH' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: XTQAPH10RWKRAHYMCLY511LLPMUGMH1524KK3MHJOXIJ4VKY
CLIENT_SECRET:MPTM4BZH0UK20QAJIHRSZ4Z4T3IXWB42J2TOO0MH4510UGTH


#### 2. Explore Neighborhoods

In [353]:
def getNearbyVenues(names, latitudes, longitudes, radius=800):
    
    LIMIT=150
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [354]:
toronto_venues = getNearbyVenues(names=df_toronto['Neighbourhood'],
                                   latitudes=df_toronto['Latitude'],
                                   longitudes=df_toronto['Longitude']
                                  )

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

In [355]:
print(toronto_venues.shape)
toronto_venues.head()

(3988, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa
1,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant
2,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
3,"Rouge, Malvern",43.806686,-79.194353,Staples Morningside,43.800285,-79.196607,Paper / Office Supplies Store
4,"Rouge, Malvern",43.806686,-79.194353,Tim Hortons,43.802,-79.198169,Coffee Shop


#### 3. Filtering only Restaurant and food places venues

In [356]:
toronto_venues['Venue Category'].value_counts()

Coffee Shop                                 302
Café                                        165
Park                                        117
Pizza Place                                 115
Italian Restaurant                          105
Restaurant                                  104
Bakery                                       88
Bar                                          71
Sandwich Place                               71
Hotel                                        69
Japanese Restaurant                          66
Fast Food Restaurant                         65
Grocery Store                                65
Sushi Restaurant                             58
Gym                                          56
Gastropub                                    52
Pub                                          51
Pharmacy                                     48
American Restaurant                          46
Breakfast Spot                               45
Chinese Restaurant                      

In [357]:

#We work only with the boroughs than contains the word 'Toronto'
list_restaurant_types=['Restaurant','Pizza','Sandwich','Gastropub','Steakhouse','Burger','Burrito', 'BBQ', 'Salad Place', 'Bistro', 'Noodle House']
toronto_venues_f=toronto_venues[toronto_venues['Venue Category'].str.contains('|'.join(list_restaurant_types))].reset_index(drop=True)
toronto_venues_f.shape

(1345, 7)

In [358]:
toronto_venues_f.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant
1,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
2,"Rouge, Malvern",43.806686,-79.194353,Harvey's,43.800106,-79.198258,Fast Food Restaurant
3,"Rouge, Malvern",43.806686,-79.194353,Charley's Exotic Cuisine,43.800982,-79.200233,Chinese Restaurant
4,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Fratelli Village Pizzeria,43.784008,-79.169787,Italian Restaurant


In [359]:
toronto_venues_f.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",38,38,38,38,38,38
Agincourt,9,9,9,9,9,9
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",12,12,12,12,12,12
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",6,6,6,6,6,6
"Alderwood, Long Branch",3,3,3,3,3,3
"Bathurst Manor, Downsview North, Wilson Heights",8,8,8,8,8,8
Bayview Village,3,3,3,3,3,3
"Bedford Park, Lawrence Manor East",15,15,15,15,15,15
Berczy Park,28,28,28,28,28,28
"Birch Cliff, Cliffside West",1,1,1,1,1,1


In [360]:
print('There are {} uniques categories.'.format(len(toronto_venues_f['Venue Category'].unique())))

There are 69 uniques categories.


#### 4. Analyze Each Neighborhood

In [361]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues_f[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues_f['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Asian Restaurant,BBQ Joint,Belgian Restaurant,Bistro,Brazilian Restaurant,Burger Joint,Burrito Place,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Comfort Food Restaurant,Cuban Restaurant,Dim Sum Restaurant,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Gastropub,German Restaurant,Greek Restaurant,Hakka Restaurant,Hawaiian Restaurant,Hotpot Restaurant,Indian Chinese Restaurant,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Jewish Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,New American Restaurant,Noodle House,Pakistani Restaurant,Pizza Place,Portuguese Restaurant,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Steakhouse,Sushi Restaurant,Syrian Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Theme Restaurant,Tibetan Restaurant,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [362]:
toronto_onehot.shape

(1345, 70)

In [363]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Asian Restaurant,BBQ Joint,Belgian Restaurant,Bistro,Brazilian Restaurant,Burger Joint,Burrito Place,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Comfort Food Restaurant,Cuban Restaurant,Dim Sum Restaurant,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Gastropub,German Restaurant,Greek Restaurant,Hakka Restaurant,Hawaiian Restaurant,Hotpot Restaurant,Indian Chinese Restaurant,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Jewish Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,New American Restaurant,Noodle House,Pakistani Restaurant,Pizza Place,Portuguese Restaurant,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,South American Restaurant,Southern / Soul Food Restaurant,Steakhouse,Sushi Restaurant,Syrian Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Theme Restaurant,Tibetan Restaurant,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,"Adelaide, King, Richmond",0.0,0.078947,0.078947,0.0,0.0,0.0,0.026316,0.026316,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.078947,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.026316,0.0,0.026316,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.078947,0.0,0.026316,0.078947,0.026316,0.026316,0.026316,0.0,0.0,0.078947,0.105263,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.026316,0.0
1,Agincourt,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Bathurst Manor, Downsview North, Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Bedford Park, Lawrence Manor East",0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.066667,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.066667,0.0,0.066667,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0
8,Berczy Park,0.0,0.0,0.0,0.071429,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.071429,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.107143,0.107143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.035714,0.0,0.0,0.178571,0.035714,0.0,0.071429,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.035714,0.0
9,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [364]:
toronto_grouped.shape

(90, 70)

In [365]:
#Lets princ each neighborhood with the top5 most common venues
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                 venue  freq
0     Sushi Restaurant  0.11
1           Steakhouse  0.08
2            Gastropub  0.08
3     Asian Restaurant  0.08
4  American Restaurant  0.08


----Agincourt----
                       venue  freq
0         Chinese Restaurant  0.22
1             Sandwich Place  0.11
2           Sushi Restaurant  0.11
3  Latin American Restaurant  0.11
4           Malay Restaurant  0.11


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                  venue  freq
0    Chinese Restaurant  0.25
1           Pizza Place  0.17
2  Fast Food Restaurant  0.08
3      Malay Restaurant  0.08
4             BBQ Joint  0.08


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                  venue  freq
0           Pizza Place  0.67
1        Sandwich Place  0.17
2  Fast Food Restaurant  0.17
3     Afghan Restaurant  0.00
4            Restaurant  0.00


----Alderwood,

                       venue  freq
0      Vietnamese Restaurant   0.5
1                 Restaurant   0.5
2          Korean Restaurant   0.0
3  Latin American Restaurant   0.0
4           Malay Restaurant   0.0


----Downsview Northwest----
                   venue  freq
0   Fast Food Restaurant  0.22
1            Pizza Place  0.22
2  Vietnamese Restaurant  0.11
3    American Restaurant  0.11
4         Sandwich Place  0.11


----Downsview West----
                             venue  freq
0            Vietnamese Restaurant   0.5
1                      Pizza Place   0.5
2  Molecular Gastronomy Restaurant   0.0
3                 Ramen Restaurant   0.0
4            Portuguese Restaurant   0.0


----East Birchmount Park, Ionview, Kennedy Park----
                  venue  freq
0        Sandwich Place  0.33
1  Fast Food Restaurant  0.33
2    Chinese Restaurant  0.33
3           Salad Place  0.00
4      Ramen Restaurant  0.00


----East Toronto----
                  venue  freq
0      Greek Res

                  venue  freq
0  Fast Food Restaurant  0.50
1            Restaurant  0.17
2        Sandwich Place  0.17
3           Pizza Place  0.17
4     Afghan Restaurant  0.00


----St. James Town----
                 venue  freq
0   Italian Restaurant  0.13
1           Restaurant  0.13
2            Gastropub  0.10
3  American Restaurant  0.10
4   Seafood Restaurant  0.10


----Stn A PO Boxes 25 The Esplanade----
                 venue  freq
0           Restaurant  0.16
1   Italian Restaurant  0.13
2  Japanese Restaurant  0.13
3   Seafood Restaurant  0.10
4            Gastropub  0.10


----Studio District----
                 venue  freq
0   Italian Restaurant   0.1
1     Sushi Restaurant   0.1
2       Sandwich Place   0.1
3          Pizza Place   0.1
4  American Restaurant   0.1


----The Annex, North Midtown, Yorkville----
                           venue  freq
0             Italian Restaurant  0.11
1  Vegetarian / Vegan Restaurant  0.08
2                    Pizza Place  0.08
3  

In [366]:
#functions fo short venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [367]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Adelaide, King, Richmond",Sushi Restaurant,American Restaurant,Asian Restaurant,Steakhouse,Gastropub
1,Agincourt,Chinese Restaurant,Sushi Restaurant,American Restaurant,Sandwich Place,Latin American Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Chinese Restaurant,Pizza Place,Korean Restaurant,Noodle House,Fast Food Restaurant
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Pizza Place,Sandwich Place,Fast Food Restaurant,Vietnamese Restaurant,Doner Restaurant
4,"Alderwood, Long Branch",Pizza Place,Sandwich Place,Vietnamese Restaurant,Doner Restaurant,Eastern European Restaurant


#### 5 Cluster Neighborhoods

In [368]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 3, 3, 2, 2, 3, 3, 3, 3, 4])

In [369]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0.0,Fast Food Restaurant,Chinese Restaurant,Vietnamese Restaurant,French Restaurant,Empanada Restaurant
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,3.0,Italian Restaurant,Burger Joint,Indian Chinese Restaurant,Hotpot Restaurant,Hawaiian Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,2.0,Pizza Place,Fast Food Restaurant,Mexican Restaurant,Greek Restaurant,French Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,,,,,,
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,3.0,Indian Restaurant,Hakka Restaurant,Chinese Restaurant,Caribbean Restaurant,Thai Restaurant


In [370]:
toronto_merged.shape
toronto_merged.dropna(inplace=True)
toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0.0,Fast Food Restaurant,Chinese Restaurant,Vietnamese Restaurant,French Restaurant,Empanada Restaurant
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,3.0,Italian Restaurant,Burger Joint,Indian Chinese Restaurant,Hotpot Restaurant,Hawaiian Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,2.0,Pizza Place,Fast Food Restaurant,Mexican Restaurant,Greek Restaurant,French Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,3.0,Indian Restaurant,Hakka Restaurant,Chinese Restaurant,Caribbean Restaurant,Thai Restaurant
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,0.0,Fast Food Restaurant,Pizza Place,Restaurant,Sandwich Place,Dumpling Restaurant


In [371]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[np.int(cluster)-1],
        fill=True,
        fill_color=rainbow[np.int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### 6. Examine Clusters

In [375]:

#Cluster 1
cluster1=toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
cluster1

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Scarborough,0.0,Fast Food Restaurant,Chinese Restaurant,Vietnamese Restaurant,French Restaurant,Empanada Restaurant
5,Scarborough,0.0,Fast Food Restaurant,Pizza Place,Restaurant,Sandwich Place,Dumpling Restaurant
6,Scarborough,0.0,Sandwich Place,Chinese Restaurant,Fast Food Restaurant,Vietnamese Restaurant,Dumpling Restaurant
7,Scarborough,0.0,Pizza Place,Fast Food Restaurant,Vietnamese Restaurant,French Restaurant,Empanada Restaurant
8,Scarborough,0.0,Fast Food Restaurant,Pizza Place,Burger Joint,Vietnamese Restaurant,French Restaurant
10,Scarborough,0.0,Indian Restaurant,Fast Food Restaurant,Chinese Restaurant,Vietnamese Restaurant,BBQ Joint
15,Scarborough,0.0,Fast Food Restaurant,Chinese Restaurant,Pizza Place,Noodle House,Sandwich Place
17,North York,0.0,Pizza Place,Sandwich Place,Korean Restaurant,Chinese Restaurant,Fast Food Restaurant
18,North York,0.0,Fast Food Restaurant,Japanese Restaurant,American Restaurant,Restaurant,Sandwich Place
33,North York,0.0,Pizza Place,Fast Food Restaurant,Sandwich Place,Falafel Restaurant,Caribbean Restaurant


In [385]:
cluster1_pp=cluster1[cluster1.loc[:]!='Pizza Place']
cluster1_pp.dropna(inplace=True)
cluster1_pp

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Scarborough,0.0,Fast Food Restaurant,Chinese Restaurant,Vietnamese Restaurant,French Restaurant,Empanada Restaurant
6,Scarborough,0.0,Sandwich Place,Chinese Restaurant,Fast Food Restaurant,Vietnamese Restaurant,Dumpling Restaurant
10,Scarborough,0.0,Indian Restaurant,Fast Food Restaurant,Chinese Restaurant,Vietnamese Restaurant,BBQ Joint
18,North York,0.0,Fast Food Restaurant,Japanese Restaurant,American Restaurant,Restaurant,Sandwich Place
74,York,0.0,Mexican Restaurant,Japanese Restaurant,Fast Food Restaurant,Gastropub,Empanada Restaurant


In [377]:
#Cluster 2
cluster2=toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
cluster2

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
32,North York,1.0,Vietnamese Restaurant,Restaurant,American Restaurant,Dumpling Restaurant,Indian Chinese Restaurant
44,Central Toronto,1.0,Restaurant,French Restaurant,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant
80,York,1.0,Restaurant,Sandwich Place,Dim Sum Restaurant,Dumpling Restaurant,Eastern European Restaurant


In [388]:
cluster2_pp=cluster2[cluster2.loc[:]!='Pizza Place']
cluster2_pp.dropna(inplace=True)
cluster2_pp

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
32,North York,1.0,Vietnamese Restaurant,Restaurant,American Restaurant,Dumpling Restaurant,Indian Chinese Restaurant
44,Central Toronto,1.0,Restaurant,French Restaurant,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant
80,York,1.0,Restaurant,Sandwich Place,Dim Sum Restaurant,Dumpling Restaurant,Eastern European Restaurant


In [378]:
#Cluster 3
cluster3=toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
cluster3

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,Scarborough,2.0,Pizza Place,Fast Food Restaurant,Mexican Restaurant,Greek Restaurant,French Restaurant
24,North York,2.0,Pizza Place,Vietnamese Restaurant,French Restaurant,Empanada Restaurant,Ethiopian Restaurant
31,North York,2.0,Vietnamese Restaurant,Pizza Place,French Restaurant,Empanada Restaurant,Ethiopian Restaurant
36,East York,2.0,Pizza Place,Asian Restaurant,Vietnamese Restaurant,French Restaurant,Empanada Restaurant
81,York,2.0,Pizza Place,Indian Restaurant,Sandwich Place,Thai Restaurant,Fast Food Restaurant
89,Etobicoke,2.0,Pizza Place,Sandwich Place,Vietnamese Restaurant,Doner Restaurant,Eastern European Restaurant
94,Etobicoke,2.0,Pizza Place,Mexican Restaurant,American Restaurant,French Restaurant,Empanada Restaurant
95,Etobicoke,2.0,Pizza Place,Vietnamese Restaurant,French Restaurant,Empanada Restaurant,Ethiopian Restaurant
96,North York,2.0,Pizza Place,Empanada Restaurant,Vietnamese Restaurant,French Restaurant,Ethiopian Restaurant
98,York,2.0,Pizza Place,Vietnamese Restaurant,French Restaurant,Empanada Restaurant,Ethiopian Restaurant


In [389]:
cluster3_pp=cluster3[cluster3.loc[:]!='Pizza Place']
cluster3_pp.dropna(inplace=True)
cluster3_pp

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue


In [390]:
#Cluster 4
cluster4=toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
cluster4

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Scarborough,3.0,Italian Restaurant,Burger Joint,Indian Chinese Restaurant,Hotpot Restaurant,Hawaiian Restaurant
4,Scarborough,3.0,Indian Restaurant,Hakka Restaurant,Chinese Restaurant,Caribbean Restaurant,Thai Restaurant
11,Scarborough,3.0,Middle Eastern Restaurant,Vietnamese Restaurant,Restaurant,Korean Restaurant,Seafood Restaurant
12,Scarborough,3.0,Chinese Restaurant,Sushi Restaurant,American Restaurant,Sandwich Place,Latin American Restaurant
13,Scarborough,3.0,Pizza Place,Thai Restaurant,Noodle House,Chinese Restaurant,Sandwich Place
14,Scarborough,3.0,Chinese Restaurant,Pizza Place,Korean Restaurant,Noodle House,Fast Food Restaurant
19,North York,3.0,Japanese Restaurant,Chinese Restaurant,Vietnamese Restaurant,French Restaurant,Empanada Restaurant
21,North York,3.0,Korean Restaurant,Indian Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Sandwich Place
22,North York,3.0,Pizza Place,Korean Restaurant,Sushi Restaurant,Ramen Restaurant,Fast Food Restaurant
26,North York,3.0,Japanese Restaurant,Caribbean Restaurant,Vietnamese Restaurant,French Restaurant,Empanada Restaurant


In [396]:
cluster4_pp=cluster4[cluster4.loc[:]!='Pizza Place']
cluster4_pp.dropna(inplace=True)
cluster4_pp

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Scarborough,3.0,Italian Restaurant,Burger Joint,Indian Chinese Restaurant,Hotpot Restaurant,Hawaiian Restaurant
4,Scarborough,3.0,Indian Restaurant,Hakka Restaurant,Chinese Restaurant,Caribbean Restaurant,Thai Restaurant
11,Scarborough,3.0,Middle Eastern Restaurant,Vietnamese Restaurant,Restaurant,Korean Restaurant,Seafood Restaurant
12,Scarborough,3.0,Chinese Restaurant,Sushi Restaurant,American Restaurant,Sandwich Place,Latin American Restaurant
19,North York,3.0,Japanese Restaurant,Chinese Restaurant,Vietnamese Restaurant,French Restaurant,Empanada Restaurant
21,North York,3.0,Korean Restaurant,Indian Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Sandwich Place
26,North York,3.0,Japanese Restaurant,Caribbean Restaurant,Vietnamese Restaurant,French Restaurant,Empanada Restaurant
27,North York,3.0,Japanese Restaurant,Italian Restaurant,Chinese Restaurant,Middle Eastern Restaurant,Fast Food Restaurant
30,North York,3.0,Vietnamese Restaurant,Middle Eastern Restaurant,Turkish Restaurant,Sandwich Place,Chinese Restaurant
34,North York,3.0,Portuguese Restaurant,French Restaurant,Eastern European Restaurant,Empanada Restaurant,Ethiopian Restaurant


In [393]:
#Cluster 5
cluster5=toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
cluster5

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
9,Scarborough,4.0,Thai Restaurant,Vietnamese Restaurant,French Restaurant,Eastern European Restaurant,Empanada Restaurant


In [394]:
cluster5_pp=cluster5[cluster5.loc[:]!='Pizza Place']
cluster5_pp.dropna(inplace=True)
cluster5_pp

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
9,Scarborough,4.0,Thai Restaurant,Vietnamese Restaurant,French Restaurant,Eastern European Restaurant,Empanada Restaurant
