# Battle of Neighbourhood

## Analyse Toronto neighbourhoods to start an Indian Restaurant

#### Read Toronto Postal Codes from Wiki page by making use of BeautifulSoup library

In [1]:
import requests
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
from bs4 import BeautifulSoup
soup = BeautifulSoup(url,'lxml')

#### Process the table from soup object and create data frame by extracting rows and columns from the table

In [2]:
torontoTable = soup.find('table', {'class':'wikitable sortable'})
import pandas as pd
rows = torontoTable.findAll('tr')

allData = []
for row in rows:
    td = row.findAll('td')
    if td != []:
        data = [tr.text.strip() for tr in td]
        allData.append(data)            
    
df = pd.DataFrame(allData, columns=['PostalCode', 'Borough', 'Neighborhood'])

#### Clean the data by removing 'Not assigned' value and assigning borough value to neighbourhood where it is not available and group all neighbourhood having same postal codes

In [3]:
neighbourNA = df.index[df['Neighborhood'] == 'Not assigned']
df['Neighborhood'][neighbourNA] = df['Borough'][neighbourNA]


boroughNA = df.index[df['Borough'] == 'Not assigned']
df.drop(boroughNA, inplace=True)



#### Census data about Toronto Neighbourhood from 2016 has many information about demographics. Make use of the immigration information to find Indian and Sri Lankan population in each neighbourhood. Sort the neighbourhood based on population and pick the top 50 neighbourhood.

In [4]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,Category,Topic,Data Source,Characteristic,City of Toronto,Agincourt North,Agincourt South,Alderwood,Annex,Banbury-Don Mills,...,Willowdale West,Willowridge-Martingrove-Richview,Woburn,Woodbine Corridor,Woodbine-Lumsden,Wychwood,Yonge-Eglinton,Yonge-St.Clair,York University,Yorkdale-Glen Park
0,Immigration and citizenship,Immigrants by selected place of birth,Census Profile 98-316-X2016001,Total - Selected places of birth for the immig...,1266005,19990,19990,3965,8275,13205,...,9395,10040,30180,2930,2200,4355,3175,3585,15595,7270
1,Immigration and citizenship,Immigrants by selected place of birth,Census Profile 98-316-X2016001,Americas,212010,1635,1415,450,1630,1335,...,530,2065,4570,660,490,960,580,695,3645,1510
2,Immigration and citizenship,Immigrants by selected place of birth,Census Profile 98-316-X2016001,Brazil,7025,10,15,30,60,40,...,55,115,45,30,25,50,50,25,40,90
3,Immigration and citizenship,Immigrants by selected place of birth,Census Profile 98-316-X2016001,Colombia,8715,15,10,15,40,100,...,50,85,70,0,0,45,45,50,170,105
4,Immigration and citizenship,Immigrants by selected place of birth,Census Profile 98-316-X2016001,El Salvador,6955,10,30,20,10,10,...,0,75,65,10,0,10,10,10,160,45


In [5]:
df_data_1 = df_data_1[((df_data_1['Characteristic'] == 'India') | (df_data_1['Characteristic'] == 'Sri Lanka') )]
df_data_1.drop(columns=['Category','Topic','Data Source', 'Characteristic', 'City of Toronto'], inplace=True)

torontoAreaPopulation = df_data_1.T
torontoAreaPopulation.columns = ['India', 'Sri Lanka']
torontoAreaPopulation['Population'] = torontoAreaPopulation['India'] + torontoAreaPopulation['Sri Lanka']
torontoAreaPopulation.sort_values(['Population'], ascending=0, inplace=True)
torontoAreaPopulation = torontoAreaPopulation.head(50)
torontoAreaPopulation

Unnamed: 0,India,Sri Lanka,Population
Woburn,6680,4405,11085
Rouge,2940,7385,10325
Malvern,3380,4860,8240
West Humber-Clairville,6525,585,7110
Mount Olive,4815,945,5760
L'Amoreaux,1360,2620,3980
Bendale,1760,1795,3555
Dorset Park,1325,2190,3515
Agincourt North,945,2210,3155
Eglinton East,985,2070,3055


In [8]:

df = df[df['Neighborhood'].isin(torontoAreaPopulation.index)]
df

Unnamed: 0,PostalCode,Borough,Neighborhood
3,M4A,North York,Victoria Village
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern
27,M1C,Scarborough,Highland Creek
31,M3C,North York,Flemingdon Park
34,M5C,Downtown Toronto,St. James Town
43,M1E,Scarborough,Morningside
44,M1E,Scarborough,West Hill
53,M1G,Scarborough,Woburn
63,M2H,North York,Hillcrest Village


In [9]:
combined = df.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(lambda x: "%s" % ', '.join(x))
df2 = pd.DataFrame(combined)

df2 = df2.reset_index()
df2

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,Highland Creek
2,M1E,Scarborough,"Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1J,Scarborough,Scarborough Village
5,M1K,Scarborough,"Ionview, Kennedy Park"
6,M1L,Scarborough,Clairlea
7,M1M,Scarborough,Cliffcrest
8,M1P,Scarborough,Dorset Park
9,M1R,Scarborough,Wexford


#### Read the csv file having the postal code to latitude and longitude mapping information

In [10]:
csv = pd.read_csv('http://cocl.us/Geospatial_data')
csv.rename(columns={'Postal Code': 'LLPostalCode'}, inplace=True)


#### Merge both the data frame and drop the extra postal code column

In [11]:
result = pd.concat([df2, csv], axis=1, join = 'inner')
result.drop(['LLPostalCode'], axis=1, inplace=True)
result

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,"Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1J,Scarborough,Scarborough Village,43.773136,-79.239476
5,M1K,Scarborough,"Ionview, Kennedy Park",43.744734,-79.239476
6,M1L,Scarborough,Clairlea,43.727929,-79.262029
7,M1M,Scarborough,Cliffcrest,43.711112,-79.284577
8,M1P,Scarborough,Dorset Park,43.716316,-79.239476
9,M1R,Scarborough,Wexford,43.692657,-79.264848


#### Filter those borough having Toronto in their name

In [12]:
torontoArea = result

#### Import all required libraries for analysing toronto borough and map the data

In [13]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.18.1-py_0 conda-forge

geographiclib- 100% |################################| Time: 0:00:00  14.80 MB/s
geopy-1.18.1-p 100% |################################| Time: 0:00:00  21.25 MB/s
Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  34.30 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  20.42 MB/s
vincent-0.4.4- 100% |###################

In [14]:
# Get the lat and lng for Toronto 
address = 'Toronto, ON'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto City are 43.653963, -79.387207.


In [15]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(torontoArea['Latitude'], torontoArea['Longitude'], torontoArea['Borough'], torontoArea['Neighborhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [16]:
# The code was removed by Watson Studio for sharing.

Credentails Set


In [17]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [18]:
def getNearbyVenues(names, latitudes, longitudes):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&limit={}&radius={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng,
            LIMIT,
            radius)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
                
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [19]:
# Get venues for all toronto boroughs
toronto_venues = getNearbyVenues(names=torontoArea['Neighborhood'],
                                   latitudes=torontoArea['Latitude'],
                                   longitudes=torontoArea['Longitude']
                                  )

Rouge, Malvern
Highland Creek
Morningside, West Hill
Woburn
Scarborough Village
Ionview, Kennedy Park
Clairlea
Cliffcrest
Dorset Park
Wexford
Tam O'Shanter
Agincourt North, Milliken
Hillcrest Village
Henry Farm
Bayview Village
Flemingdon Park
York University
Victoria Village
Thorncliffe Park
St. James Town
St. James Town
Humber Summit
Mount Olive, Thistletown


In [20]:
toronto_venues = toronto_venues[toronto_venues['Venue Category'].str.contains('Restaurant')]
toronto_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
16,"Rouge, Malvern",43.806686,-79.194353,Fratelli Village Pizzeria,43.784008,-79.169787,Italian Restaurant
18,"Rouge, Malvern",43.806686,-79.194353,Mona's Roti,43.791613,-79.251015,Caribbean Restaurant
20,"Rouge, Malvern",43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant
27,"Rouge, Malvern",43.806686,-79.194353,Babu Catering & Take Out,43.791721,-79.251132,Sri Lankan Restaurant
33,"Rouge, Malvern",43.806686,-79.194353,Lucky Lin's Restaurant,43.813613,-79.237943,Chinese Restaurant
35,"Rouge, Malvern",43.806686,-79.194353,Silver Spoon Pak-Indian Restaurant,43.791824,-79.25134,Indian Restaurant
44,"Rouge, Malvern",43.806686,-79.194353,Harvey's,43.800106,-79.198258,Fast Food Restaurant
49,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant
50,"Rouge, Malvern",43.806686,-79.194353,Hakka Legend,43.776309,-79.234939,Chinese Restaurant
54,"Rouge, Malvern",43.806686,-79.194353,La Sani Grill,43.776214,-79.234848,Indian Restaurant


In [21]:
# Number of venues for each neighborhood
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Agincourt North, Milliken",48,48,48,48,48,48
Bayview Village,50,50,50,50,50,50
Clairlea,32,32,32,32,32,32
Cliffcrest,26,26,26,26,26,26
Dorset Park,17,17,17,17,17,17
Flemingdon Park,55,55,55,55,55,55
Henry Farm,53,53,53,53,53,53
Highland Creek,9,9,9,9,9,9
Hillcrest Village,57,57,57,57,57,57
Humber Summit,37,37,37,37,37,37


In [22]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Asian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Doner Restaurant,Dumpling Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Greek Restaurant,Hakka Restaurant,Hong Kong Restaurant,Hotpot Restaurant,Hungarian Restaurant,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Scandinavian Restaurant,Seafood Restaurant,Sri Lankan Restaurant,Sushi Restaurant,Szechuan Restaurant,Thai Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
16,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18,"Rouge, Malvern",0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
20,"Rouge, Malvern",0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
27,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
33,"Rouge, Malvern",0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [23]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Asian Restaurant,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,Chinese Restaurant,Doner Restaurant,Dumpling Restaurant,Ethiopian Restaurant,Falafel Restaurant,Fast Food Restaurant,Filipino Restaurant,French Restaurant,Greek Restaurant,Hakka Restaurant,Hong Kong Restaurant,Hotpot Restaurant,Hungarian Restaurant,Indian Restaurant,Indonesian Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,Latin American Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Persian Restaurant,Peruvian Restaurant,Ramen Restaurant,Restaurant,Scandinavian Restaurant,Seafood Restaurant,Sri Lankan Restaurant,Sushi Restaurant,Szechuan Restaurant,Thai Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,"Agincourt North, Milliken",0.0,0.0,0.041667,0.020833,0.0,0.020833,0.0625,0.104167,0.0,0.0,0.0,0.041667,0.020833,0.020833,0.0,0.020833,0.020833,0.0,0.0,0.0,0.083333,0.0,0.041667,0.041667,0.020833,0.0,0.0,0.041667,0.020833,0.208333,0.0,0.0,0.0,0.041667,0.0,0.020833,0.0,0.041667,0.0,0.020833,0.020833,0.0,0.020833
1,Bayview Village,0.0,0.0,0.02,0.04,0.0,0.04,0.12,0.26,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.02,0.02,0.0,0.08,0.0,0.02,0.06,0.04,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.04,0.04
2,Clairlea,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.1875,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.125,0.0,0.03125,0.03125,0.03125,0.0,0.0,0.03125,0.0,0.1875,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0625,0.0,0.0625,0.03125,0.03125,0.03125
3,Cliffcrest,0.038462,0.0,0.038462,0.038462,0.0,0.0,0.0,0.038462,0.0,0.0,0.038462,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.038462,0.0,0.0,0.038462,0.0,0.269231,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.038462,0.0,0.115385,0.038462,0.038462,0.038462
4,Dorset Park,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,0.176471,0.0,0.0,0.0,0.0,0.058824,0.058824,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.058824,0.117647,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.058824,0.058824,0.058824,0.0
5,Flemingdon Park,0.0,0.0,0.018182,0.054545,0.0,0.054545,0.127273,0.272727,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.036364,0.0,0.018182,0.018182,0.0,0.036364,0.0,0.018182,0.072727,0.054545,0.0,0.0,0.0,0.0,0.036364,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.054545,0.0,0.018182,0.0,0.036364,0.036364
6,Henry Farm,0.0,0.0,0.0,0.037736,0.0,0.037736,0.113208,0.188679,0.0,0.0,0.0,0.018868,0.0,0.0,0.0,0.037736,0.0,0.018868,0.018868,0.0,0.075472,0.0,0.018868,0.056604,0.056604,0.0,0.018868,0.018868,0.0,0.113208,0.0,0.0,0.0,0.018868,0.0,0.0,0.018868,0.075472,0.0,0.0,0.0,0.018868,0.037736
7,Highland Creek,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0
8,Hillcrest Village,0.0,0.0,0.017544,0.017544,0.0,0.017544,0.087719,0.263158,0.0,0.017544,0.0,0.035088,0.035088,0.0,0.0,0.035088,0.052632,0.017544,0.017544,0.0,0.087719,0.0,0.017544,0.017544,0.035088,0.0,0.017544,0.0,0.017544,0.0,0.0,0.0,0.0,0.017544,0.0,0.017544,0.017544,0.070175,0.0,0.0,0.0,0.017544,0.052632
9,Humber Summit,0.0,0.0,0.0,0.054054,0.0,0.0,0.0,0.081081,0.027027,0.0,0.0,0.0,0.027027,0.0,0.027027,0.027027,0.0,0.0,0.0,0.0,0.0,0.0,0.027027,0.054054,0.27027,0.0,0.0,0.0,0.0,0.135135,0.0,0.0,0.054054,0.0,0.0,0.054054,0.0,0.081081,0.0,0.027027,0.0,0.027027,0.027027


#### Print each neighborhood along with the top 5 most common venues

In [24]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt North, Milliken----
                       venue  freq
0  Middle Eastern Restaurant  0.21
1         Chinese Restaurant  0.10
2          Indian Restaurant  0.08
3       Caribbean Restaurant  0.06
4        American Restaurant  0.04


----Bayview Village----
                   venue  freq
0     Chinese Restaurant  0.26
1   Caribbean Restaurant  0.12
2      Indian Restaurant  0.08
3    Japanese Restaurant  0.06
4  Vietnamese Restaurant  0.04


----Clairlea----
                       venue  freq
0         Chinese Restaurant  0.19
1  Middle Eastern Restaurant  0.19
2          Indian Restaurant  0.12
3       Fast Food Restaurant  0.06
4            Thai Restaurant  0.06


----Cliffcrest----
                           venue  freq
0      Middle Eastern Restaurant  0.27
1                Thai Restaurant  0.12
2              Afghan Restaurant  0.04
3            Filipino Restaurant  0.04
4  Vegetarian / Vegan Restaurant  0.04


----Dorset Park----
                           venue  freq

In [25]:
# Sort the venues in desc order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Create the new dataframe and display the top 10 venues for each neighborhood.

In [26]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Agincourt North, Milliken",Middle Eastern Restaurant,Chinese Restaurant,Indian Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Falafel Restaurant,Sushi Restaurant,American Restaurant,Italian Restaurant,Restaurant
1,Bayview Village,Chinese Restaurant,Caribbean Restaurant,Indian Restaurant,Japanese Restaurant,Vietnamese Restaurant,Hakka Restaurant,Asian Restaurant,Sushi Restaurant,Cantonese Restaurant,Vegetarian / Vegan Restaurant
2,Clairlea,Middle Eastern Restaurant,Chinese Restaurant,Indian Restaurant,Fast Food Restaurant,Thai Restaurant,Sushi Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant
3,Cliffcrest,Middle Eastern Restaurant,Thai Restaurant,Vietnamese Restaurant,Mediterranean Restaurant,American Restaurant,Asian Restaurant,Chinese Restaurant,Ethiopian Restaurant,Filipino Restaurant,French Restaurant
4,Dorset Park,Chinese Restaurant,Japanese Restaurant,Greek Restaurant,Turkish Restaurant,Thai Restaurant,Sushi Restaurant,Indian Restaurant,Asian Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant
5,Flemingdon Park,Chinese Restaurant,Caribbean Restaurant,Japanese Restaurant,Sushi Restaurant,Asian Restaurant,Korean Restaurant,Cantonese Restaurant,Vietnamese Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant
6,Henry Farm,Chinese Restaurant,Caribbean Restaurant,Middle Eastern Restaurant,Indian Restaurant,Sushi Restaurant,Japanese Restaurant,Korean Restaurant,Vietnamese Restaurant,Greek Restaurant,Asian Restaurant
7,Highland Creek,Fast Food Restaurant,Thai Restaurant,Italian Restaurant,Japanese Restaurant,Caribbean Restaurant,Chinese Restaurant,Mexican Restaurant,Vietnamese Restaurant,Falafel Restaurant,Hakka Restaurant
8,Hillcrest Village,Chinese Restaurant,Caribbean Restaurant,Indian Restaurant,Sushi Restaurant,Vietnamese Restaurant,Hakka Restaurant,Greek Restaurant,Falafel Restaurant,Korean Restaurant,Fast Food Restaurant
9,Humber Summit,Korean Restaurant,Middle Eastern Restaurant,Sushi Restaurant,Chinese Restaurant,Asian Restaurant,Seafood Restaurant,Japanese Restaurant,Ramen Restaurant,Doner Restaurant,French Restaurant


#### Run k-means to cluster the neighborhood into 5 clusters.

In [28]:
# set number of clusters
kclusters = 3

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20] 

array([2, 2, 2, 0, 2, 2, 2, 1, 2, 0, 2, 1, 0, 1, 1, 0, 2, 2, 2, 2], dtype=int32)

#### Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [29]:
toronto_merged = neighborhoods_venues_sorted
# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(torontoArea.set_index('Neighborhood'), on='Neighborhood')

toronto_merged # check the last columns!

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels,PostalCode,Borough,Latitude,Longitude
0,"Agincourt North, Milliken",Middle Eastern Restaurant,Chinese Restaurant,Indian Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Falafel Restaurant,Sushi Restaurant,American Restaurant,Italian Restaurant,Restaurant,2,M1V,Scarborough,43.750072,-79.295849
1,Bayview Village,Chinese Restaurant,Caribbean Restaurant,Indian Restaurant,Japanese Restaurant,Vietnamese Restaurant,Hakka Restaurant,Asian Restaurant,Sushi Restaurant,Cantonese Restaurant,Vegetarian / Vegan Restaurant,2,M2K,North York,43.815252,-79.284577
2,Clairlea,Middle Eastern Restaurant,Chinese Restaurant,Indian Restaurant,Fast Food Restaurant,Thai Restaurant,Sushi Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,2,M1L,Scarborough,43.727929,-79.262029
3,Cliffcrest,Middle Eastern Restaurant,Thai Restaurant,Vietnamese Restaurant,Mediterranean Restaurant,American Restaurant,Asian Restaurant,Chinese Restaurant,Ethiopian Restaurant,Filipino Restaurant,French Restaurant,0,M1M,Scarborough,43.711112,-79.284577
4,Dorset Park,Chinese Restaurant,Japanese Restaurant,Greek Restaurant,Turkish Restaurant,Thai Restaurant,Sushi Restaurant,Indian Restaurant,Asian Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,2,M1P,Scarborough,43.716316,-79.239476
5,Flemingdon Park,Chinese Restaurant,Caribbean Restaurant,Japanese Restaurant,Sushi Restaurant,Asian Restaurant,Korean Restaurant,Cantonese Restaurant,Vietnamese Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant,2,M3C,North York,43.799525,-79.318389
6,Henry Farm,Chinese Restaurant,Caribbean Restaurant,Middle Eastern Restaurant,Indian Restaurant,Sushi Restaurant,Japanese Restaurant,Korean Restaurant,Vietnamese Restaurant,Greek Restaurant,Asian Restaurant,2,M2J,North York,43.781638,-79.304302
7,Highland Creek,Fast Food Restaurant,Thai Restaurant,Italian Restaurant,Japanese Restaurant,Caribbean Restaurant,Chinese Restaurant,Mexican Restaurant,Vietnamese Restaurant,Falafel Restaurant,Hakka Restaurant,1,M1C,Scarborough,43.784535,-79.160497
8,Hillcrest Village,Chinese Restaurant,Caribbean Restaurant,Indian Restaurant,Sushi Restaurant,Vietnamese Restaurant,Hakka Restaurant,Greek Restaurant,Falafel Restaurant,Korean Restaurant,Fast Food Restaurant,2,M2H,North York,43.7942,-79.262029
9,Humber Summit,Korean Restaurant,Middle Eastern Restaurant,Sushi Restaurant,Chinese Restaurant,Asian Restaurant,Seafood Restaurant,Japanese Restaurant,Ramen Restaurant,Doner Restaurant,French Restaurant,0,M9L,North York,43.789053,-79.408493


#### Visualize the result and examine the clusters

In [30]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [31]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels,PostalCode,Borough,Latitude,Longitude
3,Cliffcrest,Middle Eastern Restaurant,Thai Restaurant,Vietnamese Restaurant,Mediterranean Restaurant,American Restaurant,Asian Restaurant,Chinese Restaurant,Ethiopian Restaurant,Filipino Restaurant,French Restaurant,0,M1M,Scarborough,43.711112,-79.284577
9,Humber Summit,Korean Restaurant,Middle Eastern Restaurant,Sushi Restaurant,Chinese Restaurant,Asian Restaurant,Seafood Restaurant,Japanese Restaurant,Ramen Restaurant,Doner Restaurant,French Restaurant,0,M9L,North York,43.789053,-79.408493
12,"Mount Olive, Thistletown",Korean Restaurant,Middle Eastern Restaurant,Sushi Restaurant,Japanese Restaurant,Seafood Restaurant,Ramen Restaurant,Vietnamese Restaurant,Asian Restaurant,Chinese Restaurant,Doner Restaurant,0,M9V,Etobicoke,43.77012,-79.408493
15,St. James Town,Korean Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Sushi Restaurant,Seafood Restaurant,Chinese Restaurant,Asian Restaurant,Ramen Restaurant,Caribbean Restaurant,French Restaurant,0,M4X,Downtown Toronto,43.786947,-79.385975
15,St. James Town,Korean Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Sushi Restaurant,Seafood Restaurant,Chinese Restaurant,Asian Restaurant,Ramen Restaurant,Caribbean Restaurant,French Restaurant,0,M5C,Downtown Toronto,43.75749,-79.374714


In [32]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels,PostalCode,Borough,Latitude,Longitude
7,Highland Creek,Fast Food Restaurant,Thai Restaurant,Italian Restaurant,Japanese Restaurant,Caribbean Restaurant,Chinese Restaurant,Mexican Restaurant,Vietnamese Restaurant,Falafel Restaurant,Hakka Restaurant,1,M1C,Scarborough,43.784535,-79.160497
11,"Morningside, West Hill",Indian Restaurant,Fast Food Restaurant,Caribbean Restaurant,Chinese Restaurant,Italian Restaurant,Hakka Restaurant,Mexican Restaurant,Vietnamese Restaurant,American Restaurant,Thai Restaurant,1,M1E,Scarborough,43.763573,-79.188711
13,"Rouge, Malvern",Fast Food Restaurant,Chinese Restaurant,Indian Restaurant,Caribbean Restaurant,Italian Restaurant,Sri Lankan Restaurant,Mexican Restaurant,Falafel Restaurant,Hong Kong Restaurant,Hakka Restaurant,1,M1B,Scarborough,43.806686,-79.194353
14,Scarborough Village,Fast Food Restaurant,Indian Restaurant,Chinese Restaurant,Caribbean Restaurant,Sushi Restaurant,Asian Restaurant,Hakka Restaurant,Malay Restaurant,Thai Restaurant,American Restaurant,1,M1J,Scarborough,43.773136,-79.239476
20,Woburn,Fast Food Restaurant,Caribbean Restaurant,Indian Restaurant,Chinese Restaurant,Hakka Restaurant,Greek Restaurant,Italian Restaurant,Sushi Restaurant,Sri Lankan Restaurant,Seafood Restaurant,1,M1G,Scarborough,43.770992,-79.216917
21,York University,Fast Food Restaurant,Hakka Restaurant,Indian Restaurant,Restaurant,Middle Eastern Restaurant,Afghan Restaurant,African Restaurant,Asian Restaurant,Greek Restaurant,Caribbean Restaurant,1,M3J,North York,43.836125,-79.205636


In [33]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels,PostalCode,Borough,Latitude,Longitude
0,"Agincourt North, Milliken",Middle Eastern Restaurant,Chinese Restaurant,Indian Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Falafel Restaurant,Sushi Restaurant,American Restaurant,Italian Restaurant,Restaurant,2,M1V,Scarborough,43.750072,-79.295849
1,Bayview Village,Chinese Restaurant,Caribbean Restaurant,Indian Restaurant,Japanese Restaurant,Vietnamese Restaurant,Hakka Restaurant,Asian Restaurant,Sushi Restaurant,Cantonese Restaurant,Vegetarian / Vegan Restaurant,2,M2K,North York,43.815252,-79.284577
2,Clairlea,Middle Eastern Restaurant,Chinese Restaurant,Indian Restaurant,Fast Food Restaurant,Thai Restaurant,Sushi Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,Japanese Restaurant,Korean Restaurant,2,M1L,Scarborough,43.727929,-79.262029
4,Dorset Park,Chinese Restaurant,Japanese Restaurant,Greek Restaurant,Turkish Restaurant,Thai Restaurant,Sushi Restaurant,Indian Restaurant,Asian Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,2,M1P,Scarborough,43.716316,-79.239476
5,Flemingdon Park,Chinese Restaurant,Caribbean Restaurant,Japanese Restaurant,Sushi Restaurant,Asian Restaurant,Korean Restaurant,Cantonese Restaurant,Vietnamese Restaurant,Indian Restaurant,Vegetarian / Vegan Restaurant,2,M3C,North York,43.799525,-79.318389
6,Henry Farm,Chinese Restaurant,Caribbean Restaurant,Middle Eastern Restaurant,Indian Restaurant,Sushi Restaurant,Japanese Restaurant,Korean Restaurant,Vietnamese Restaurant,Greek Restaurant,Asian Restaurant,2,M2J,North York,43.781638,-79.304302
8,Hillcrest Village,Chinese Restaurant,Caribbean Restaurant,Indian Restaurant,Sushi Restaurant,Vietnamese Restaurant,Hakka Restaurant,Greek Restaurant,Falafel Restaurant,Korean Restaurant,Fast Food Restaurant,2,M2H,North York,43.7942,-79.262029
10,"Ionview, Kennedy Park",Chinese Restaurant,Indian Restaurant,Sushi Restaurant,Caribbean Restaurant,Fast Food Restaurant,Vietnamese Restaurant,Japanese Restaurant,Korean Restaurant,Middle Eastern Restaurant,Hakka Restaurant,2,M1K,Scarborough,43.744734,-79.239476
16,Tam O'Shanter,Middle Eastern Restaurant,Indian Restaurant,Chinese Restaurant,Caribbean Restaurant,Sushi Restaurant,Fast Food Restaurant,Italian Restaurant,Korean Restaurant,Asian Restaurant,Mediterranean Restaurant,2,M1T,Scarborough,43.75741,-79.273304
17,Thorncliffe Park,Chinese Restaurant,Caribbean Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Cantonese Restaurant,Greek Restaurant,Italian Restaurant,Korean Restaurant,Restaurant,Hotpot Restaurant,2,M4H,East York,43.778517,-79.346556


### Result
#### The clustering details provides information about each group and one can make use of this cluster information and infer to choose the neighbourhood for opening the restaurant.