In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np 
import json 

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## 1. Download and Explore Dataset

In [2]:
url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
table = pd.DataFrame(columns=["postcode","borough","neighborhood"])
res=requests.get(url).text
soup=BeautifulSoup(res,'lxml')
for items in soup.find('table',class_='wikitable').find_all('tr')[1::1]:
    data=items.find_all(['th','td'])
    try:
        postcode = data[0].text.rstrip()
        borough = data[1].text.rstrip()
        neighborhood = data[2].text.rstrip()
    except IndexError:pass
    table = table.append({"postcode":postcode,"borough":borough,"neighborhood":neighborhood},ignore_index=True)


In [3]:
# Drop rows with 'Not assigned' borough
table = table[table.borough!='Not assigned']
table = table.reset_index(drop=True)

In [4]:
table.head()

Unnamed: 0,postcode,borough,neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [5]:
#Check there's no "Not assigned" value in borough
table.borough.value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East Toronto         5
York                 5
East York            5
Mississauga          1
Name: borough, dtype: int64

In [6]:
#If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
dftemp=table[(table.borough!="Not assigned") & (table.neighborhood=="Not assigned")]
dftemp

Unnamed: 0,postcode,borough,neighborhood


In [7]:
# Replace 'Not assigned' neighborhood with corresponding borough
table.neighborhood = table.borough.where((table.borough!='Not assigned')&(table.neighborhood=='Not assigned'),
                                         table.neighborhood)

In [8]:
#Combine neigborhoods having same postcodes in one row separated by a comma.
f_merge_comma = lambda x: " , ".join(x)
table = table.groupby(['postcode','borough']).agg({'neighborhood':f_merge_comma}).reset_index()

In [9]:
table.head(10)

Unnamed: 0,postcode,borough,neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,Kennedy Park / Ionview / East Birchmount Park
7,M1L,Scarborough,Golden Mile / Clairlea / Oakridge
8,M1M,Scarborough,Cliffside / Cliffcrest / Scarborough Village West
9,M1N,Scarborough,Birch Cliff / Cliffside West


In [10]:
table.shape

(103, 3)

In [11]:
postcodes_df=table.copy()

In [12]:
#https://pypi.org/project/pgeocode/
import pgeocode
nomi = pgeocode.Nominatim('ca')
def get_geocode(post_code):
    loc=nomi.query_postal_code(post_code)
    return loc.latitude, loc.longitude

In [13]:
get_geocode('M5G')

(43.6564, -79.38600000000002)

In [14]:
postcodes_df['latitude'], postcodes_df['longitude'] = zip(*postcodes_df['postcode'].apply(get_geocode))

In [15]:
postcodes_df[postcodes_df.longitude.isnull()]

Unnamed: 0,postcode,borough,neighborhood,latitude,longitude
86,M7R,Mississauga,Canada Post Gateway Processing Centre,,


In [16]:
postcodes_df.dropna(inplace=True)

In [17]:
postcodes_df[postcodes_df.longitude.isnull()]

Unnamed: 0,postcode,borough,neighborhood,latitude,longitude


In [18]:
postcodes_df.head(12)

Unnamed: 0,postcode,borough,neighborhood,latitude,longitude
0,M1B,Scarborough,Malvern / Rouge,43.8113,-79.193
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.7878,-79.1564
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.7678,-79.1866
3,M1G,Scarborough,Woburn,43.7712,-79.2144
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389
5,M1J,Scarborough,Scarborough Village,43.7464,-79.2323
6,M1K,Scarborough,Kennedy Park / Ionview / East Birchmount Park,43.7298,-79.2639
7,M1L,Scarborough,Golden Mile / Clairlea / Oakridge,43.7122,-79.2843
8,M1M,Scarborough,Cliffside / Cliffcrest / Scarborough Village West,43.7247,-79.2312
9,M1N,Scarborough,Birch Cliff / Cliffside West,43.6952,-79.2646


In [19]:
neighborhoods = postcodes_df.copy()

In [20]:
neighborhoods.borough.unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       'Etobicoke'], dtype=object)

Get the number of the neighborhoods and boroughs in the dataframe.

In [21]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['borough'].unique()),
        neighborhoods.shape[0]
    )
)

The dataframe has 9 boroughs and 102 neighborhoods.


#### Use geopy library to get the latitude and longitude values of Toronto.

In [22]:
address = 'Toronto'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.6534817, -79.3839347.


#### Create a map of New York with neighborhoods superimposed on top.

In [23]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['latitude'], neighborhoods['longitude'], 
                                           neighborhoods['borough'], neighborhoods['neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

I will now explore the data for Scarborough and create a new dataframe of Scarborough data

In [24]:
scarborough_data = neighborhoods[neighborhoods['borough'] == 'Scarborough'].reset_index(drop=True)
scarborough_data

Unnamed: 0,postcode,borough,neighborhood,latitude,longitude
0,M1B,Scarborough,Malvern / Rouge,43.8113,-79.193
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.7878,-79.1564
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.7678,-79.1866
3,M1G,Scarborough,Woburn,43.7712,-79.2144
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389
5,M1J,Scarborough,Scarborough Village,43.7464,-79.2323
6,M1K,Scarborough,Kennedy Park / Ionview / East Birchmount Park,43.7298,-79.2639
7,M1L,Scarborough,Golden Mile / Clairlea / Oakridge,43.7122,-79.2843
8,M1M,Scarborough,Cliffside / Cliffcrest / Scarborough Village West,43.7247,-79.2312
9,M1N,Scarborough,Birch Cliff / Cliffside West,43.6952,-79.2646


Getting the geographical coordinated for Scarborough

In [25]:
address = 'Scarborough, Toronto'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.773077, -79.257774.


In [26]:
# create map of Scarborough using latitude and longitude values
map_scarborough = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(scarborough_data['latitude'], scarborough_data['longitude'], scarborough_data['neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_scarborough)  
    
map_scarborough

Now I'm going to utilize the Foursquare API to explore the neighborhoods and segment them.

In [None]:
#### Define Foursquare Credentials and Version
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Explore the first neighborhood in Scarborough

In [28]:
scarborough_data.loc[0, 'neighborhood']

'Malvern / Rouge'

In [29]:
neighborhood_latitude = scarborough_data.loc[0, 'latitude'] # neighborhood latitude value
neighborhood_longitude = scarborough_data.loc[0, 'longitude'] # neighborhood longitude value

neighborhood_name = scarborough_data.loc[0, 'neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Malvern / Rouge are 43.8113, -79.193.


#### Getting the top 100 venues that are in Malvern / Rouge within a radius of 500 meters.

In [30]:
search_query = 'Malvern / Rouge'
radius = 500
limit = 100

url =\
'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'\
.format(CLIENT_ID,CLIENT_SECRET,neighborhood_latitude,neighborhood_longitude,VERSION,search_query,radius,limit)

url

'https://api.foursquare.com/v2/venues/search?client_id=200EPI4MFVFK1R3Q0K0RQ1MFUX1AF33GSU5YCSDG5S3AN1J1&client_secret=C2QF0IPD4XUCFMIBIOBRWU5MUPSBQD41J2EOXSEGFTKC1NGC&ll=43.8113,-79.193&v=20180605&query=Malvern / Rouge&radius=500&limit=100'

In [31]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e9f3f43fb34b5001b1ea360'},
 'response': {'venues': [{'id': '5561eacb498e09ff53b397ad',
    'name': 'Upper Rouge Trail',
    'location': {'lat': 43.809988,
     'lng': -79.186147,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.809988,
       'lng': -79.186147}],
     'distance': 569,
     'cc': 'CA',
     'country': 'Canada',
     'formattedAddress': ['Canada']},
    'categories': [{'id': '4bf58dd8d48988d159941735',
      'name': 'Trail',
      'pluralName': 'Trails',
      'shortName': 'Trail',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/hikingtrail_',
       'suffix': '.png'},
      'primary': True}],
    'referralId': 'v-1587494853',
    'hasPerk': False}]}}

In [32]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [33]:
results['response']

{'venues': [{'id': '5561eacb498e09ff53b397ad',
   'name': 'Upper Rouge Trail',
   'location': {'lat': 43.809988,
    'lng': -79.186147,
    'labeledLatLngs': [{'label': 'display',
      'lat': 43.809988,
      'lng': -79.186147}],
    'distance': 569,
    'cc': 'CA',
    'country': 'Canada',
    'formattedAddress': ['Canada']},
   'categories': [{'id': '4bf58dd8d48988d159941735',
     'name': 'Trail',
     'pluralName': 'Trails',
     'shortName': 'Trail',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/hikingtrail_',
      'suffix': '.png'},
     'primary': True}],
   'referralId': 'v-1587494853',
   'hasPerk': False}]}

In [34]:
#venues = results['response']['groups'][0]['items']
venues = results['response']['venues']

nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
#filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
filtered_columns = ['name', 'categories', 'location.lat', 'location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng,categories.1
0,Upper Rouge Trail,"[{'id': '4bf58dd8d48988d159941735', 'name': 'T...",43.809988,-79.186147,Trail


Looks like we only have one venue here

## 2. Explore Neighborhoods in Scarborough

In [35]:
#function to repeat the same process to all the neighborhoods in Scarborough
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [36]:
scarborough_venues = getNearbyVenues(names=scarborough_data['neighborhood'],
                                   latitudes=scarborough_data['latitude'],
                                   longitudes=scarborough_data['longitude']
                                  )

Malvern / Rouge
Rouge Hill / Port Union / Highland Creek
Guildwood / Morningside / West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park / Ionview / East Birchmount Park
Golden Mile / Clairlea / Oakridge
Cliffside / Cliffcrest / Scarborough Village West
Birch Cliff / Cliffside West
Dorset Park / Wexford Heights / Scarborough Town Centre
Wexford / Maryvale
Agincourt
Clarks Corners / Tam O'Shanter / Sullivan
Milliken / Agincourt North / Steeles East / L'Amoreaux East
Steeles West / L'Amoreaux West
Upper Rouge


In [37]:
print(scarborough_venues.shape)
scarborough_venues.head()

(114, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Malvern / Rouge,43.8113,-79.193,Canadian Appliance Source Whitby,43.808353,-79.191331,Home Service
1,Rouge Hill / Port Union / Highland Creek,43.7878,-79.1564,Fox and Fiddle,43.789082,-79.154459,Bar
2,Guildwood / Morningside / West Hill,43.7678,-79.1866,Chick-N-Joy,43.768752,-79.187982,Fried Chicken Joint
3,Guildwood / Morningside / West Hill,43.7678,-79.1866,Little Caesars Pizza,43.769046,-79.184386,Pizza Place
4,Guildwood / Morningside / West Hill,43.7678,-79.1866,Bulk Barn,43.771342,-79.184341,Food & Drink Shop


In [38]:
#Number of venues for each neighborhood
scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
Birch Cliff / Cliffside West,4,4,4,4,4,4
Cedarbrae,2,2,2,2,2,2
Clarks Corners / Tam O'Shanter / Sullivan,15,15,15,15,15,15
Cliffside / Cliffcrest / Scarborough Village West,8,8,8,8,8,8
Dorset Park / Wexford Heights / Scarborough Town Centre,2,2,2,2,2,2
Golden Mile / Clairlea / Oakridge,9,9,9,9,9,9
Guildwood / Morningside / West Hill,33,33,33,33,33,33
Kennedy Park / Ionview / East Birchmount Park,14,14,14,14,14,14
Malvern / Rouge,1,1,1,1,1,1


In [39]:
print('There are {} uniques categories.'.format(len(scarborough_venues['Venue Category'].unique())))

There are 60 uniques categories.


## 3. Analyze Each Neighborhood

In [40]:
# one hot encoding
scarborough_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarborough_onehot['Neighborhood'] = scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scarborough_onehot.columns[-1]] + list(scarborough_onehot.columns[:-1])
scarborough_onehot = scarborough_onehot[fixed_columns]

scarborough_onehot.head()

Unnamed: 0,Neighborhood,Asian Restaurant,Auto Garage,Badminton Court,Bakery,Bank,Bar,Beer Store,Bistro,Breakfast Spot,...,Shopping Mall,Skating Rink,Soccer Field,Spa,Sports Bar,Supermarket,Sushi Restaurant,Thai Restaurant,Thrift / Vintage Store,Train Station
0,Malvern / Rouge,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Rouge Hill / Port Union / Highland Creek,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Guildwood / Morningside / West Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Guildwood / Morningside / West Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Guildwood / Morningside / West Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [41]:
scarborough_onehot.shape

(114, 61)

#### Grouping rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [42]:
scarborough_grouped = scarborough_onehot.groupby('Neighborhood').mean().reset_index()
scarborough_grouped

Unnamed: 0,Neighborhood,Asian Restaurant,Auto Garage,Badminton Court,Bakery,Bank,Bar,Beer Store,Bistro,Breakfast Spot,...,Shopping Mall,Skating Rink,Soccer Field,Spa,Sports Bar,Supermarket,Sushi Restaurant,Thai Restaurant,Thrift / Vintage Store,Train Station
0,Agincourt,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,...,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Birch Cliff / Cliffside West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Clarks Corners / Tam O'Shanter / Sullivan,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,...,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0
4,Cliffside / Cliffcrest / Scarborough Village West,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.125,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Dorset Park / Wexford Heights / Scarborough To...,0.5,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Golden Mile / Clairlea / Oakridge,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Guildwood / Morningside / West Hill,0.0,0.0,0.0,0.0,0.060606,0.0,0.030303,0.0,0.030303,...,0.0,0.0,0.0,0.0,0.030303,0.030303,0.0,0.0,0.030303,0.0
8,Kennedy Park / Ionview / East Birchmount Park,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429
9,Malvern / Rouge,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
scarborough_grouped.shape

(16, 61)

In [44]:
num_top_venues = 5

for hood in scarborough_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = scarborough_grouped[scarborough_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                       venue  freq
0             Breakfast Spot  0.25
1            Badminton Court  0.25
2  Latin American Restaurant  0.25
3               Skating Rink  0.25
4               Liquor Store  0.00


----Birch Cliff / Cliffside West----
                   venue  freq
0        College Stadium  0.25
1  General Entertainment  0.25
2           Skating Rink  0.25
3                   Café  0.25
4               Pharmacy  0.00


----Cedarbrae----
                        venue  freq
0                      Lounge   0.5
1  Construction & Landscaping   0.5
2            Asian Restaurant   0.0
3              Ice Cream Shop   0.0
4          Italian Restaurant   0.0


----Clarks Corners / Tam O'Shanter / Sullivan----
               venue  freq
0           Pharmacy  0.13
1        Pizza Place  0.13
2  Convenience Store  0.07
3      Shopping Mall  0.07
4        Gas Station  0.07


----Cliffside / Cliffcrest / Scarborough Village West----
            venue  freq
0  Ice Cream 

#### Putting that into a pandas dataframe

In [45]:
# function to sort the venues in descending order.
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

 #### Create the new dataframe and display the top 10 venues for each neighborhood.

In [46]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarborough_grouped['Neighborhood']

for ind in np.arange(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], 
                                                                          num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Skating Rink,Badminton Court,Latin American Restaurant,Breakfast Spot,Fast Food Restaurant,Convenience Store,Department Store,Discount Store,Electronics Store,Train Station
1,Birch Cliff / Cliffside West,General Entertainment,Skating Rink,Café,College Stadium,Train Station,Coffee Shop,Greek Restaurant,Gas Station,Fried Chicken Joint,Food & Drink Shop
2,Cedarbrae,Lounge,Construction & Landscaping,Train Station,Gym,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant
3,Clarks Corners / Tam O'Shanter / Sullivan,Pharmacy,Pizza Place,Italian Restaurant,Shopping Mall,Convenience Store,Rental Car Location,Coffee Shop,Chinese Restaurant,Fast Food Restaurant,Bank
4,Cliffside / Cliffcrest / Scarborough Village West,Ice Cream Shop,Coffee Shop,Bank,Pharmacy,Bistro,Pizza Place,Sandwich Place,Discount Store,Construction & Landscaping,Convenience Store


In [47]:
neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Skating Rink,Badminton Court,Latin American Restaurant,Breakfast Spot,Fast Food Restaurant,Convenience Store,Department Store,Discount Store,Electronics Store,Train Station
1,Birch Cliff / Cliffside West,General Entertainment,Skating Rink,Café,College Stadium,Train Station,Coffee Shop,Greek Restaurant,Gas Station,Fried Chicken Joint,Food & Drink Shop
2,Cedarbrae,Lounge,Construction & Landscaping,Train Station,Gym,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant
3,Clarks Corners / Tam O'Shanter / Sullivan,Pharmacy,Pizza Place,Italian Restaurant,Shopping Mall,Convenience Store,Rental Car Location,Coffee Shop,Chinese Restaurant,Fast Food Restaurant,Bank
4,Cliffside / Cliffcrest / Scarborough Village West,Ice Cream Shop,Coffee Shop,Bank,Pharmacy,Bistro,Pizza Place,Sandwich Place,Discount Store,Construction & Landscaping,Convenience Store
5,Dorset Park / Wexford Heights / Scarborough To...,Asian Restaurant,Bakery,College Stadium,Grocery Store,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant
6,Golden Mile / Clairlea / Oakridge,Intersection,Bus Line,Park,Bus Station,Soccer Field,Coffee Shop,Bakery,Fried Chicken Joint,Food & Drink Shop,College Stadium
7,Guildwood / Morningside / West Hill,Pizza Place,Fast Food Restaurant,Coffee Shop,Bank,Liquor Store,Thrift / Vintage Store,Laundromat,Juice Bar,Intersection,Grocery Store
8,Kennedy Park / Ionview / East Birchmount Park,Coffee Shop,Train Station,Sandwich Place,Intersection,Discount Store,Department Store,Light Rail Station,Grocery Store,Pharmacy,Bus Station
9,Malvern / Rouge,Home Service,Gym,Grocery Store,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant,Electronics Store


In [48]:
scarborough_data

Unnamed: 0,postcode,borough,neighborhood,latitude,longitude
0,M1B,Scarborough,Malvern / Rouge,43.8113,-79.193
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.7878,-79.1564
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.7678,-79.1866
3,M1G,Scarborough,Woburn,43.7712,-79.2144
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389
5,M1J,Scarborough,Scarborough Village,43.7464,-79.2323
6,M1K,Scarborough,Kennedy Park / Ionview / East Birchmount Park,43.7298,-79.2639
7,M1L,Scarborough,Golden Mile / Clairlea / Oakridge,43.7122,-79.2843
8,M1M,Scarborough,Cliffside / Cliffcrest / Scarborough Village West,43.7247,-79.2312
9,M1N,Scarborough,Birch Cliff / Cliffside West,43.6952,-79.2646


In [49]:
#drop Upper Rouge neiborhood becaus it has no venues
scarborough_data.drop(scarborough_data[scarborough_data.neighborhood == "Upper Rouge"].index,inplace=True)

In [50]:
scarborough_data

Unnamed: 0,postcode,borough,neighborhood,latitude,longitude
0,M1B,Scarborough,Malvern / Rouge,43.8113,-79.193
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.7878,-79.1564
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.7678,-79.1866
3,M1G,Scarborough,Woburn,43.7712,-79.2144
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389
5,M1J,Scarborough,Scarborough Village,43.7464,-79.2323
6,M1K,Scarborough,Kennedy Park / Ionview / East Birchmount Park,43.7298,-79.2639
7,M1L,Scarborough,Golden Mile / Clairlea / Oakridge,43.7122,-79.2843
8,M1M,Scarborough,Cliffside / Cliffcrest / Scarborough Village West,43.7247,-79.2312
9,M1N,Scarborough,Birch Cliff / Cliffside West,43.6952,-79.2646


## 4. Cluster Neighborhoods

Running *k*-means to cluster the neighborhood into 3 clusters.

In [51]:
# set number of clusters
kclusters = 4

scarborough_grouped_clustering = scarborough_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarborough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [52]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

scarborough_merged = scarborough_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
scarborough_merged = scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='neighborhood')

scarborough_merged.head() # check the last columns!

Unnamed: 0,postcode,borough,neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,Malvern / Rouge,43.8113,-79.193,0,Home Service,Gym,Grocery Store,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant,Electronics Store
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.7878,-79.1564,3,Bar,Train Station,College Stadium,Grocery Store,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.7678,-79.1866,0,Pizza Place,Fast Food Restaurant,Coffee Shop,Bank,Liquor Store,Thrift / Vintage Store,Laundromat,Juice Bar,Intersection,Grocery Store
3,M1G,Scarborough,Woburn,43.7712,-79.2144,2,Korean Restaurant,Train Station,Gym,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant,Electronics Store
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389,0,Lounge,Construction & Landscaping,Train Station,Gym,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant


In [53]:
scarborough_merged

Unnamed: 0,postcode,borough,neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,Malvern / Rouge,43.8113,-79.193,0,Home Service,Gym,Grocery Store,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant,Electronics Store
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.7878,-79.1564,3,Bar,Train Station,College Stadium,Grocery Store,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.7678,-79.1866,0,Pizza Place,Fast Food Restaurant,Coffee Shop,Bank,Liquor Store,Thrift / Vintage Store,Laundromat,Juice Bar,Intersection,Grocery Store
3,M1G,Scarborough,Woburn,43.7712,-79.2144,2,Korean Restaurant,Train Station,Gym,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant,Electronics Store
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389,0,Lounge,Construction & Landscaping,Train Station,Gym,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant
5,M1J,Scarborough,Scarborough Village,43.7464,-79.2323,0,Park,Spa,Grocery Store,Badminton Court,Bakery,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop
6,M1K,Scarborough,Kennedy Park / Ionview / East Birchmount Park,43.7298,-79.2639,0,Coffee Shop,Train Station,Sandwich Place,Intersection,Discount Store,Department Store,Light Rail Station,Grocery Store,Pharmacy,Bus Station
7,M1L,Scarborough,Golden Mile / Clairlea / Oakridge,43.7122,-79.2843,0,Intersection,Bus Line,Park,Bus Station,Soccer Field,Coffee Shop,Bakery,Fried Chicken Joint,Food & Drink Shop,College Stadium
8,M1M,Scarborough,Cliffside / Cliffcrest / Scarborough Village West,43.7247,-79.2312,0,Ice Cream Shop,Coffee Shop,Bank,Pharmacy,Bistro,Pizza Place,Sandwich Place,Discount Store,Construction & Landscaping,Convenience Store
9,M1N,Scarborough,Birch Cliff / Cliffside West,43.6952,-79.2646,0,General Entertainment,Skating Rink,Café,College Stadium,Train Station,Coffee Shop,Greek Restaurant,Gas Station,Fried Chicken Joint,Food & Drink Shop


#### Visualizing Clusters

In [54]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarborough_merged['latitude'], scarborough_merged['longitude'], 
                                  scarborough_merged['neighborhood'], scarborough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 5. Examine Clusters

### Cluster 1

In [56]:
Cluster_index = 1
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == Cluster_index-1, 
                     scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,0,Home Service,Gym,Grocery Store,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant,Electronics Store
2,Scarborough,0,Pizza Place,Fast Food Restaurant,Coffee Shop,Bank,Liquor Store,Thrift / Vintage Store,Laundromat,Juice Bar,Intersection,Grocery Store
4,Scarborough,0,Lounge,Construction & Landscaping,Train Station,Gym,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant
5,Scarborough,0,Park,Spa,Grocery Store,Badminton Court,Bakery,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop
6,Scarborough,0,Coffee Shop,Train Station,Sandwich Place,Intersection,Discount Store,Department Store,Light Rail Station,Grocery Store,Pharmacy,Bus Station
7,Scarborough,0,Intersection,Bus Line,Park,Bus Station,Soccer Field,Coffee Shop,Bakery,Fried Chicken Joint,Food & Drink Shop,College Stadium
8,Scarborough,0,Ice Cream Shop,Coffee Shop,Bank,Pharmacy,Bistro,Pizza Place,Sandwich Place,Discount Store,Construction & Landscaping,Convenience Store
9,Scarborough,0,General Entertainment,Skating Rink,Café,College Stadium,Train Station,Coffee Shop,Greek Restaurant,Gas Station,Fried Chicken Joint,Food & Drink Shop
10,Scarborough,0,Asian Restaurant,Bakery,College Stadium,Grocery Store,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant
12,Scarborough,0,Skating Rink,Badminton Court,Latin American Restaurant,Breakfast Spot,Fast Food Restaurant,Convenience Store,Department Store,Discount Store,Electronics Store,Train Station


### Cluster 2

In [57]:
Cluster_index = 2
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == Cluster_index-1, 
                     scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Scarborough,1,Auto Garage,Train Station,College Stadium,Grocery Store,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant


### Cluster 3

In [58]:
Cluster_index = 3
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == Cluster_index-1, 
                     scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Scarborough,2,Korean Restaurant,Train Station,Gym,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant,Electronics Store


### Cluster 4

In [59]:
Cluster_index = 4
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == Cluster_index-1, 
                     scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,3,Bar,Train Station,College Stadium,Grocery Store,Greek Restaurant,General Entertainment,Gas Station,Fried Chicken Joint,Food & Drink Shop,Fast Food Restaurant


### Conclusion

Most of the Neighborhoods fall in the first cluster which is characterized by restuarants, enterntainment and wellness as the most common venues