## Data Frame of Toronto's postal codes from Wikipedia

In [59]:
# Import libraries
import numpy as np
import pandas as pd
import geocoder
import folium
import requests 
import json 
import xlrd
import matplotlib.cm as cm
import matplotlib.colors as colors
import pandas as pd

from pandas.io.json import json_normalize 
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim
from pandas.io.html import read_html

In [5]:
# Reading the table from Wikipedia
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki = read_html(url)
df = pd.DataFrame(wiki[0])
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [6]:
# Cleaning unnecessary data
df = df[df['Borough'] != 'Not assigned']
df.rename(columns={"Postal Code": "PostalCode", "Neighbourhood": "Neighborhood"}, inplace = True)
df = df.dropna()
df

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [7]:
# Group neighborhoods by 'PostalCode' and 'Borough'
df = df.groupby(['PostalCode','Borough'])['Neighborhood'].apply(', '.join).reset_index()
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [10]:
# Function that return latitude and longitude
def get_latlong(postal_code):
    lat_long_coords = None
    while(lat_long_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
        lat_long_coords = g.latlng
    return lat_long_coords

In [11]:
# Running the 'get_latlong' function on all Postal Codes
coords = np.zeros([0,2])
for pc in df['PostalCode'].tolist():
    coords = np.vstack([coords, get_latlong(pc)])

Status code Unknown from https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/find: ERROR - HTTPSConnectionPool(host='geocode.arcgis.com', port=443): Read timed out. (read timeout=5.0)
Status code Unknown from https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/find: ERROR - HTTPSConnectionPool(host='geocode.arcgis.com', port=443): Read timed out. (read timeout=5.0)
Status code Unknown from https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/find: ERROR - HTTPSConnectionPool(host='geocode.arcgis.com', port=443): Read timed out. (read timeout=5.0)
Status code Unknown from https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/find: ERROR - HTTPSConnectionPool(host='geocode.arcgis.com', port=443): Read timed out. (read timeout=5.0)


In [12]:
# Adding the coordinates to the data frame
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
df['Latitude'] = df_coords['Latitude']
df['Longitude'] = df_coords['Longitude']
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.81153,-79.19552
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78564,-79.15871
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.17520
3,M1G,Scarborough,Woburn,43.76820,-79.21761
4,M1H,Scarborough,Cedarbrae,43.76969,-79.23944
...,...,...,...,...,...
98,M9N,York,Weston,43.70357,-79.51645
99,M9P,Etobicoke,Westmount,43.69623,-79.52926
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.68674,-79.55729
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.74453,-79.58624


In [23]:
df_scarborough = df[df.Borough == 'Scarborough']
df_scarborough

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.81153,-79.19552
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78564,-79.15871
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1752
3,M1G,Scarborough,Woburn,43.7682,-79.21761
4,M1H,Scarborough,Cedarbrae,43.76969,-79.23944
5,M1J,Scarborough,Scarborough Village,43.74309,-79.23526
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.72861,-79.26367
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.71406,-79.28412
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.7236,-79.23496
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.69539,-79.26194


In [24]:
address = 'Scarborough,Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough,Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough,Toronto are 43.773077, -79.257774.


In [26]:
# create map of Scarborough using latitude and longitude values
map_Scarborough = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_scarborough['Latitude'], df_scarborough['Longitude'], df_scarborough['Borough'], df_scarborough['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Scarborough)  
    
map_Scarborough

In [19]:
# Define Foursquare Credentials and Version
CLIENT_ID = '4J45E1AFI020YX4DW1WD1DGB55OPREU0ZMAZ30YNJL02YVRO' # my Foursquare ID
CLIENT_SECRET = 'H1UG4SHLDKZKY3XRSYSPOPILCQYB5LGQUZQI4TJOBE5JLTRY' # my Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 30 # limit of number of venues returned by Foursquare API

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 4J45E1AFI020YX4DW1WD1DGB55OPREU0ZMAZ30YNJL02YVRO
CLIENT_SECRET:H1UG4SHLDKZKY3XRSYSPOPILCQYB5LGQUZQI4TJOBE5JLTRY


In [20]:
# Function that explore the venues in the neighborhoods
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [27]:
Scarborough_venues = getNearbyVenues(names = df_scarborough['Neighborhood'], latitudes = df_scarborough['Latitude'], longitudes = df_scarborough['Longitude'])

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge


In [28]:
# Let's check the size of the resulting dataframe
print(Scarborough_venues.shape)
Scarborough_venues.head()

(88, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.81153,-79.19552,T Hamilton & Son Roofing Inc,43.807985,-79.198194,Construction & Landscaping
1,"Malvern, Rouge",43.81153,-79.19552,Canadian Appliance Source Whitby,43.808353,-79.191331,Home Service
2,"Rouge Hill, Port Union, Highland Creek",43.78564,-79.15871,Great Shine Window Cleaning,43.783145,-79.157431,Home Service
3,"Rouge Hill, Port Union, Highland Creek",43.78564,-79.15871,Royal Canadian Legion,43.782533,-79.163085,Bar
4,"Guildwood, Morningside, West Hill",43.76575,-79.1752,Homestead Roofing Repair,43.76514,-79.178663,Construction & Landscaping


In [29]:
# Let's check how many venues were returned for each neighborhood
Scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,7,7,7,7,7,7
"Birch Cliff, Cliffside West",5,5,5,5,5,5
Cedarbrae,2,2,2,2,2,2
"Clarks Corners, Tam O'Shanter, Sullivan",11,11,11,11,11,11
"Cliffside, Cliffcrest, Scarborough Village West",10,10,10,10,10,10
"Dorset Park, Wexford Heights, Scarborough Town Centre",2,2,2,2,2,2
"Golden Mile, Clairlea, Oakridge",10,10,10,10,10,10
"Guildwood, Morningside, West Hill",3,3,3,3,3,3
"Kennedy Park, Ionview, East Birchmount Park",8,8,8,8,8,8
"Malvern, Rouge",2,2,2,2,2,2


In [30]:
# Let's find out how many unique categories can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(Scarborough_venues['Venue Category'].unique())))

There are 53 uniques categories.


In [34]:
# one hot encoding
Scarborough_onehot = pd.get_dummies(Scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# move neighborhood column to the first column
#Scarborough_onehot.drop(columns = ['Neighborhood'], inplace = True)
Scarborough_onehot.insert(0, 'Neighborhood', Scarborough_venues['Neighborhood'])

Scarborough_onehot

Unnamed: 0,Neighborhood,Auto Garage,Badminton Court,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Bus Stop,...,Sandwich Place,Shopping Mall,Skating Rink,Soccer Field,Supermarket,Sushi Restaurant,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,"Steeles West, L'Amoreaux West",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
84,"Steeles West, L'Amoreaux West",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
85,"Steeles West, L'Amoreaux West",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
86,"Steeles West, L'Amoreaux West",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [35]:
# Let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
Scarborough_grouped = Scarborough_onehot.groupby('Neighborhood').mean().reset_index()
Scarborough_grouped

Unnamed: 0,Neighborhood,Auto Garage,Badminton Court,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Bus Stop,...,Sandwich Place,Shopping Mall,Skating Rink,Soccer Field,Supermarket,Sushi Restaurant,Thai Restaurant,Thrift / Vintage Store,Trail,Train Station
0,Agincourt,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.142857,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0
1,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
3,"Clarks Corners, Tam O'Shanter, Sullivan",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,...,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0
4,"Cliffside, Cliffcrest, Scarborough Village West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Dorset Park, Wexford Heights, Scarborough Town...",0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Golden Mile, Clairlea, Oakridge",0.0,0.0,0.2,0.0,0.0,0.0,0.2,0.1,0.0,...,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0
7,"Guildwood, Morningside, West Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Kennedy Park, Ionview, East Birchmount Park",0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Malvern, Rouge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
# Let's define a function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [53]:
# Now let's use the function and create new dataframe and display the top 5 venues for each neighborhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Scarborough_grouped['Neighborhood']

for ind in np.arange(Scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Shopping Mall,Discount Store,Badminton Court,Park,Sushi Restaurant,Supermarket,Pool,Construction & Landscaping,Convenience Store,Department Store
1,"Birch Cliff, Cliffside West",College Stadium,General Entertainment,Skating Rink,Café,Gym,Train Station,Golf Course,Gift Shop,Fried Chicken Joint,Fast Food Restaurant
2,Cedarbrae,Trail,Playground,Coffee Shop,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
3,"Clarks Corners, Tam O'Shanter, Sullivan",Fast Food Restaurant,Pharmacy,Coffee Shop,Golf Course,Pizza Place,Bus Stop,Shopping Mall,Fried Chicken Joint,Thai Restaurant,Chinese Restaurant
4,"Cliffside, Cliffcrest, Scarborough Village West",Ice Cream Shop,Coffee Shop,Pharmacy,Pizza Place,Restaurant,Sandwich Place,Liquor Store,Discount Store,Hardware Store,Department Store
5,"Dorset Park, Wexford Heights, Scarborough Town...",Bakery,Gift Shop,Train Station,College Stadium,Golf Course,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
6,"Golden Mile, Clairlea, Oakridge",Bus Line,Bakery,Coffee Shop,Intersection,Metro Station,Bus Station,Gym,Soccer Field,Breakfast Spot,Convenience Store
7,"Guildwood, Morningside, West Hill",Gym / Fitness Center,Construction & Landscaping,Park,College Stadium,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
8,"Kennedy Park, Ionview, East Birchmount Park",Coffee Shop,Hobby Shop,Bus Line,Light Rail Station,Discount Store,Department Store,Convenience Store,College Stadium,Gift Shop,General Entertainment
9,"Malvern, Rouge",Home Service,Construction & Landscaping,Train Station,College Stadium,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store


In [54]:
# Run k-means to cluster the neighborhood into 3 clusters
kclusters = 3
Scarborough_grouped_clustering = Scarborough_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Scarborough_grouped_clustering)
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 2])

In [55]:
# Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Scarborough_merged = df_scarborough

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Scarborough_merged = Scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Scarborough_merged = Scarborough_merged.dropna()
Scarborough_merged['Cluster Labels'] = Scarborough_merged['Cluster Labels'].astype(int)

Scarborough_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.81153,-79.19552,2,Home Service,Construction & Landscaping,Train Station,College Stadium,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78564,-79.15871,2,Home Service,Bar,Train Station,College Stadium,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1752,0,Gym / Fitness Center,Construction & Landscaping,Park,College Stadium,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
3,M1G,Scarborough,Woburn,43.7682,-79.21761,0,Coffee Shop,Korean Restaurant,Business Service,Park,College Stadium,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
4,M1H,Scarborough,Cedarbrae,43.76969,-79.23944,0,Trail,Playground,Coffee Shop,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
5,M1J,Scarborough,Scarborough Village,43.74309,-79.23526,0,Ice Cream Shop,Train Station,Restaurant,Coffee Shop,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.72861,-79.26367,0,Coffee Shop,Hobby Shop,Bus Line,Light Rail Station,Discount Store,Department Store,Convenience Store,College Stadium,Gift Shop,General Entertainment
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.71406,-79.28412,0,Bus Line,Bakery,Coffee Shop,Intersection,Metro Station,Bus Station,Gym,Soccer Field,Breakfast Spot,Convenience Store
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.7236,-79.23496,0,Ice Cream Shop,Coffee Shop,Pharmacy,Pizza Place,Restaurant,Sandwich Place,Liquor Store,Discount Store,Hardware Store,Department Store
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.69539,-79.26194,0,College Stadium,General Entertainment,Skating Rink,Café,Gym,Train Station,Golf Course,Gift Shop,Fried Chicken Joint,Fast Food Restaurant


In [58]:
# create map to visualize the clusters
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Scarborough_merged['Latitude'], Scarborough_merged['Longitude'], Scarborough_merged['Neighborhood'], Scarborough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [65]:
# Average house prices from https://open.toronto.ca/dataset/wellbeing-toronto-housing/
df_houseavg = pd.read_excel(r'C:\Users\shayb\OneDrive\Documents\Learning\Coursera\IBM Data Science Professional Certificate\9 - Applied Data Science Capstone\Week 5\Scarborough.xlsx', sheet_name='Scarborough')
df_houseavg

Unnamed: 0,Neighborhood,Avg price
0,"Rouge Hill, Port Union, Highland Creek",426850
1,"Guildwood, Morningside, West Hill",444309
2,Woburn,316584
3,Cedarbrae,289646
4,Scarborough Village,356096
5,"Kennedy Park, Ionview, East Birchmount Park",293600
6,"Golden Mile, Clairlea, Oakridge",347446
7,"Cliffside, Cliffcrest, Scarborough Village West",542218
8,"Birch Cliff, Cliffside West",522905
9,"Dorset Park, Wexford Heights, Scarborough Town...",279189


In [66]:
#Adding the prices to our df
Scarborough_merged['AvgPrice'] = df_houseavg['Avg price']
Scarborough_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,AvgPrice
0,M1B,Scarborough,"Malvern, Rouge",43.81153,-79.19552,2,Home Service,Construction & Landscaping,Train Station,College Stadium,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,426850
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78564,-79.15871,2,Home Service,Bar,Train Station,College Stadium,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,444309
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1752,0,Gym / Fitness Center,Construction & Landscaping,Park,College Stadium,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,316584
3,M1G,Scarborough,Woburn,43.7682,-79.21761,0,Coffee Shop,Korean Restaurant,Business Service,Park,College Stadium,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,289646
4,M1H,Scarborough,Cedarbrae,43.76969,-79.23944,0,Trail,Playground,Coffee Shop,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,356096
5,M1J,Scarborough,Scarborough Village,43.74309,-79.23526,0,Ice Cream Shop,Train Station,Restaurant,Coffee Shop,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,293600
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.72861,-79.26367,0,Coffee Shop,Hobby Shop,Bus Line,Light Rail Station,Discount Store,Department Store,Convenience Store,College Stadium,Gift Shop,General Entertainment,347446
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.71406,-79.28412,0,Bus Line,Bakery,Coffee Shop,Intersection,Metro Station,Bus Station,Gym,Soccer Field,Breakfast Spot,Convenience Store,542218
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.7236,-79.23496,0,Ice Cream Shop,Coffee Shop,Pharmacy,Pizza Place,Restaurant,Sandwich Place,Liquor Store,Discount Store,Hardware Store,Department Store,522905
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.69539,-79.26194,0,College Stadium,General Entertainment,Skating Rink,Café,Gym,Train Station,Golf Course,Gift Shop,Fried Chicken Joint,Fast Food Restaurant,279189


In [68]:
# First cluster
Scarborough_merged[Scarborough_merged['Cluster Labels'] == 0]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,AvgPrice
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1752,0,Gym / Fitness Center,Construction & Landscaping,Park,College Stadium,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,316584
3,M1G,Scarborough,Woburn,43.7682,-79.21761,0,Coffee Shop,Korean Restaurant,Business Service,Park,College Stadium,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,289646
4,M1H,Scarborough,Cedarbrae,43.76969,-79.23944,0,Trail,Playground,Coffee Shop,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,356096
5,M1J,Scarborough,Scarborough Village,43.74309,-79.23526,0,Ice Cream Shop,Train Station,Restaurant,Coffee Shop,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,293600
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.72861,-79.26367,0,Coffee Shop,Hobby Shop,Bus Line,Light Rail Station,Discount Store,Department Store,Convenience Store,College Stadium,Gift Shop,General Entertainment,347446
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.71406,-79.28412,0,Bus Line,Bakery,Coffee Shop,Intersection,Metro Station,Bus Station,Gym,Soccer Field,Breakfast Spot,Convenience Store,542218
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.7236,-79.23496,0,Ice Cream Shop,Coffee Shop,Pharmacy,Pizza Place,Restaurant,Sandwich Place,Liquor Store,Discount Store,Hardware Store,Department Store,522905
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.69539,-79.26194,0,College Stadium,General Entertainment,Skating Rink,Café,Gym,Train Station,Golf Course,Gift Shop,Fried Chicken Joint,Fast Food Restaurant,279189
10,M1P,Scarborough,"Dorset Park, Wexford Heights, Scarborough Town...",43.75998,-79.26837,0,Bakery,Gift Shop,Train Station,College Stadium,Golf Course,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,422689
11,M1R,Scarborough,"Wexford, Maryvale",43.75071,-79.30056,0,Auto Garage,Convenience Store,Gym,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,332710


In [69]:
# Second cluster
Scarborough_merged[Scarborough_merged['Cluster Labels'] == 1]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,AvgPrice
14,M1V,Scarborough,"Milliken, Agincourt North, Steeles East, L'Amo...",43.81781,-79.28024,1,Pharmacy,Train Station,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,Discount Store,Department Store,440688


In [70]:
# Third cluster
Scarborough_merged[Scarborough_merged['Cluster Labels'] == 2]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,AvgPrice
0,M1B,Scarborough,"Malvern, Rouge",43.81153,-79.19552,2,Home Service,Construction & Landscaping,Train Station,College Stadium,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,426850
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78564,-79.15871,2,Home Service,Bar,Train Station,College Stadium,Golf Course,Gift Shop,General Entertainment,Fried Chicken Joint,Fast Food Restaurant,Electronics Store,444309


In [67]:
avg = Scarborough_merged.groupby('Cluster Labels').mean().reset_index()
avg

Unnamed: 0,Cluster Labels,Latitude,Longitude,AvgPrice
0,0,43.753713,-79.259104,386062.384615
1,1,43.81781,-79.28024,440688.0
2,2,43.798585,-79.177115,435579.5
