## Battle of the Neighbourhoods

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

import json

import requests
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium

Read table from the wikipedia page, and edit as in part 1

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
df = pd.read_html(url, header = 0)[0]
df = df[df.Borough != 'Not assigned']
df = df.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(list).apply(lambda x:', '.join(x)).to_frame().reset_index()
for index, row in df.iterrows():
    if row['Neighbourhood'] == 'Not assigned':
        row['Neighbourhood'] = row['Borough']

Add in latitude and longitudes as in part 2

In [3]:
longLats = pd.read_csv("Geospatial_Coordinates.csv")
longLats = longLats.rename(index=str, columns={'Postal Code': "Postcode"})
df = pd.merge(df, longLats, on='Postcode', how='inner')

In [4]:
df.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [17]:
CLIENT_ID = 'KYXXNTKR5QZGYRJMZUGBNAD4LQSX2RMHI4I00DGKZDCOMDRJ' # your Foursquare ID
CLIENT_SECRET = 'T0HTARF4BB1FZUMBA2511ECI0CWJE11SB2M03E053RUGMFQZ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

LIMIT = 100
radius = 500

Your credentails:
CLIENT_ID: KYXXNTKR5QZGYRJMZUGBNAD4LQSX2RMHI4I00DGKZDCOMDRJ
CLIENT_SECRET:T0HTARF4BB1FZUMBA2511ECI0CWJE11SB2M03E053RUGMFQZ


In [42]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [11]:
df.reset_index(inplace=True)
df.drop('index', axis=1, inplace=True)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [12]:
boroughs = df['Borough'].unique().tolist()

In [13]:
lat = df['Latitude'].mean()
lon = df['Longitude'].mean()
print('The geographical coordinates of Toronto are {}, {}'.format(lat, lon))

The geographical coordinates of Toronto are 43.70460773398059, -79.39715291165048


In [14]:
borough_color = {}
for borough in boroughs:
    borough_color[borough]= '#%02X%02X%02X' % tuple(np.random.choice(range(256), size=3))

In [16]:
map_toronto = folium.Map(location=[lat, lon], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], 
                                           df['Longitude'],
                                           df['Borough'], 
                                           df['Neighborhood']):
    label_text = borough + ' - ' + neighborhood
    label = folium.Popup(label_text)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=borough_color[borough],
        fill_color=borough_color[borough],
        fill_opacity=0.7).add_to(map_toronto)  
    

In [18]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [19]:
venues = getNearbyVenues(names=df['Neighborhood'],
                                latitudes=df['Latitude'],
                                longitudes=df['Longitude'])

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

#### NB: Community centre has been changed to skating rink to match with the venue types Foursquare offers

In [25]:
venues.groupby('Neighborhood').count()
"Skating Rink" in venues['Venue Category'].unique()

True

In [26]:
# one hot encoding
to_onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
to_onehot['Neighborhoods'] = venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [to_onehot.columns[-1]] + list(to_onehot.columns[:-1])
to_onehot = to_onehot[fixed_columns]

print(to_onehot.shape)
to_onehot.head()

(2258, 280)


Unnamed: 0,Neighborhoods,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
to_grouped = to_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(to_grouped.shape)

(100, 280)


In [29]:
len(to_grouped[to_grouped["Skating Rink"] > 0])

4

In [32]:
to_rinks = to_grouped[["Neighborhoods","Skating Rink"]]

In [33]:
# set number of clusters
toclusters = 5

to_clustering = to_rinks.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=toclusters, random_state=0).fit(to_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 3, 0, 0, 0, 0, 2], dtype=int32)

In [35]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
to_merged = to_rinks.copy()

# add clustering labels
to_merged["Cluster Labels"] = kmeans.labels_

to_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)

Locate venues in the area

In [44]:
map_clusters = folium.Map(location=[lat, lon], zoom_start=11)

# set color scheme for the clusters
x = np.arange(toclusters)
ys = [i+x+(i*x)**2 for i in range(toclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(to_merged['Neighborhood Latitude'], to_merged['Neighborhood Longitude'], to_merged['Neighborhood'], to_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster))
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [48]:
to_merged.loc[to_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Skating Rink,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Adelaide, King, Richmond",0.0,0,43.650571,-79.384568,Four Seasons Centre for the Performing Arts,43.650592,-79.385806,Concert Hall
65,"Little Portugal, Trinity",0.0,0,43.647927,-79.419750,Gift Shop,43.646149,-79.419481,Cocktail Bar
65,"Little Portugal, Trinity",0.0,0,43.647927,-79.419750,The Communist's Daughter,43.649362,-79.420963,Bar
65,"Little Portugal, Trinity",0.0,0,43.647927,-79.419750,Artscape Youngplace,43.646921,-79.417342,Art Gallery
65,"Little Portugal, Trinity",0.0,0,43.647927,-79.419750,YogaSpace,43.647607,-79.420133,Yoga Studio
65,"Little Portugal, Trinity",0.0,0,43.647927,-79.419750,Union Restaurant,43.645860,-79.419532,New American Restaurant
65,"Little Portugal, Trinity",0.0,0,43.647927,-79.419750,Pho Rua Vang (Golden Turtle),43.646893,-79.419778,Vietnamese Restaurant
65,"Little Portugal, Trinity",0.0,0,43.647927,-79.419750,Paris Paris Bar,43.649237,-79.421436,Wine Bar
65,"Little Portugal, Trinity",0.0,0,43.647927,-79.419750,Mamakas Taverna,43.646042,-79.419679,Greek Restaurant
65,"Little Portugal, Trinity",0.0,0,43.647927,-79.419750,Reposado,43.647321,-79.420032,Bar


In [47]:
to_merged.loc[to_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Skating Rink,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
98,Woodbine Heights,0.125,1,43.695344,-79.318389,East York Memorial Arena,43.697224,-79.315397,Skating Rink
98,Woodbine Heights,0.125,1,43.695344,-79.318389,The Beer Store,43.693731,-79.316759,Beer Store
98,Woodbine Heights,0.125,1,43.695344,-79.318389,Shoppers Drug Mart,43.699244,-79.318939,Pharmacy
98,Woodbine Heights,0.125,1,43.695344,-79.318389,Stan Wadlow Park,43.697836,-79.314303,Park
98,Woodbine Heights,0.125,1,43.695344,-79.318389,TTC Stop #8913,43.696486,-79.317577,Bus Stop
98,Woodbine Heights,0.125,1,43.695344,-79.318389,East York Memorial Arena,43.697624,-79.315145,Athletics & Sports
98,Woodbine Heights,0.125,1,43.695344,-79.318389,Beauty Boutique by Shoppers Drug Mart,43.699514,-79.319287,Cosmetics Shop
98,Woodbine Heights,0.125,1,43.695344,-79.318389,East York Curling Club,43.696827,-79.313658,Curling Ice


In [49]:
to_merged.loc[to_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Skating Rink,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
9,"Birch Cliff, Cliffside West",0.25,2,43.692657,-79.264848,Birchmount Community Centre,43.695175,-79.262161,General Entertainment
9,"Birch Cliff, Cliffside West",0.25,2,43.692657,-79.264848,The Birchcliff,43.691666,-79.264532,Café
9,"Birch Cliff, Cliffside West",0.25,2,43.692657,-79.264848,Scarborough Gardens,43.694647,-79.26223,Skating Rink
9,"Birch Cliff, Cliffside West",0.25,2,43.692657,-79.264848,Birchmount Stadium,43.695323,-79.261293,College Stadium


In [50]:
to_merged.loc[to_merged['Cluster Labels'] == 3]

Unnamed: 0,Neighborhood,Skating Rink,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
4,"Alderwood, Long Branch",0.1,3,43.602414,-79.543484,Toronto Gymnastics International,43.599832,-79.542924,Gym
4,"Alderwood, Long Branch",0.1,3,43.602414,-79.543484,Il Paesano Pizzeria & Restaurant,43.60128,-79.545028,Pizza Place
4,"Alderwood, Long Branch",0.1,3,43.602414,-79.543484,Alderwood Pool,43.601802,-79.547247,Pool
4,"Alderwood, Long Branch",0.1,3,43.602414,-79.543484,Sir Adam Beck Rink,43.602526,-79.547455,Skating Rink
4,"Alderwood, Long Branch",0.1,3,43.602414,-79.543484,T.Shevchenko Ukrainian Community Centre,43.602207,-79.543662,Dance Studio
4,"Alderwood, Long Branch",0.1,3,43.602414,-79.543484,Pizza Pizza,43.60534,-79.547252,Pizza Place
4,"Alderwood, Long Branch",0.1,3,43.602414,-79.543484,Subway,43.599262,-79.54434,Sandwich Place
4,"Alderwood, Long Branch",0.1,3,43.602414,-79.543484,Tim Hortons,43.602396,-79.545048,Coffee Shop
4,"Alderwood, Long Branch",0.1,3,43.602414,-79.543484,Timothy's Pub,43.600165,-79.544699,Pub
4,"Alderwood, Long Branch",0.1,3,43.602414,-79.543484,Rexall,43.602165,-79.545372,Pharmacy


In [51]:
to_merged.loc[to_merged['Cluster Labels'] == 4]

Unnamed: 0,Neighborhood,Skating Rink,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
50,"Harbourfront East, Toronto Islands, Union Station",0.01,4,43.640816,-79.381752,Starbucks,43.640990,-79.376264,Coffee Shop
50,"Harbourfront East, Toronto Islands, Union Station",0.01,4,43.640816,-79.381752,Piazza Manna,43.641460,-79.377719,Italian Restaurant
50,"Harbourfront East, Toronto Islands, Union Station",0.01,4,43.640816,-79.381752,Toronto Blue Jays Box Office,43.642416,-79.385862,Baseball Stadium
50,"Harbourfront East, Toronto Islands, Union Station",0.01,4,43.640816,-79.381752,Sportchek,43.642815,-79.380408,Sporting Goods Shop
50,"Harbourfront East, Toronto Islands, Union Station",0.01,4,43.640816,-79.381752,Jays Shop Stadium Edition,43.641721,-79.387127,Sporting Goods Shop
50,"Harbourfront East, Toronto Islands, Union Station",0.01,4,43.640816,-79.381752,Queen's Quay Terminal,43.638318,-79.380413,Shopping Mall
50,"Harbourfront East, Toronto Islands, Union Station",0.01,4,43.640816,-79.381752,Popeyes Louisiana Kitchen,43.639864,-79.381136,Fried Chicken Joint
50,"Harbourfront East, Toronto Islands, Union Station",0.01,4,43.640816,-79.381752,Thai Express,43.642642,-79.383375,Restaurant
50,"Harbourfront East, Toronto Islands, Union Station",0.01,4,43.640816,-79.381752,The Goodman Pub & Kitchen,43.638301,-79.380008,Restaurant
50,"Harbourfront East, Toronto Islands, Union Station",0.01,4,43.640816,-79.381752,Starbucks,43.642484,-79.381373,Coffee Shop


In [52]:
to_merged.loc[to_merged['Cluster Labels'] == 5]

Unnamed: 0,Neighborhood,Skating Rink,Cluster Labels,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
