In [4]:
import json
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

In [5]:
# Download the Wikipedia page that lists the "M" postal codes.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging forever; raise_for_status() stops the
# notebook early instead of parsing an HTTP error page as if it were the table.
data = requests.get(url, timeout=30)
data.raise_for_status()

In [6]:
# Parse the downloaded page and take the first <table> element in it.
soup = BeautifulSoup(data.content, 'html.parser')
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found in the downloaded page')
trs = table.find_all('tr')

# Keep only the rows that actually contain <td> cells.
rows = []
for tr in trs:
    cells = tr.find_all('td')
    if cells:
        rows.append(cells)

# Build [postalcode, borough, neighborhood] records.
lst = []
for row in rows:
    # Skip malformed rows instead of failing with an IndexError.
    if len(row) < 3:
        continue
    # Strip both ends so stray whitespace cannot break the later join on PostalCode.
    postalcode, borough, neighborhood = (cell.text.strip() for cell in row[:3])
    # Rows without an assigned borough are dropped entirely.
    if borough == 'Not assigned':
        continue
    # An unassigned neighborhood falls back to the borough name.
    if neighborhood == 'Not assigned':
        neighborhood = borough
    lst.append([postalcode, borough, neighborhood])

In [7]:
# Column order matches the [postalcode, borough, neighborhood] records in `lst`.
cols = ['PostalCode', 'Borough', 'Neighborhood']
df = pd.DataFrame(lst, columns=cols)

In [8]:
# (rows, columns) of the scraped table.
df.shape

(211, 3)

In [9]:
# Preview the first scraped records.
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


In [10]:
# Load the coordinate table and give its key column the same name as in `df`
# ('PostalCode') so the two frames can be joined on it.
dfgeo = (
    pd.read_csv("Geospatial_Coordinates.csv")
    .rename(columns={'Postal Code': 'PostalCode'})
)

In [11]:
# Left join: every row of `df` is kept; postal codes that are missing from
# `dfgeo` end up with NaN in the columns contributed by `dfgeo`.
df2 = pd.merge(df, dfgeo, on="PostalCode", how='left')

In [12]:
# Preview the joined table (scraped records plus coordinates).
df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763


In [14]:
# Foursquare API credentials are read from the environment so that secrets are
# never stored in (or leaked through) the notebook file. Set
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel;
# a missing variable raises KeyError here rather than failing later at request time.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID']
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET']
# Sent as the `v` query parameter of every Foursquare request below.
VERSION = '20190708'

In [32]:
# `df2` keeps the default integer index produced by pd.merge, so the row must be
# selected by its PostalCode value; `df2.loc['M3A']` would raise KeyError.
# `.iloc[0]` takes the first match because a postal code can appear on several rows.
neighborhood_latitude = df2.loc[df2['PostalCode'] == 'M3A', 'Latitude'].iloc[0]
neighborhood_longitude = df2.loc[df2['PostalCode'] == 'M3A', 'Longitude'].iloc[0]

In [34]:
# Query settings: LIMIT is sent as the `limit` parameter and radius as the
# `radius` parameter of the explore request.
LIMIT = 100
radius = 500

# Positional placeholders are filled in the order: credentials, API version,
# "lat,lng" pair, radius, limit.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, CLIENT_SECRET, VERSION,
    neighborhood_latitude, neighborhood_longitude,
    radius, LIMIT,
)

In [35]:
# Fail fast on HTTP errors (bad credentials, quota exceeded, ...) instead of
# carrying an error payload forward as if it were venue data; the timeout keeps
# the cell from hanging indefinitely.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [36]:
def get_category_type(row):
    """Return the name of the first category of a venue record.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        Must provide the category list under the key 'categories' or,
        failing that, under 'venue.categories'.

    Returns
    -------
    The 'name' of the first category, or None when the category list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be silently swallowed.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

In [37]:
# Take the list of items from the first group of the explore response.
venues = results['response']['groups'][0]['items']

In [38]:
# Flatten the nested venue records into a table; nested keys become dotted
# column names such as 'venue.location.lat'.
nearby_venues = json_normalize(venues)
nearby_venues

Unnamed: 0,reasons.count,reasons.items,referralId,venue.categories,venue.id,venue.location.address,venue.location.cc,venue.location.city,venue.location.country,venue.location.distance,venue.location.formattedAddress,venue.location.labeledLatLngs,venue.location.lat,venue.location.lng,venue.location.state,venue.name,venue.photos.count,venue.photos.groups
0,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4e8d9dcdd5fbbbb6b3003c7b-0,"[{'id': '4bf58dd8d48988d163941735', 'name': 'P...",4e8d9dcdd5fbbbb6b3003c7b,Toronto,CA,Toronto,Canada,245,"[Toronto, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.75197604605557...",43.751976,-79.33214,ON,Brookbanks Park,0,[]
1,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4e6696b6d16433b9ffff47c3-1,"[{'id': '4bf58dd8d48988d16e941735', 'name': 'F...",4e6696b6d16433b9ffff47c3,,CA,,Canada,298,[Canada],"[{'label': 'display', 'lat': 43.75438666345904...",43.754387,-79.333021,,KFC,0,[]
2,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4cb11e2075ebb60cd1c4caad-2,"[{'id': '4bf58dd8d48988d1f9941735', 'name': 'F...",4cb11e2075ebb60cd1c4caad,29 Valley Woods Road,CA,Toronto,Canada,312,"[29 Valley Woods Road, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.75197441585782...",43.751974,-79.333114,ON,Variety Store,0,[]


In [39]:
# Columns of the flattened response that are kept for the analysis.
filtered_columns = [
    'venue.name',
    'venue.categories',
    'venue.location.lat',
    'venue.location.lng',
]
nearby_venues = nearby_venues.loc[:, filtered_columns]

# Collapse each row's category list to the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of every column name
# ('venue.location.lat' -> 'lat').
nearby_venues = nearby_venues.rename(columns=lambda label: label.split(".")[-1])

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,KFC,Fast Food Restaurant,43.754387,-79.333021
2,Variety Store,Food & Drink Shop,43.751974,-79.333114


In [42]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint once per location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; each (name, lat, lng) triple produces one request.
    radius : int, default 500
        Value sent as the `radius` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when no venue is found.

    Raises
    ------
    requests.HTTPError
        If a request returns an HTTP error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and
    prints each name as it is processed.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # Surface HTTP failures directly instead of as an opaque KeyError below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        for item in items:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue may carry no category; record None instead of crashing.
                categories[0]['name'] if categories else None))

    # Passing `columns` to the constructor keeps the schema valid even when
    # `rows` is empty (assigning `.columns` afterwards would raise ValueError).
    return pd.DataFrame(rows, columns=columns)

In [44]:
# NOTE(review): `names` is fed the Borough column, so the 'Neighborhood' column
# of the result actually holds borough names — confirm this is intended.
# This also rebinds `venues`, which previously held the raw list of venue items.
venues = getNearbyVenues(names=df2['Borough'],latitudes=df2['Latitude'],longitudes=df2['Longitude'])

North York
North York
Downtown Toronto
Downtown Toronto
North York
North York
Queen's Park
Etobicoke
Scarborough
Scarborough
North York
East York
East York
Downtown Toronto
Downtown Toronto
North York
Etobicoke
Etobicoke
Etobicoke
Etobicoke
Etobicoke
Scarborough
Scarborough
Scarborough
North York
North York
East York
Downtown Toronto
York
Etobicoke
Etobicoke
Etobicoke
Etobicoke
Scarborough
Scarborough
Scarborough
East Toronto
Downtown Toronto
York
Scarborough
East York
Downtown Toronto
Downtown Toronto
Scarborough
North York
North York
North York
North York
East York
Downtown Toronto
Downtown Toronto
Downtown Toronto
West Toronto
West Toronto
Scarborough
North York
North York
North York
North York
North York
East York
Downtown Toronto
Downtown Toronto
Downtown Toronto
West Toronto
West Toronto
Scarborough
Scarborough
Scarborough
North York
North York
North York
East Toronto
East Toronto
Downtown Toronto
Downtown Toronto
West Toronto
West Toronto
West Toronto
Scarborough
Scarborough
Sca

In [50]:
# One-hot encode the venue categories (one indicator column per category).
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")
# NOTE(review): a venue category literally named 'Neighborhood' would be
# overwritten by this assignment.
onehot['Neighborhood'] = venues['Neighborhood']
# Averaging the indicators per group gives each group's category frequencies.
grouped = onehot.groupby('Neighborhood').mean().reset_index()
grouped.shape

(11, 276)

In [51]:
num_top_venues = 5

# For every group, reshape its frequency row into a two-column
# (venue, freq) table with frequencies rounded to two decimals.
# NOTE(review): `temp` is rebuilt on each iteration and is neither displayed
# nor stored in this cell.
for hood in grouped['Neighborhood']:
    temp = grouped.loc[grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    # The first transposed row is the 'Neighborhood' label itself, not a frequency.
    temp = (
        temp.iloc[1:]
        .assign(freq=lambda frame: frame['freq'].astype(float))
        .round({'freq': 2})
    )

In [52]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest values in `row`.

    The first element of `row` is skipped (it is treated as the group label),
    the remaining values are sorted in descending order, and the index labels
    of the leading entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [53]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank uses 'th'.
indicators = ['st', 'nd', 'rd']

# Build the header: 'Neighborhood' followed by one column per rank.
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare `except` used as control flow.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = grouped['Neighborhood']

# Fill each row with that group's most frequent venue categories.
for ind in np.arange(grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Coffee Shop,Pizza Place,Sandwich Place,Pub,Café,Park,Sushi Restaurant,Liquor Store,American Restaurant,Fried Chicken Joint
1,Downtown Toronto,Coffee Shop,Café,Hotel,Restaurant,Bakery,Italian Restaurant,Bar,Japanese Restaurant,American Restaurant,Pizza Place
2,East Toronto,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Brewery,Pub,Pizza Place,Café,Park,Liquor Store
3,East York,Pizza Place,Coffee Shop,Fast Food Restaurant,Bank,Pharmacy,Sporting Goods Shop,Grocery Store,Gym / Fitness Center,Burger Joint,Park
4,Etobicoke,Pizza Place,Grocery Store,Sandwich Place,Coffee Shop,Pharmacy,Fast Food Restaurant,Liquor Store,Beer Store,Fried Chicken Joint,Gym
5,Mississauga,Coffee Shop,Hotel,Burrito Place,Sandwich Place,Middle Eastern Restaurant,American Restaurant,Fried Chicken Joint,Mediterranean Restaurant,Gym / Fitness Center,Dumpling Restaurant
6,North York,Coffee Shop,Clothing Store,Fast Food Restaurant,Restaurant,Japanese Restaurant,Sandwich Place,Pharmacy,Bakery,Shopping Mall,Bank
7,Queen's Park,Coffee Shop,Gym,Diner,Park,College Auditorium,Seafood Restaurant,Sandwich Place,Burger Joint,Burrito Place,Café
8,Scarborough,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Park,Breakfast Spot,Bakery,Intersection,Bus Line,Indian Restaurant,Discount Store
9,West Toronto,Bar,Café,Coffee Shop,Italian Restaurant,Pizza Place,Restaurant,Breakfast Spot,Bakery,Asian Restaurant,Park


In [55]:
# Number of clusters to fit.
kclusters = 5

# Features are the category frequencies only; the label column is removed.
# `columns=` is used because the positional axis form `drop('Neighborhood', 1)`
# is rejected by pandas 2.0 and later.
grouped_clustering = grouped.drop(columns='Neighborhood')

# A fixed random_state keeps the cluster assignment reproducible.
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

In [56]:
# Work on a copy: `merged = grouped` would only alias the frame, so adding
# 'Cluster Labels' would also modify `grouped` and leak the labels into the
# clustering features if the KMeans cell were re-run.
merged = grouped.copy()

merged['Cluster Labels'] = kmeans.labels_

# Attach the ranked most-common-venue columns to each group.
merged = merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

merged.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035874,...,Coffee Shop,Pizza Place,Sandwich Place,Pub,Café,Park,Sushi Restaurant,Liquor Store,American Restaurant,Fried Chicken Joint
1,Downtown Toronto,0.0,0.000398,0.002786,0.002786,0.002786,0.005571,0.008357,0.005571,0.016315,...,Coffee Shop,Café,Hotel,Restaurant,Bakery,Italian Restaurant,Bar,Japanese Restaurant,American Restaurant,Pizza Place
2,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022222,...,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Brewery,Pub,Pizza Place,Café,Park,Liquor Store
3,East York,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Pizza Place,Coffee Shop,Fast Food Restaurant,Bank,Pharmacy,Sporting Goods Shop,Grocery Store,Gym / Fitness Center,Burger Joint,Park
4,Etobicoke,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Pizza Place,Grocery Store,Sandwich Place,Coffee Shop,Pharmacy,Fast Food Restaurant,Liquor Store,Beer Store,Fried Chicken Joint,Gym
