In [1]:
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import json

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes 
import folium 

# Reaching this line means every import in this cell succeeded.
print("Libraries imported.")

Libraries imported.


In [2]:
# Download the Wikipedia page listing the "M" postal codes and parse it.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
wiki_response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
wiki_response.raise_for_status()

# NOTE: despite its name, `wikiURL` holds the page's HTML text, not a URL.
wikiURL = wiki_response.text
torontodata = BeautifulSoup(wikiURL, 'lxml')

In [3]:
# Build `df_torontogeocodes`: one row per postal code with its borough,
# comma-joined neighborhoods, latitude and longitude.

# Allow up to 200 rows to be shown when a frame is displayed.
pd.set_option('display.max_rows', 200)

# --- 1. Scrape the first <table> of the parsed page ------------------------
table = torontodata.find('table')
if table is None:
    raise ValueError('No <table> element was found in the downloaded page.')
fields = table.find_all('td')

# The cells are consumed as consecutive (postal code, borough, neighborhood)
# triples, so the total count must be a multiple of three. Fail with a clear
# message instead of an IndexError half-way through the table.
cell_text = [cell.text.strip() for cell in fields]
if len(cell_text) % 3 != 0:
    raise ValueError(
        'Expected the table cells to come in groups of 3, got {} cells.'.format(len(cell_text))
    )

postcode = cell_text[0::3]
borough = cell_text[1::3]
neighborhood = cell_text[2::3]

df_toronto_pbn = pd.DataFrame({
    'PostalCode': postcode,
    'Borough': borough,
    'Neighborhood': neighborhood,
})

# --- 2. Drop postal codes that have no borough assigned --------------------
# Assign the result back rather than calling replace(..., inplace=True) on a
# column selection: the in-place form acts on a temporary object, triggers a
# warning on recent pandas, and does nothing under Copy-on-Write.
df_toronto_pbn['Borough'] = df_toronto_pbn['Borough'].replace('Not assigned', np.nan)
df_toronto_pbn = df_toronto_pbn.dropna(subset=['Borough'])

# --- 3. Collapse to one row per postal code / borough ----------------------
# Neighborhoods sharing a postal code are joined into a single string.
# reset_index() already yields the PostalCode/Borough/Neighborhood columns.
df_toronto = (
    df_toronto_pbn
    .groupby(['PostalCode', 'Borough'])['Neighborhood']
    .apply(', '.join)
    .reset_index()
)

# A neighborhood that is still exactly 'Not assigned' is labelled "Queen's Park".
# NOTE(review): this hard-codes a single name for every such row - confirm
# that this is still correct for the current version of the source table.
df_toronto['Neighborhood'] = df_toronto['Neighborhood'].replace('Not assigned', "Queen's Park")

# --- 4. Attach coordinates --------------------------------------------------
df_geocodes = pd.read_csv('http://cocl.us/Geospatial_data')

# Rename positionally so the key column matches `df_toronto` for the merge.
df_geocodes.columns = ['PostalCode', 'Latitude', 'Longitude']

# Inner join: postal codes missing from either side are dropped.
df_post = pd.merge(df_toronto, df_geocodes, on=['PostalCode'], how='inner')
df_torontogeocodes = df_post[['Borough', 'Neighborhood', 'PostalCode', 'Latitude', 'Longitude']].copy()

In [4]:
# Geocode the city so the maps below can be centred on it.
address = 'Toronto, Canada'

# Nominatim expects callers to identify themselves with a custom user agent;
# newer geopy releases refuse to build the geocoder without one.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')
location = geolocator.geocode(address)

# geocode() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Canada is at:  {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, Canada is at:  43.653963, -79.387207.


In [5]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of `df_torontogeocodes`; each popup reads
# "<neighborhood>, <borough>".
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lon, borough, neighborhood in df_torontogeocodes[marker_columns].itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lon],
        radius=6,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

# Last expression of the cell: render the map.
map_toronto

In [15]:
import os  # imported here so this cell works on its own

# Foursquare API credentials.
# Read them from the environment so real keys never have to be typed into
# (and saved with) the notebook. The original placeholder strings remain as
# fallbacks, so the cell still runs when the variables are not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'my id')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'my secret')
VERSION = '20180605'  # value sent as the API `v` parameter

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret itself: cell outputs are stored in the notebook file.
print('CLIENT_SECRET:' + '<hidden>')

Your credentails:
CLIENT_ID: my id
CLIENT_SECRET:my secret


In [7]:
# Keep only the rows whose borough name contains the text "Toronto".
is_toronto_borough = df_torontogeocodes['Borough'].str.contains('Toronto')
df_torontoneighborhoods = df_torontogeocodes.loc[is_toronto_borough]

# Same rows, renumbered from zero so they can be picked by position-like labels.
neighborhood_data = df_torontoneighborhoods.reset_index(drop=True)
neighborhood_data

Unnamed: 0,Borough,Neighborhood,PostalCode,Latitude,Longitude
0,East Toronto,The Beaches,M4E,43.676357,-79.293031
1,East Toronto,"The Danforth West, Riverdale",M4K,43.679557,-79.352188
2,East Toronto,"The Beaches West, India Bazaar",M4L,43.668999,-79.315572
3,East Toronto,Studio District,M4M,43.659526,-79.340923
4,Central Toronto,Lawrence Park,M4N,43.72802,-79.38879
5,Central Toronto,Davisville North,M4P,43.712751,-79.390197
6,Central Toronto,North Toronto West,M4R,43.715383,-79.405678
7,Central Toronto,Davisville,M4S,43.704324,-79.38879
8,Central Toronto,"Moore Park, Summerhill East",M4T,43.689574,-79.38316
9,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",M4V,43.686412,-79.400049


In [8]:
# Second map, same centre, showing only the filtered "Toronto" boroughs.
map_torontoneighborhoods = folium.Map(location=[latitude, longitude], zoom_start=10)

# One red circle marker per row of `df_torontoneighborhoods`; each popup reads
# "<neighborhood>, <borough>".
toronto_marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
toronto_marker_rows = df_torontoneighborhoods[toronto_marker_columns].itertuples(index=False, name=None)
for lat, lon, borough, neighborhood in toronto_marker_rows:
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    toronto_marker = folium.CircleMarker(
        [lat, lon],
        radius=6,
        popup=label,
        color='red',
        fill=True,
        fill_color='#CC0000',
        fill_opacity=0.7,
        parse_html=False,
    )
    toronto_marker.add_to(map_torontoneighborhoods)

# Last expression of the cell: render the map.
map_torontoneighborhoods

In [9]:
# Select the neighborhood stored under index label 3 of `neighborhood_data`
# and read its name and coordinates.
first_row_label = 3

neighborhood_name = neighborhood_data.at[first_row_label, 'Neighborhood']
print("The first neighborhoods that we will select is/are: ", neighborhood_name)

neighborhood_lat = neighborhood_data.at[first_row_label, 'Latitude']
neighborhood_lon = neighborhood_data.at[first_row_label, 'Longitude']

print(f'Latitude and longitude values of {neighborhood_name} are:  {neighborhood_lat}, {neighborhood_lon}.')

The first neighborhoods that we will select is/are:  Studio District
Latitude and longitude values of Studio District are:  43.6595255, -79.340923.


In [10]:
# Select the neighborhood stored under index label 6 of `neighborhood_data`.
# This reuses (and therefore overwrites) neighborhood_name / neighborhood_lat /
# neighborhood_lon.
second_row_label = 6

neighborhood_name = neighborhood_data.at[second_row_label, 'Neighborhood']
print("The second neighborhoods that we will select is/are: ", neighborhood_name)

neighborhood_lat = neighborhood_data.at[second_row_label, 'Latitude']
neighborhood_lon = neighborhood_data.at[second_row_label, 'Longitude']

print(f'Latitude and longitude values of {neighborhood_name} are:  {neighborhood_lat}, {neighborhood_lon}.')

The second neighborhoods that we will select is/are:  North Toronto West
Latitude and longitude values of North Toronto West are:  43.7153834, -79.40567840000001.


In [11]:
# Ask the Foursquare "explore" endpoint for venues around the currently
# selected neighborhood (neighborhood_lat / neighborhood_lon).
LIMIT = 100   # sent as the `limit` query parameter
radius = 500  # sent as the `radius` query parameter (presumably metres - confirm against the API docs)

# Let requests build and URL-encode the query string instead of formatting it
# by hand, so unusual characters in any value cannot corrupt the request.
explore_params = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'v': VERSION,
    'll': '{},{}'.format(neighborhood_lat, neighborhood_lon),
    'radius': radius,
    'limit': LIMIT,
}
response = requests.get(
    'https://api.foursquare.com/v2/venues/explore',
    params=explore_params,
    timeout=30,  # do not hang the notebook on a stalled connection
)
# Fail here on an HTTP error rather than later with a KeyError while
# digging through an error payload.
response.raise_for_status()

# Full URL that was actually requested. It contains the credentials, so
# avoid displaying it in saved output.
url = response.url
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5d24aa9f531593002f953930'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Lawrence Park South',
  'headerFullLocation': 'Lawrence Park South, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 16,
  'suggestedBounds': {'ne': {'lat': 43.7198834045, 'lng': -79.39946407145794},
   'sw': {'lat': 43.7108833955, 'lng': -79.41189272854209}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '51606062e4b0878cf540f4a2',
       'name': 'Barreworks',
       'location': {'address': '2576 Yonge St',
        'lat': 43.71407030751952,
        'lng': -79.40010911522093,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.7

In [12]:
def get_category_type(row):
    """Return the name of the first category of a venue record.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        Must provide the categories under the key ``'categories'`` or,
        failing that, ``'venue.categories'``.

    Returns
    -------
    The ``'name'`` entry of the first category, or ``None`` when the
    categories value is empty or is not a list/tuple (e.g. ``None`` or NaN
    for a venue that carries no category information).

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    # Only a missing key should trigger the fallback lookup; a bare `except`
    # would also swallow unrelated errors (including KeyboardInterrupt).
    try:
        categories_list = row['categories']
    except KeyError:
        categories_list = row['venue.categories']

    # Treat a missing/NaN value the same as an empty list instead of
    # crashing in len().
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [13]:
# Turn the Foursquare response into a tidy table: one row per venue with its
# name, first category name, latitude and longitude.
venues = results['response']['groups'][0]['items']

# `pandas.io.json.json_normalize` was deprecated in pandas 1.0 in favour of
# the top-level `pd.json_normalize`. Prefer the new entry point when it
# exists and fall back to the imported one on older pandas versions.
flatten_json = getattr(pd, 'json_normalize', None) or json_normalize
nearby_venues = flatten_json(venues)  # flatten nested JSON into dotted column names

# Keep only the columns needed downstream.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# Replace each venue's category list with the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Strip the dotted prefixes: 'venue.location.lat' -> 'lat', and so on.
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Barreworks,Yoga Studio,43.71407,-79.400109
1,Uncle Betty's Diner,Diner,43.714452,-79.400091
2,Starbucks,Coffee Shop,43.715456,-79.400303
3,Tio's Urban Mexican,Mexican Restaurant,43.71463,-79.4
4,Civello Salon,Salon / Barbershop,43.715111,-79.400304


In [14]:
# Report how many rows (venues) ended up in `nearby_venues`.
venue_count = len(nearby_venues)
print(f'{venue_count} venues were returned by Foursquare.')

16 venues were returned by Foursquare.
