In [25]:
import numpy as np # library to handle data in a vectorized manner
from bs4 import BeautifulSoup

# use module dotenv to manage API keys and secrets
import os
%load_ext dotenv
%dotenv

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
from geopy.geocoders import ArcGIS # convert an address into latitude and longitude values


import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## Data gathering
Collect the postal codes, boroughs and neighbourhoods of Toronto from Wikipedia:

In [36]:
# Scrape the table of Toronto postal codes (codes starting with "M") from
# Wikipedia and turn it into a DataFrame with one row per table row.
wiki_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Use a timeout and fail loudly on HTTP errors so that an error page is never
# parsed as if it were the article.
response = requests.get(wiki_url, timeout=30)
response.raise_for_status()
website_url = response.text

soup = BeautifulSoup(website_url, 'html.parser')
table = soup.find('table', attrs={'class': 'wikitable sortable'})
if table is None:
    raise ValueError('No "wikitable sortable" table found at {}'.format(wiki_url))

rows = table.find_all('tr')[1:]  # skip the header row
list_codes = []
for row in rows:
    cols = row.find_all('td')
    if len(cols) < 3:
        # Not a data row (e.g. an extra header row without <td> cells)
        continue
    # Strip every cell: trailing newlines/spaces would otherwise keep the
    # 'Not assigned' placeholder from being recognised below.
    postal_code, borough, neighbourhood = (col.text.strip() for col in cols[:3])
    list_codes.append({'PostalCode': postal_code, 'Borough': borough, 'Neighbourhood': neighbourhood})

df_nbh = pd.DataFrame(list_codes, columns=['PostalCode', 'Borough', 'Neighbourhood'])
# Rows with a 'Not assigned' borough or neighbourhood are dropped
df_nbh = df_nbh.replace({'Not assigned': np.nan}).dropna()
# initialize column with np.nan; it is filled by the geocoding step
df_nbh['location'] = np.nan
df_nbh.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,location
2,M3A,North York,Parkwoods,
3,M4A,North York,Victoria Village,
4,M5A,Downtown Toronto,Harbourfront,
5,M6A,North York,Lawrence Heights,
6,M6A,North York,Lawrence Manor,


Add geospatial data via the geopy library and save the result to a CSV file:

In [None]:
# Geocode every neighbourhood that does not have a location yet.
geolocator = ArcGIS(user_agent="foursquare_agent")

# The column was initialised with NaN (float); it has to hold arbitrary
# geocoder result objects from here on.
df_nbh['location'] = df_nbh['location'].astype(object)

# Write only into the rows that are still missing a location. Assigning the
# result of the masked apply to the whole column would reset rows that were
# already geocoded back to NaN when this cell is run again.
missing_location = df_nbh['location'].isna()
if missing_location.any():
    df_nbh.loc[missing_location, 'location'] = df_nbh.loc[missing_location].apply(
        lambda row: geolocator.geocode('{} {}'.format(row['Neighbourhood'], row['PostalCode'])),
        axis=1,
    )
df_nbh['location'].head()

In [None]:
def get_longitude(location):
    """Return ``location.longitude``, or NaN if reading the attribute fails.

    Any exception raised while reading the attribute (for example because
    ``location`` is None or NaN) results in ``np.nan``.
    """
    try:
        value = location.longitude
    except Exception:
        value = np.nan
    return value
def get_latitude(location):
    """Return ``location.latitude``, or NaN if reading the attribute fails.

    Any exception raised while reading the attribute (for example because
    ``location`` is None or NaN) results in ``np.nan``.
    """
    try:
        value = location.latitude
    except Exception:
        value = np.nan
    return value
# Split each geocoding result into separate coordinate columns; rows without
# a usable location get NaN from the extractor functions.
for column, extractor in (('longitude', get_longitude), ('latitude', get_latitude)):
    df_nbh[column] = df_nbh['location'].apply(extractor)
df_nbh.head()

In [None]:
# The geocoder result objects are no longer needed once the coordinates have
# been extracted; renumber the rows and write the table to disk. The index is
# written as well, so it comes back as an unnamed first column on reload.
df_nbh = df_nbh.drop(columns=['location']).reset_index(drop=True)
df_nbh.to_csv("./toronto.csv")

In [37]:
# Reload the saved table and discard the index column that was written
# alongside the data.
df_toronto = pd.read_csv("./toronto.csv").drop(columns=['Unnamed: 0'])
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,longitude,latitude
0,M3A,North York,Parkwoods,-79.329242,43.75242
1,M4A,North York,Victoria Village,-79.313265,43.7306
2,M5A,Downtown Toronto,Harbourfront,114.191795,22.302671
3,M6A,North York,Lawrence Heights,-79.449943,43.714878
4,M6A,North York,Lawrence Manor,-79.449943,43.714878


In [62]:
# Some geocoding results are obviously wrong (e.g. a longitude of ~114 for
# 'Harbourfront'). Keep only rows whose longitude lies within a few degrees
# of Toronto's and that actually have coordinates: a NaN coordinate passes a
# plain "distance > threshold" test and would later break the map markers.
REFERENCE_LONGITUDE = -80    # rough longitude of the Toronto area, in degrees
MAX_LONGITUDE_OFFSET = 5     # tolerated deviation from the reference, in degrees

has_coordinates = df_toronto[['longitude', 'latitude']].notna().all(axis=1)
near_toronto = (df_toronto['longitude'] - REFERENCE_LONGITUDE).abs() <= MAX_LONGITUDE_OFFSET

indexNames = df_toronto.index[~(has_coordinates & near_toronto)]
# Re-assigning instead of dropping in place keeps this cell safe to re-run
df_toronto = df_toronto.drop(indexNames)

### Create a map of Toronto with neighbourhoods superimposed

In [40]:
# Create a map of Toronto centred on the geocoded city and add one marker per
# neighbourhood.

# Build the geocoder here instead of relying on a `geolocator` variable left
# over from whichever cell happened to run last.
toronto_geocoder = Nominatim(user_agent="toronto_explorer")
loc_toronto = toronto_geocoder.geocode('Toronto')
if loc_toronto is None:
    raise ValueError("Could not geocode 'Toronto'")

map_toronto = folium.Map(location=[loc_toronto.latitude, loc_toronto.longitude], zoom_start=12)

# add markers to map; rows without coordinates cannot be placed and are skipped
plottable = df_toronto.dropna(subset=['latitude', 'longitude'])
for lat, lng, borough, neighborhood in zip(plottable['latitude'], plottable['longitude'], plottable['Borough'], plottable['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # `parse_html` belongs to the Popup only, so it is not passed to the marker
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

### Define Foursquare API keys and version:

In [41]:
# Foursquare credentials are read from the environment (None when a variable
# is not set); `version` is the API version date sent with every request.
client_id = os.getenv('CLIENT_ID')
client_secret = os.getenv('CLIENT_SECRET')
version = '20180605'

In [42]:
# define query function
# define query function
def foursquare_query(address, endpoint, search_query, section="", cat_ids=None, radius=10000, limit=100):
    """Query a Foursquare v2 ``venues`` endpoint around a geocoded address.

    Parameters
    ----------
    address : str
        Free-text address; it is geocoded with Nominatim to obtain the
        latitude/longitude sent to the API.
    endpoint : str
        Path segment appended to ``https://api.foursquare.com/v2/venues/``.
    search_query : str
        Value of the ``query`` request parameter.
    section : str, optional
        Value of the ``section`` request parameter.
    cat_ids : sequence of str, optional
        Category *names*; they are translated to ids through the module-level
        ``df_cats`` frame (columns ``name`` and ``id``), which must be defined
        before this function is called with a non-empty ``cat_ids``.
    radius : int, optional
        Value of the ``radius`` request parameter.
    limit : int, optional
        Value of the ``limit`` request parameter.

    Returns
    -------
    dict
        The decoded JSON body of the response.

    Raises
    ------
    ValueError
        If the address cannot be geocoded.
    """
    geolocator = Nominatim(user_agent="foursquare_agent")
    location = geolocator.geocode(address)
    if location is None:
        raise ValueError('Could not geocode address: {!r}'.format(address))

    # Let requests build and encode the query string. The request URL is
    # deliberately not printed: it contains the client secret and would end
    # up in the saved notebook output.
    params = {
        'client_id': client_id,
        'client_secret': client_secret,
        'll': '{},{}'.format(location.latitude, location.longitude),
        'v': version,
        'query': search_query,
        'section': section,
        'radius': radius,
        'limit': limit,
    }
    if cat_ids is not None and len(cat_ids) > 0:
        ids = df_cats[df_cats.name.isin(cat_ids)]['id']
        params['categoryId'] = ','.join(ids)

    url = 'https://api.foursquare.com/v2/venues/{}'.format(endpoint)
    results = requests.get(url, params=params, timeout=30).json()
    return results


## Explore neighbourhoods

In [63]:
# Restrict the table to the borough of North York and renumber the rows
is_north_york = df_toronto['Borough'] == 'North York'
north_york_data = df_toronto.loc[is_north_york].reset_index(drop=True)
north_york_data

Unnamed: 0,PostalCode,Borough,Neighbourhood,longitude,latitude
0,M3A,North York,Parkwoods,-79.329242,43.75242
1,M4A,North York,Victoria Village,-79.313265,43.7306
2,M6A,North York,Lawrence Heights,-79.449943,43.714878
3,M6A,North York,Lawrence Manor,-79.449943,43.714878
4,M3B,North York,Don Mills North,-79.347292,43.752102
5,M6B,North York,Glencairn,-79.446482,43.708429
6,M3C,North York,Flemingdon Park,-79.33117,43.71309
7,M3C,North York,Don Mills South,-79.333858,43.70568
8,M2H,North York,Hillcrest Village,-79.356207,43.802845
9,M3H,North York,Bathurst Manor,-79.448688,43.757875


In [46]:
# Geocode the borough itself; its coordinates are used to centre the map.
address = 'North York, Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing is found; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of North York are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of North York are 43.7543263, -79.44911696639593.


In [48]:
# create map of North York centred on the coordinates geocoded for the borough
map_north_york = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map; rows without coordinates cannot be placed and are skipped
plottable = north_york_data.dropna(subset=['latitude', 'longitude'])
for lat, lng, label in zip(plottable['latitude'], plottable['longitude'], plottable['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    # `parse_html` belongs to the Popup only, so it is not passed to the marker
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_north_york)

# Display the map (the cell previously built it without showing it)
map_north_york

## Function for getting nearby venues in a radius of 500 m

In [49]:
LIMIT = 5  # default number of venues requested per neighbourhood


def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=None):
    """Collect venues near each location from the Foursquare ``explore`` endpoint.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences giving the name and coordinates of each location.
    radius : int, optional
        Value of the ``radius`` request parameter.
    limit : int, optional
        Maximum number of venues requested per location; defaults to the
        module-level ``LIMIT``.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was found at all.

    Raises
    ------
    RuntimeError
        If the API reports a status code other than 200 for a request.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    if limit is None:
        limit = LIMIT

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string
        params = {
            'client_id': client_id,
            'client_secret': client_secret,
            'v': version,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }
        payload = requests.get('https://api.foursquare.com/v2/venues/explore', params=params, timeout=30).json()

        # Surface API errors (bad credentials, quota, ...) with their details
        # instead of a bare KeyError on the missing 'groups' entry.
        meta = payload.get('meta') or {}
        if meta.get('code', 200) != 200:
            raise RuntimeError('Foursquare request for {!r} failed: {}'.format(name, meta))

        groups = (payload.get('response') or {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            # A venue may come without any category
            category = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing the columns to the constructor also works for an empty result,
    # where assigning `.columns` afterwards would raise a length mismatch.
    nearby_venues = pd.DataFrame(venue_rows, columns=columns)

    return nearby_venues

In [64]:
# Query the venues around every North York neighbourhood
df_ny_venues = getNearbyVenues(
    names=north_york_data['Neighbourhood'],
    latitudes=north_york_data['latitude'],
    longitudes=north_york_data['longitude'],
)

Parkwoods
Victoria Village
Lawrence Heights
Lawrence Manor
Don Mills North
Glencairn
Flemingdon Park
Don Mills South
Hillcrest Village
Bathurst Manor
Downsview North
Wilson Heights
Henry Farm
York University
Bayview Village
CFB Toronto
Downsview East
Silver Hills
York Mills
Downsview West
North Park
Upwood Park
Humber Summit
Newtonbrook
Willowdale
Downsview Central
Bedford Park
Lawrence Manor East
Humberlea
Willowdale South
Downsview Northwest
York Mills West
Willowdale West


In [65]:
# Display the venues collected for the North York neighbourhoods
df_ny_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75242,-79.329242,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75242,-79.329242,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.7306,-79.313265,Wigmore Park,43.731023,-79.310771,Park
3,Victoria Village,43.7306,-79.313265,Memories of Africa,43.726602,-79.312427,Grocery Store
4,Victoria Village,43.7306,-79.313265,Vinnia Meats,43.730465,-79.30752,German Restaurant
5,Lawrence Heights,43.714878,-79.449943,Harvey's,43.715413,-79.455296,Fast Food Restaurant
6,Lawrence Heights,43.714878,-79.449943,Tim Hortons,43.716686,-79.447185,Coffee Shop
7,Lawrence Heights,43.714878,-79.449943,PetSmart,43.716544,-79.445926,Pet Store
8,Lawrence Heights,43.714878,-79.449943,Petro-Canada,43.715264,-79.44683,Gas Station
9,Lawrence Heights,43.714878,-79.449943,KFC,43.716688,-79.447515,Fast Food Restaurant
