In [2]:
!pip install BeautifulSoup4
!pip install requests



In [3]:
#imports
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

#get html from wiki page and create soup object
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(source.text, 'lxml')

#using soup object, iterate the .wikitable to get the data from the HTML page and store it into a list
data = []
columns = []
table = soup.find(class_='wikitable')
for index, tr in enumerate(table.find_all('tr')):
    section = []
    for td in tr.find_all(['th','td']):
        section.append(td.text.rstrip())
    
    #First row of data is the header
    if (index == 0):
        columns = section
    else:
        data.append(section)

#convert list into Pandas DataFrame
canada_df = pd.DataFrame(data = data,columns = columns)
canada_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [4]:
#Remove Boroughs that are 'Not assigned'
canada_df = canada_df[canada_df['Borough'] != 'Not assigned']
canada_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
# More than one neighborhood can exist in one postal code area, combined these into one row with the neighborhoods separated with a comma
canada_df["Neighbourhood"] = canada_df.groupby("Postal Code")["Neighbourhood"].transform(lambda neigh: ', '.join(neigh))

#remove duplicates
canada_df = canada_df.drop_duplicates()

#update index to be postcode if it isn't already
if(canada_df.index.name != 'Postal Code'):
    canada_df = canada_df.set_index('Postal Code')
    
canada_df.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [6]:
# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough
canada_df['Neighbourhood'].replace("Not assigned", canada_df["Borough"],inplace=True)
canada_df.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [7]:
canada_df.shape

(103, 2)

In [8]:
import pandas as pd
import io
import requests
url="https://cocl.us/Geospatial_data"
s=requests.get(url).content
c=pd.read_csv(io.StringIO(s.decode('utf-8')))

dfc = canada_df.join(c.set_index('Postal Code'), on='Postal Code')
dfc.head()

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [10]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print("Libraries Imported!")

address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

Libraries Imported!
The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [12]:

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(dfc['Latitude'], dfc['Longitude'], dfc['Borough'], dfc['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [13]:

CLIENT_ID = 'W5EMFJDSZXJHR5LY4PDHDSQJTAOLDZWHY4FZQVEU2IUD3A3T' # your Foursquare ID
CLIENT_SECRET = 'NP0XXQ52SXYC2ER2KE4KDGLAZHGJZJTFAV2VHHGHCMQ3N30S' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 100
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
radius = 500
#print(search_query + ' .... OK!')

url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
url

Your credentails:
CLIENT_ID: W5EMFJDSZXJHR5LY4PDHDSQJTAOLDZWHY4FZQVEU2IUD3A3T
CLIENT_SECRET:NP0XXQ52SXYC2ER2KE4KDGLAZHGJZJTFAV2VHHGHCMQ3N30S


'https://api.foursquare.com/v2/venues/search?client_id=W5EMFJDSZXJHR5LY4PDHDSQJTAOLDZWHY4FZQVEU2IUD3A3T&client_secret=NP0XXQ52SXYC2ER2KE4KDGLAZHGJZJTFAV2VHHGHCMQ3N30S&ll=43.6534817,-79.3839347&v=20180604&radius=500&limit=100'

In [14]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [15]:
#Send the GET request and examine the results
results = requests.get(url).json()
results

# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,id,name,categories,referralId,hasPerk,location.lat,location.lng,location.labeledLatLngs,location.distance,location.cc,location.country,location.formattedAddress,location.address,location.crossStreet,location.postalCode,location.city,location.state,location.neighborhood
0,4c093ee0340720a153728493,City Hall Council Chambers,"[{'id': '4bf58dd8d48988d129941735', 'name': 'C...",v-1595448172,False,43.651827,-79.383949,"[{'label': 'display', 'lat': 43.65182710471462...",184,CA,Canada,[Canada],,,,,,
1,4ad4c05ef964a5208ff620e3,Toronto City Hall,"[{'id': '4bf58dd8d48988d129941735', 'name': 'C...",v-1595448172,False,43.65314,-79.383967,"[{'label': 'display', 'lat': 43.65313989695342...",38,CA,Canada,"[100 Queen St. W. (at Bay St.), Toronto ON M5H...",100 Queen St. W.,at Bay St.,M5H 2N2,Toronto,ON,
2,5b193c42598e64002ca79b96,City of Toronto Civic Innovation Office,"[{'id': '4bf58dd8d48988d129941735', 'name': 'C...",v-1595448172,False,43.653454,-79.383952,"[{'label': 'display', 'lat': 43.653454, 'lng':...",3,CA,Canada,"[100 Queen St W, Toronto ON M5H 2N2, Canada]",100 Queen St W,,M5H 2N2,Toronto,ON,
3,50885719498ea7b5aab3a74c,GoodLife Fitness Toronto Bell Trinity Centre,"[{'id': '4bf58dd8d48988d176941735', 'name': 'G...",v-1595448172,False,43.653436,-79.382314,"[{'label': 'display', 'lat': 43.653436, 'lng':...",130,CA,Canada,"[483 Bay St, Toronto ON M5G 2C9, Canada]",483 Bay St,,M5G 2C9,Toronto,ON,
4,4c8938c8944e224b52e72285,Toronto Public Library,"[{'id': '4bf58dd8d48988d12f941735', 'name': 'L...",v-1595448172,False,43.652631,-79.383295,"[{'label': 'display', 'lat': 43.65263139770985...",107,CA,Canada,"[100 Queen St W (City Hall), Toronto ON M5H 2N...",100 Queen St W,City Hall,M5H 2N3,Toronto,ON,


In [16]:
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered.head()

Unnamed: 0,name,categories,lat,lng,labeledLatLngs,distance,cc,country,formattedAddress,address,crossStreet,postalCode,city,state,neighborhood,id
0,City Hall Council Chambers,City Hall,43.651827,-79.383949,"[{'label': 'display', 'lat': 43.65182710471462...",184,CA,Canada,[Canada],,,,,,,4c093ee0340720a153728493
1,Toronto City Hall,City Hall,43.65314,-79.383967,"[{'label': 'display', 'lat': 43.65313989695342...",38,CA,Canada,"[100 Queen St. W. (at Bay St.), Toronto ON M5H...",100 Queen St. W.,at Bay St.,M5H 2N2,Toronto,ON,,4ad4c05ef964a5208ff620e3
2,City of Toronto Civic Innovation Office,City Hall,43.653454,-79.383952,"[{'label': 'display', 'lat': 43.653454, 'lng':...",3,CA,Canada,"[100 Queen St W, Toronto ON M5H 2N2, Canada]",100 Queen St W,,M5H 2N2,Toronto,ON,,5b193c42598e64002ca79b96
3,GoodLife Fitness Toronto Bell Trinity Centre,Gym,43.653436,-79.382314,"[{'label': 'display', 'lat': 43.653436, 'lng':...",130,CA,Canada,"[483 Bay St, Toronto ON M5G 2C9, Canada]",483 Bay St,,M5G 2C9,Toronto,ON,,50885719498ea7b5aab3a74c
4,Toronto Public Library,Library,43.652631,-79.383295,"[{'label': 'display', 'lat': 43.65263139770985...",107,CA,Canada,"[100 Queen St W (City Hall), Toronto ON M5H 2N...",100 Queen St W,City Hall,M5H 2N3,Toronto,ON,,4c8938c8944e224b52e72285
