# Exploring Neighbourhoods of Toronto

## Part 1 - Scraping Wikipedia to categorize postal codes

In [76]:
import os

import pandas as pd

First, fetch the Wikipedia list of Canadian postal codes beginning with M (the Toronto area) and parse the page with BeautifulSoup

In [77]:
#Fetch source postal codes and feed to BeautifulSoup
from bs4 import BeautifulSoup
import requests

#Use a timeout so a stalled connection cannot hang the notebook, and stop on
#an HTTP error instead of parsing an error page as if it were the table
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

Find the postal codes table and use the <td> tag to build a Pandas dataframe

In [78]:
#Fetch data from BeautifulSoup and store in Pandas DataFrame
table = soup.find('tbody')
if table is None:
    raise ValueError('No <tbody> element found in the fetched page')
table_rows = table.find_all('tr')

#Build one (postal code, borough, neighbourhood) tuple per body row.
#The first row is skipped because it is the table header.
postal_codes_ls = []
for table_row in table_rows[1:]:
    cells = table_row.find_all('td')
    #Skip rows that do not carry all three data cells
    if len(cells) < 3:
        continue
    #Strip surrounding whitespace (e.g. the trailing newline) from every cell
    #rather than slicing a fixed character off one column only
    postal_codes_ls.append(tuple(cell.text.strip() for cell in cells[:3]))

overall_df = pd.DataFrame(postal_codes_ls, columns = ['PostalCodes', 'Boroughs', 'Neighbourhoods'])



Remove any postal codes not assigned to any borough

In [152]:
#Keep only the rows whose borough is something other than 'Not assigned'
has_borough = overall_df['Boroughs'].ne('Not assigned')
overall_df = overall_df.loc[has_borough]
overall_df

Unnamed: 0,PostalCodes,Boroughs,Neighbourhoods
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
...,...,...,...
281,M8Z,Etobicoke,Kingsway Park South West
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West


Merge rows that share a postal code so that each postal code appears once, with its boroughs and neighbourhoods combined into comma-separated lists

In [149]:
def _join_unique(values):
    """Return the distinct entries of a Series, in first-seen order, joined by ', '."""
    return ', '.join(values.unique())

#Collapse the frame to one row per postal code in a single grouped pass.
#sort=False keeps the postal codes in order of first appearance, the same
#order that iterating over .unique() produces.
final_df = (
    overall_df
    .groupby('PostalCodes', sort=False)[['Boroughs', 'Neighbourhoods']]
    .agg(_join_unique)
    .reset_index()
)


Confirm the structure and data of the Pandas dataframe

In [153]:
#Display the merged frame (one row per postal code) as the cell output
final_df

Unnamed: 0,PostalCodes,Boroughs,Neighbourhoods
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern
101,M8Y,Etobicoke,"Humber Bay, King's Mill Park, Kingsway Park So..."


Get the shape of the Pandas data frame

In [127]:
#Get the final shape of the postal codes data frame as (rows, columns)
final_df.shape


(103, 3)

## Part 2 - Add Latitude and Longitude Data

Import latitude and longitude from separate CSV file

In [154]:
#Load the latitude/longitude table from its hosted CSV and preview the first rows
geospatial_csv_url = 'https://cocl.us/Geospatial_data'
lat_long_df = pd.read_csv(geospatial_csv_url)
lat_long_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Perform an inner join on the two data frames, using postal code as key

In [161]:
#Join on the postal code (inner join is the merge default), then drop the
#duplicate key column that came from the right-hand frame
merged_df = (
    final_df
    .merge(lat_long_df, left_on='PostalCodes', right_on='Postal Code')
    .drop(columns='Postal Code')
)

Confirm shape and look of the final data frame

In [162]:
#Print the (rows, columns) shape, then show the first rows as the cell output
print(f"Shape: {merged_df.shape}")
merged_df.head()

Shape: (103, 5)


Unnamed: 0,PostalCodes,Boroughs,Neighbourhoods,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494


## Part 3 - Explore neighbourhoods of Toronto with FourSquare API

In [163]:
import requests # library to handle requests
import pandas as pd # library for data analysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation
 
# shell escape: install geopy from the conda-forge channel, auto-confirming the prompt
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
 
# libraries for displaying images
from IPython.display import Image
from IPython.core.display import HTML
    
# helper for transforming JSON data into a pandas dataframe
from pandas import json_normalize
 
# shell escape: install folium pinned to version 0.5.0 from the conda-forge channel
!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library
 
print('Folium installed')
print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /Users/keithdavies/opt/anaconda3

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.11.28         |           py37_0         148 KB  conda-forge
    conda-4.8.3                |   py37hc8dfbb8_0         3.0 MB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    python_abi-3.7             |          1_cp37m           4 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.3 MB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              

In [164]:
#Establish constants we will use for the FourSquare API
#Credentials are read from the environment so that they are never stored in the
#notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
#starting Jupyter.
#NOTE(review): any credentials previously committed in this cell should be revoked.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') # Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') # Foursquare Secret
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
        'environment variables before running this notebook'
    )
VERSION = '20180604' # API version date sent as the "v" request parameter
LIMIT = 30 # value sent as the "limit" request parameter

In [166]:
#Explore an address in Toronto, getting latitude and longitude
#Recall that the geocoding API sometimes times out.  In these cases, just need to re-run.
address = '33 Herbert Ave, Toronto, ON'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
#geocode() returns None when the address cannot be resolved; stop here with a
#clear message instead of an AttributeError on the attribute access below
if location is None:
    raise ValueError(f"Could not geocode address: {address!r}")
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

43.67045871714286 -79.30512166285715


In [170]:
#Find coffee shops near that address
search_query = 'Coffee'
radius = 500

#Send the query string as a params dict so requests URL-encodes every value
#and the client secret is not baked into a URL string
url = 'https://api.foursquare.com/v2/venues/search'
params = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'll': f'{latitude},{longitude}',
    'v': VERSION,
    'query': search_query,
    'radius': radius,
    'limit': LIMIT,
}

#Use a timeout so a stalled connection cannot hang the notebook, and surface
#HTTP errors directly rather than as a KeyError on the lookup below
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
results = response.json()

# assign relevant part of JSON to venues
venues = results['response']['venues']

# transform venues into a dataframe
dataframe = pd.json_normalize(venues)
dataframe.head()

Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.crossStreet,location.lat,location.lng,location.labeledLatLngs,location.distance,location.postalCode,location.cc,location.city,location.state,location.country,location.formattedAddress,location.neighborhood
0,4c599c4db05c1b8dde2ad6b1,Coffee Time,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1584439552,False,205 Woodbine ave,Queen and woodbine,43.668998,-79.305699,"[{'label': 'display', 'lat': 43.668998, 'lng':...",169,M4L,CA,Toronto,ON,Canada,"[205 Woodbine ave (Queen and woodbine), Toront...",
1,53ffb4ff498e0a4a7c6c65c7,Buds Coffee Bar,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1584439552,False,1934 Queen St East,,43.669375,-79.303218,"[{'label': 'display', 'lat': 43.66937472977883...",195,,CA,Toronto,ON,Canada,"[1934 Queen St East, Toronto ON, Canada]",
2,4b2f79bef964a520a1eb24e3,Starbucks,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1584439552,False,1960 Queen St E,Waverley Ave,43.669736,-79.302056,"[{'label': 'display', 'lat': 43.669736, 'lng':...",259,M4L 1H8,CA,Toronto,ON,Canada,"[1960 Queen St E (Waverley Ave), Toronto ON M4...",The Beaches


In [182]:
#Centre the map on the geocoded address
beaches_map = folium.Map(location=[latitude, longitude], zoom_start=15)

#Select the needed columns by name rather than by position, and use dedicated
#loop variables so the address coordinates held in latitude/longitude are not
#overwritten by the last venue's coordinates
venue_points = dataframe[['name', 'location.lat', 'location.lng']]
for coffee_shop_name, venue_lat, venue_lng in venue_points.itertuples(index=False, name=None):
    folium.features.CircleMarker(
        [venue_lat, venue_lng],
        radius=10,
        popup=coffee_shop_name,
        fill=True,
        color='blue',
        fill_color='blue',
        fill_opacity=0.6
        ).add_to(beaches_map)

beaches_map

## Part 4 - Build maps visualizing postal codes in Toronto

In [184]:
# generate map centred around previous address
# The centre is read from the geocoding result `location` rather than from the
# latitude/longitude globals, which loops in this notebook reassign.
venues_map = folium.Map(location=[location.latitude, location.longitude], zoom_start=11)

# add Postal Code Centres as Red Circles
# Columns are selected by name, and the loop uses its own variables so the
# latitude/longitude globals are left untouched.
postal_code_points = merged_df[['PostalCodes', 'Latitude', 'Longitude']]
for postal_code_prefix, centre_lat, centre_lng in postal_code_points.itertuples(index=False, name=None):
    folium.features.CircleMarker(
        [centre_lat, centre_lng],
        radius=10,
        popup=postal_code_prefix,
        fill=True,
        color='red',
        fill_color='red',
        fill_opacity=0.6
        ).add_to(venues_map)

venues_map