# Toronto Neighborhoods

In [11]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
import folium

We will scrape the list of Toronto neighbourhoods from the Wikipedia page 'https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Toronto'.

In [2]:
# Download the Wikipedia page that lists Toronto's neighbourhoods.
url = 'https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Toronto'
# Without a timeout requests waits indefinitely on a stalled connection,
# which would hang the notebook on Run All.
result = requests.get(url, timeout=30)
print(url)
print(result.status_code)
print(result.headers)
# Stop here on a 4xx/5xx response rather than parsing an error page downstream.
# Placed after the prints so the diagnostics above are still shown on failure.
result.raise_for_status()

https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Toronto
200
{'Date': 'Sat, 22 Dec 2018 03:46:16 GMT', 'Content-Type': 'text/html; charset=UTF-8', 'Content-Length': '30805', 'Connection': 'keep-alive', 'Server': 'mw1333.eqiad.wmnet', 'X-Content-Type-Options': 'nosniff', 'P3P': 'CP="This is not a P3P policy! See https://en.wikipedia.org/wiki/Special:CentralAutoLogin/P3P for more info."', 'X-Powered-By': 'HHVM/3.18.6-dev', 'Content-language': 'en', 'Last-Modified': 'Thu, 20 Dec 2018 23:29:00 GMT', 'Backend-Timing': 'D=122113 t=1545353436381943', 'Vary': 'Accept-Encoding,Cookie,Authorization,X-Seven', 'Content-Encoding': 'gzip', 'X-Varnish': '48425371 448736211, 338387129 716564982', 'Via': '1.1 varnish (Varnish/5.1), 1.1 varnish (Varnish/5.1)', 'Age': '96938', 'X-Cache': 'cp1079 hit/9, cp1075 hit/15', 'X-Cache-Status': 'hit-front', 'Server-Timing': 'cache;desc="hit-front"', 'Strict-Transport-Security': 'max-age=106384710; includeSubDomains; preload', 'Set-Cookie': 'WMF-Last-Access

In [3]:
# Start from an empty frame: one row per neighbourhood, with the name in
# 'Hood' and the geocoded position in 'Latitude' / 'Longitude'.
column_names = ['Hood', 'Latitude', 'Longitude']
df = pd.DataFrame(columns=column_names)
df.head()

Unnamed: 0,Hood,Latitude,Longitude


Parse the downloaded page and clean up the neighbourhood names.

In [4]:
def _clean_title(title):
    """Return the bare neighbourhood name from a Wikipedia link title.

    Keeps only the text before the first ", " and then strips the
    " (neighbourhood)" and " (Toronto)" disambiguation suffixes, in that order.
    """
    for marker in (", ", " (neighbourhood)", " (Toronto)"):
        title = title.split(marker)[0]
    return title


soup = BeautifulSoup(result.content, 'html.parser')
# The neighbourhood links are read from the first <table> on the page.
table = soup.find('table')
if table is None:
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError('No <table> element found in the downloaded page')
lis = table.find_all('li')

list_of_n = []
for li in lis:
    a = li.find('a')
    # Skip list items that carry no link, or a link without a title attribute.
    title = a.get('title') if a is not None else None
    if not title:
        continue
    list_of_n.append(_clean_title(title))

Start populating the dataframe with the neighbourhood names.

In [5]:
# Fill the name column from the scraped list; the coordinate columns stay
# empty until the geocoding step.
hood_names = pd.Series(list_of_n)
df['Hood'] = hood_names
print(df.shape)
df.head()

(89, 3)


Unnamed: 0,Hood,Latitude,Longitude
0,Alexandra Park,,
1,The Annex,,
2,Baldwin Village,,
3,Cabbagetown,,
4,CityPlace,,


Check for duplicate neighbourhood names and drop them.

In [6]:
# Remove repeated neighbourhood names, keeping the first occurrence.
# Duplicates are judged on the name only, so the result does not depend on
# whatever the coordinate columns contain when this cell runs.
# Reassigning (rather than inplace=True) avoids mutating the frame an earlier
# cell displayed. The index is not reset, so row labels keep their gaps.
df = df.drop_duplicates(subset=['Hood'])
print(df.shape)
df.head()

(86, 3)


Unnamed: 0,Hood,Latitude,Longitude
0,Alexandra Park,,
1,The Annex,,
2,Baldwin Village,,
3,Cabbagetown,,
4,CityPlace,,


Loop over the neighbourhoods, geocode each one, and populate the dataframe with its coordinates.
Neighbourhoods that the geocoder cannot find are recorded so that they can be dropped afterwards.

In [7]:
# Geocode every neighbourhood as "<name>, Toronto" and store its coordinates.
# Index labels of neighbourhoods the geocoder cannot resolve are collected in
# to_drop_unknown so a later cell can remove those rows.
# NOTE(review): requests are sent back-to-back with no delay; check the
# geocoding service's usage policy before re-running this on a larger list.
to_drop_unknown = []
geolocator = Nominatim(user_agent="coursera")
# Iterate over the name column only; the coordinate columns are written inside the loop.
for index, hood in df['Hood'].items():
    address = hood + ', Toronto'
    location = geolocator.geocode(address)
    # geocode() returns None when it finds no match. Test for that explicitly
    # instead of catching AttributeError, which would also hide unrelated bugs.
    if location is None:
        print('Cannot do: {}, will drop index: {}'.format(address, index))
        to_drop_unknown.append(index)
        continue
    latitude = location.latitude
    longitude = location.longitude
    print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
    df.loc[index, 'Latitude'] = latitude
    df.loc[index, 'Longitude'] = longitude

df.head()

The geograpical coordinate of Alexandra Park, Toronto are 43.65075755, -79.4042978683821.
The geograpical coordinate of The Annex, Toronto are 43.6703377, -79.407117.
The geograpical coordinate of Baldwin Village, Toronto are 43.66981815, -79.4971720343184.
The geograpical coordinate of Cabbagetown, Toronto are 43.6644734, -79.3669861.
The geograpical coordinate of CityPlace, Toronto are 43.6392482, -79.3963865.
The geograpical coordinate of Chinatown, Toronto are 43.6529237, -79.3980316.
The geograpical coordinate of Church and Wellesley, Toronto are 43.6655242, -79.3838011.
The geograpical coordinate of Corktown, Toronto are 43.6573709, -79.3565189.
The geograpical coordinate of Discovery District, Toronto are 43.6575555, -79.3894803.
The geograpical coordinate of Distillery District, Toronto are 43.6502803, -79.3595767.
The geograpical coordinate of Toronto Entertainment District, Toronto are 43.64383755, -79.3866924741406.
Cannot do: East Bayfront, Toronto, will drop index: 11
The 

Unnamed: 0,Hood,Latitude,Longitude
0,Alexandra Park,43.6508,-79.4043
1,The Annex,43.6703,-79.4071
2,Baldwin Village,43.6698,-79.4972
3,Cabbagetown,43.6645,-79.367
4,CityPlace,43.6392,-79.3964


In [8]:
# New frame without the rows whose index labels were recorded as not geocodable.
clean_df = df.drop(index=to_drop_unknown)

In [9]:
# (rows, columns) of the frame left after dropping the rows listed in to_drop_unknown.
clean_df.shape

(82, 3)

Plot the geocoded neighbourhoods on a map of Toronto.

In [13]:
# Centre the map on the city itself.
address = 'Toronto'
location = geolocator.geocode(address)
if location is None:
    # Without a centre, the map would silently fall back to the stale
    # latitude/longitude left over from the neighbourhood loop, so stop here.
    raise ValueError('Cannot geocode map centre: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
# The city-centre coordinates are deliberately NOT written into df: doing so
# would overwrite the coordinates of whichever neighbourhood row the earlier
# loop's leftover `index` variable pointed at.

my_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one circle marker per geocoded neighbourhood, labelled with its name
for lat, lng, hood in zip(clean_df['Latitude'], clean_df['Longitude'], clean_df['Hood']):
    popup = folium.Popup(hood)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        # Request the fill explicitly rather than relying on fill_color alone to enable it.
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(my_map)

my_map

The geograpical coordinate of Toronto are 43.653963, -79.387207.
