In [1]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
!pip install folium

Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 6.0 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1


In [3]:
import requests

# Wikipedia page listing the postal codes that start with "M".
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the notebook from hanging indefinitely on a stalled connection.
page = requests.get(URL, timeout=30)
# Fail here on an HTTP error rather than parsing an error page in the cells below.
page.raise_for_status()

In [4]:
# Parse the HTML text of the downloaded page using the 'html.parser' backend.
soup = BeautifulSoup(page.text, 'html.parser')

In [5]:
# Every <tr> element found on the page, and the text content of each one.
datas_elem = soup.find_all('tr')
datas = [row_tag.text for row_tag in datas_elem]

datas

['\nPostal Code\n\nBorough\n\nNeighbourhood\n',
 '\nM1A\n\nNot assigned\n\nNot assigned\n',
 '\nM2A\n\nNot assigned\n\nNot assigned\n',
 '\nM3A\n\nNorth York\n\nParkwoods\n',
 '\nM4A\n\nNorth York\n\nVictoria Village\n',
 '\nM5A\n\nDowntown Toronto\n\nRegent Park, Harbourfront\n',
 '\nM6A\n\nNorth York\n\nLawrence Manor, Lawrence Heights\n',
 "\nM7A\n\nDowntown Toronto\n\nQueen's Park, Ontario Provincial Government\n",
 '\nM8A\n\nNot assigned\n\nNot assigned\n',
 '\nM9A\n\nEtobicoke\n\nIslington Avenue, Humber Valley Village\n',
 '\nM1B\n\nScarborough\n\nMalvern, Rouge\n',
 '\nM2B\n\nNot assigned\n\nNot assigned\n',
 '\nM3B\n\nNorth York\n\nDon Mills\n',
 '\nM4B\n\nEast York\n\nParkview Hill, Woodbine Gardens\n',
 '\nM5B\n\nDowntown Toronto\n\nGarden District, Ryerson\n',
 '\nM6B\n\nNorth York\n\nGlencairn\n',
 '\nM7B\n\nNot assigned\n\nNot assigned\n',
 '\nM8B\n\nNot assigned\n\nNot assigned\n',
 '\nM9B\n\nEtobicoke\n\nWest Deane Park, Princess Gardens, Martin Grove, Islington, Clover

In [6]:
# Break each row's text into its newline-separated pieces.
k = [row_text.split('\n') for row_text in datas]
k

[['', 'Postal Code', '', 'Borough', '', 'Neighbourhood', ''],
 ['', 'M1A', '', 'Not assigned', '', 'Not assigned', ''],
 ['', 'M2A', '', 'Not assigned', '', 'Not assigned', ''],
 ['', 'M3A', '', 'North York', '', 'Parkwoods', ''],
 ['', 'M4A', '', 'North York', '', 'Victoria Village', ''],
 ['', 'M5A', '', 'Downtown Toronto', '', 'Regent Park, Harbourfront', ''],
 ['', 'M6A', '', 'North York', '', 'Lawrence Manor, Lawrence Heights', ''],
 ['',
  'M7A',
  '',
  'Downtown Toronto',
  '',
  "Queen's Park, Ontario Provincial Government",
  ''],
 ['', 'M8A', '', 'Not assigned', '', 'Not assigned', ''],
 ['',
  'M9A',
  '',
  'Etobicoke',
  '',
  'Islington Avenue, Humber Valley Village',
  ''],
 ['', 'M1B', '', 'Scarborough', '', 'Malvern, Rouge', ''],
 ['', 'M2B', '', 'Not assigned', '', 'Not assigned', ''],
 ['', 'M3B', '', 'North York', '', 'Don Mills', ''],
 ['', 'M4B', '', 'East York', '', 'Parkview Hill, Woodbine Gardens', ''],
 ['', 'M5B', '', 'Downtown Toronto', '', 'Garden District

### - The dataframe will consist of three columns: Postal Code, Borough, and Neighbourhood

In [7]:
# Column name -> (initially empty) list of values; each column gets its own list.
data = {column: [] for column in ('Postal Code', 'Borough', 'Neighbourhood')}
data

{'Postal Code': [], 'Borough': [], 'Neighbourhood': []}

In [8]:
# k[0] is the header row, so only the rows after it carry values.
# After splitting on '\n' the three fields sit at positions 1, 3 and 5
# (the positions in between hold empty strings).
# The lists are rebuilt rather than appended to, so re-running this cell
# does not duplicate rows.
body_rows = k[1:]
data['Postal Code'] = [fields[1] for fields in body_rows]
data['Borough'] = [fields[3] for fields in body_rows]
data['Neighbourhood'] = [fields[5] for fields in body_rows]

In [9]:
# Turn the column -> values mapping into a DataFrame (one column per key).
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
179,M9Z,Not assigned,Not assigned
180,,Canadian postal codes,
181,,,
182,NL,NS,PE


In [10]:
# Keep only rows whose first field is a postal code of the form M<digit><letter>.
# This discards the trailing non-table rows (blank codes, 'NL', ...) without
# relying on a hard-coded row count, and keeps the original index labels.
df = df[df['Postal Code'].str.match(r'^M\d[A-Z]$', na=False)]

### - Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned

In [11]:
# Split the table in two according to whether the borough is 'Not assigned'.
borough_missing = df['Borough'].eq('Not assigned')
df_assigned = df[~borough_missing]
df_not_assigned = df[borough_missing]

In [12]:
# Renumber the rows 0..n-1. Rebinding the name (instead of inplace=True) avoids
# mutating a frame that was produced by filtering another frame.
df_assigned = df_assigned.reset_index(drop=True)

### - More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. 
### These two rows will be combined into one row, with the neighborhoods separated by a comma, as shown in row 11 in the above table.

In [13]:
# One row per postal code: keep the borough once and join the neighbourhoods with ', '.
# (A plain .sum() would glue repeated strings together with no separator.)
df_assigned.groupby('Postal Code', as_index=False).agg(
    {'Borough': 'first', 'Neighbourhood': ', '.join}
)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


### - If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [14]:
# Display the rows whose borough is 'Not assigned'.
df_not_assigned

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
7,M8A,Not assigned,Not assigned
10,M2B,Not assigned,Not assigned
15,M7B,Not assigned,Not assigned
...,...,...,...
174,M4Z,Not assigned,Not assigned
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned


In [15]:
# Compare the sizes of the two subsets as (rows, columns) tuples.
df_assigned.shape, df_not_assigned.shape

((103, 3), (77, 3))

# Get Latitude, Longitude

In [16]:
import os
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3


def __iter__(self):
    """Placeholder bound onto the response body so it exposes an `__iter__` attribute."""
    return 0


# @hidden_cell
# The following code accesses a file in IBM Cloud Object Storage.
# The API key is read from the IBM_COS_API_KEY environment variable so that no
# credential is stored in the notebook; a missing variable raises KeyError here.
# NOTE(review): a key that was ever saved in a shared notebook should be revoked.
client_8a78c2a4e1d14cd2b0c15b65d675df85 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=os.environ['IBM_COS_API_KEY'],
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com')

body = client_8a78c2a4e1d14cd2b0c15b65d675df85.get_object(Bucket='myjupyternotebookonibmwatsonstudi-donotdelete-pr-geeznxszhwnezm',Key='Geospatial_Coordinates.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

# Read the CSV from the object body and preview its first rows.
df_geo = pd.read_csv(body)
df_geo.head()


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [17]:
# (rows, columns) of the coordinates table.
df_geo.shape

(103, 3)

In [18]:
# Preview the first rows of the table of postal codes with an assigned borough.
df_assigned.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [19]:
# Latitude of the first row, fetched with a single .loc[row, column] lookup
# instead of chained indexing.
df_geo.loc[df_geo.index[0], 'Latitude']

43.806686299999996

In [20]:
# Index the coordinates by postal code, one row per code. first() is used rather
# than sum(): if a postal code were ever listed twice, summing would add its
# latitudes/longitudes together and produce coordinates that are not on the map.
df_geo = df_geo.groupby('Postal Code', as_index=True).first()

### Build a dictionary from postal code to coordinates, look up each postal code of df_assigned in it, and make a new DataFrame from the results

In [21]:
# Lookup table: index label of df_geo -> {'Latitude': ..., 'Longitude': ...}.
# to_dict(orient='index') builds the whole mapping in one call, replacing the
# per-row chained .loc[...][...] lookups.
geo_dic = df_geo[['Latitude', 'Longitude']].to_dict(orient='index')

In [22]:
# Inspect the row of df_geo selected by its first index label.
df_geo.loc[df_geo.index[0]]

Latitude     43.806686
Longitude   -79.194353
Name: M1B, dtype: float64

In [23]:
# Display the lookup dictionary built from df_geo.
geo_dic

{'M1B': {'Latitude': 43.806686299999996, 'Longitude': -79.19435340000001},
 'M1C': {'Latitude': 43.7845351, 'Longitude': -79.16049709999999},
 'M1E': {'Latitude': 43.7635726, 'Longitude': -79.1887115},
 'M1G': {'Latitude': 43.7709921, 'Longitude': -79.21691740000001},
 'M1H': {'Latitude': 43.773136, 'Longitude': -79.23947609999999},
 'M1J': {'Latitude': 43.7447342, 'Longitude': -79.23947609999999},
 'M1K': {'Latitude': 43.7279292, 'Longitude': -79.26202940000002},
 'M1L': {'Latitude': 43.711111700000004, 'Longitude': -79.2845772},
 'M1M': {'Latitude': 43.716316, 'Longitude': -79.23947609999999},
 'M1N': {'Latitude': 43.692657000000004, 'Longitude': -79.2648481},
 'M1P': {'Latitude': 43.7574096, 'Longitude': -79.27330400000001},
 'M1R': {'Latitude': 43.750071500000004, 'Longitude': -79.2958491},
 'M1S': {'Latitude': 43.7942003, 'Longitude': -79.26202940000002},
 'M1T': {'Latitude': 43.7816375, 'Longitude': -79.3043021},
 'M1V': {'Latitude': 43.8152522, 'Longitude': -79.2845772},
 'M1W':

In [24]:
# For every postal code, fetch its coordinate dict from geo_dic
# (an unknown postal code raises KeyError).
df_assigned1 = df_assigned['Postal Code'].map(geo_dic.__getitem__)
df_assigned1

0      {'Latitude': 43.7532586, 'Longitude': -79.3296...
1      {'Latitude': 43.725882299999995, 'Longitude': ...
2      {'Latitude': 43.6542599, 'Longitude': -79.3606...
3      {'Latitude': 43.718517999999996, 'Longitude': ...
4      {'Latitude': 43.6623015, 'Longitude': -79.3894...
                             ...                        
98     {'Latitude': 43.653653600000005, 'Longitude': ...
99     {'Latitude': 43.6658599, 'Longitude': -79.3831...
100    {'Latitude': 43.6627439, 'Longitude': -79.321558}
101    {'Latitude': 43.6362579, 'Longitude': -79.4985...
102    {'Latitude': 43.6288408, 'Longitude': -79.5209...
Name: Postal Code, Length: 103, dtype: object

In [25]:
# Expand the Series of coordinate dicts into a frame with one column per dict key.
df_assigned2 = pd.DataFrame(df_assigned1.tolist())
df_assigned2

Unnamed: 0,Latitude,Longitude
0,43.753259,-79.329656
1,43.725882,-79.315572
2,43.654260,-79.360636
3,43.718518,-79.464763
4,43.662301,-79.389494
...,...,...
98,43.653654,-79.506944
99,43.665860,-79.383160
100,43.662744,-79.321558
101,43.636258,-79.498509


In [26]:
# Attach the coordinate columns side by side (rows are matched on the index).
# Any coordinate columns left over from a previous run of this cell are dropped
# first, so re-running it does not produce duplicate columns.
coordinate_columns = list(df_assigned2.columns)
df_assigned = pd.concat(
    [df_assigned.drop(columns=coordinate_columns, errors='ignore'), df_assigned2],
    axis=1,
)
df_assigned

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


# GOOD!!

# Explore Neighborhoods in Toronto

1. Create a map of Toronto with neighborhoods superimposed on top
    - Use geopy library to get the latitude and longitude values of Toronto

In [28]:

from geopy.geocoders import Nominatim

In [29]:


address = 'Toronto'

geolocator = Nominatim(user_agent = "toronto_explorer")
# A longer timeout than the library default makes a slow response less likely to abort the cell.
location = geolocator.geocode(address, timeout=10)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


Then use the folium library to plot the map. folium enables both the binding of data to a map as well as passing rich vector/raster/HTML visualizations as markers on the map.

In [30]:
import folium

In [34]:
# Base map centred on the coordinates obtained for Toronto.
map_Tnt = folium.Map(location = [latitude, longitude], zoom_start=10)

# Add one circle marker per row, with the neighbourhood name as its popup.
for lat, lng, neighborhood in zip(df_assigned['Latitude'], df_assigned['Longitude'], df_assigned['Neighbourhood']):
    # parse_html=True makes the popup treat the label as text rather than markup.
    label = folium.Popup(str(neighborhood), parse_html = True)
    # parse_html belongs to Popup only; it is not a CircleMarker argument,
    # so it is no longer passed here.
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = label,
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(map_Tnt)

map_Tnt

In [None]:
# Next, we can find how many venues are in each neighbourhood within a radius of 500 meters