In [1]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

1) Scraping of table from Wikipedia using BeautifulSoup4 and Pandas

In [2]:
r = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(r.content)

table = soup.find_all('table')
df = pd.read_html(str(table))[0]

Removing rows with borough as Not Assigned

In [3]:
toronto_df = df[df['Borough'] != 'Not assigned']

Renaming 'not assigned' neighborhood with Borough name

In [4]:
toronto_df['Neighborhood'] = np.where(toronto_df['Neighborhood'] == 'Not assigned', toronto_df['Borough'], toronto_df['Neighborhood'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Joining all Neighborhood that has the same Postal Code with a comma separator

In [5]:
toronto_df = toronto_df.groupby(['Postal Code', 'Borough'], as_index = False).agg(','.join)
toronto_df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [6]:
toronto_df.shape

(103, 3)

2) Merging the Longitude and Latitude provided with the Toronto Neighborhood Dataframe

In [7]:
url = 'http://cocl.us/Geospatial_data'
latlng_df = pd.read_csv(url)
latlng_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
toronto_latlng_df = pd.merge(toronto_df, latlng_df, on = 'Postal Code')
toronto_latlng_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


3) Presenting the neighborhood clusters on the map of Toronto

In [9]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library

In [10]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent = "to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print ('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [12]:
map_toronto = folium.Map(location = [latitude, longitude], zoom_start = 10)

for lat, lng, borough, neighborhood in zip(toronto_latlng_df['Latitude'], toronto_latlng_df['Longitude'], toronto_latlng_df['Borough'], toronto_latlng_df['Neighborhood']):
    label = 'Borough: {}, Neighborhood: {}'.format(borough, neighborhood)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker(
    [lat,lng],
    radius = 3,
    popup = label,
    color = 'blue',
    fill = True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_toronto)
    
map_toronto

NameError: name 'toronto' is not defined