**Scrape Wikipedia**

In [131]:
import pandas as pd

# Wikipedia list of Canadian postal codes beginning with "M"
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# read_html returns one DataFrame per HTML table found at the URL;
# the first table on the page is taken as the postal-code listing.
dfs = pd.read_html(io=url)
df = pd.DataFrame(data=dfs[0])
print(df.head(n=5))

  Postcode           Borough     Neighbourhood
0      M1A      Not assigned      Not assigned
1      M2A      Not assigned      Not assigned
2      M3A        North York         Parkwoods
3      M4A        North York  Victoria Village
4      M5A  Downtown Toronto      Harbourfront


In [0]:
# Keep only the rows whose Borough is something other than 'Not assigned'
df = df.loc[df['Borough'].ne('Not assigned')]

In [0]:
# Collapse the table to one row per Postcode: keep the first Borough seen for
# each code and join all of its Neighbourhood names into one ', '-separated string.
# The result is indexed by Postcode.
#
# Group by the label 'Postcode' rather than the Series df['Postcode']: the
# first-run result is the same, but a label also resolves against the index
# level name, so re-executing this cell after Postcode has moved into the
# index no longer raises KeyError.
aggregation_functions = {'Borough': 'first', 'Neighbourhood': ', '.join}
df = df.groupby('Postcode').aggregate(aggregation_functions)

In [0]:
# Move the index back into an ordinary column, then, wherever a row's
# Neighbourhood is still 'Not assigned', fall back to that row's Borough.
df = df.reset_index()
ind = df.index[df['Neighbourhood'].eq('Not assigned')]
df.loc[ind, 'Neighbourhood'] = df.loc[ind, 'Borough']

In [135]:
# Sanity check: report (rows, columns) of the cleaned table
print(df.shape)

(103, 3)


**Creating a new dataframe that adds latitude and longitude values to the existing dataframe**

In [136]:
# Load the postcode -> coordinates lookup table from the remote CSV

df_latlng = pd.read_csv('http://cocl.us/Geospatial_data')
# Rename its three columns so the key column is called 'Postcode', matching df
df_latlng.columns = ['Postcode', 'Latitude', 'Longitude']

# Inner join on Postcode: only codes present in both tables are kept
df_join = df.merge(df_latlng, how='inner', on='Postcode')
print(df_join.head(5))

  Postcode      Borough  ...   Latitude  Longitude
0      M1B  Scarborough  ...  43.806686 -79.194353
1      M1C  Scarborough  ...  43.784535 -79.160497
2      M1E  Scarborough  ...  43.763573 -79.188711
3      M1G  Scarborough  ...  43.770992 -79.216917
4      M1H  Scarborough  ...  43.773136 -79.239476

[5 rows x 5 columns]


In [137]:
# Look up the coordinates of Toronto; `latitude` / `longitude` are later used
# as the centre of the folium map.
from geopy.geocoders import Nominatim
address = 'Toronto, Canada'

# Nominatim's usage policy requires clients to identify themselves. geopy
# deprecated the implicit default user agent, and geopy >= 2.0 raises
# ConfigurationError when none is supplied, so pass one explicitly.
geolocator = Nominatim(user_agent='toronto_neighbourhoods_notebook')
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [138]:
# Independent copy of df_join restricted to the naming columns
# (Borough, Neighbourhood) and the coordinate columns (Latitude, Longitude).

neighbourhoods = df_join.loc[:, ['Borough', 'Neighbourhood', 'Latitude', 'Longitude']].copy()
neighbourhoods.head()



Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476


**Explore and Cluster neighbourhoods in Toronto**

In [139]:
# Build a folium map centred on the geocoded Toronto coordinates and draw one
# circle marker per row of `neighbourhoods`.
import folium
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One marker per row; its popup text is "<Neighbourhood>, <Borough>"
for lat, lng, borough, neighbourhood in zip(neighbourhoods['Latitude'], neighbourhoods['Longitude'], neighbourhoods['Borough'], neighbourhoods['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    # parse_html is a folium.Popup parameter, not a CircleMarker / Leaflet path
    # option, so it is passed only to the Popup above and not repeated here.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

# Last expression of the cell: renders the map inline
map_toronto