Q1 ) Creating the dataframe using the Wikipedia link and replacing rows with empty values to clean the data

In [1]:
# imports

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
req = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(req.content,'lxml')
table = soup.find_all('table')[0]
column_names = ['Postal Code', 'Borough', 'Neighborhood'] 
df = pd.read_html(str(table))
neigh = pd.DataFrame(df[0])

In [3]:
# replace rows that contain empty values

neigh.dropna(axis=0, how='any', thresh=None, subset=None, inplace=True)

In [4]:
# display first 10 rows
neigh.head(10)

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern / Rouge
11,M3B,North York,Don Mills
12,M4B,East York,Parkview Hill / Woodbine Gardens
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [5]:
# get shape

neigh.shape

(103, 3)

Q2 ) Adding Longitude and Latitude coordinates to Dataframe

In [7]:
# get longitude and latitude from link
geosp_df = pd.read_csv('http://cocl.us/Geospatial_data')

# use groupby function with Postal Code Column
df = geosp_df.groupby('Postal Code')

# use join function to add longitude and latitude while removing extra Postal Code Column
neigh_df = neigh.join(geosp_df)
neigh_df = neigh_df.drop(columns = "Postal Code")
neigh_df.head(11)

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.763573,-79.188711
3,M4A,North York,Victoria Village,43.770992,-79.216917
4,M5A,Downtown Toronto,Regent Park / Harbourfront,43.773136,-79.239476
5,M6A,North York,Lawrence Manor / Lawrence Heights,43.744734,-79.239476
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.727929,-79.262029
8,M9A,Etobicoke,Islington Avenue,43.716316,-79.239476
9,M1B,Scarborough,Malvern / Rouge,43.692657,-79.264848
11,M3B,North York,Don Mills,43.750072,-79.295849
12,M4B,East York,Parkview Hill / Woodbine Gardens,43.7942,-79.262029
13,M5B,Downtown Toronto,"Garden District, Ryerson",43.781638,-79.304302


Q3 ) Explore and cluster the neighborhoods in Toronto

In [11]:
# import libraries
from geopy.geocoders import Nominatim 
import folium

address = 'Toronto, Canada'

# Get coordinates for Toronto
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [25]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# remove rows with missing values
neigh_df.dropna(axis=0, how='any', thresh=None, subset=None, inplace=True)

# add markers to map
for lat, lng, borough, neighborhood in zip(neigh_df['Latitude'], neigh_df['Longitude'], neigh_df['Borough'], neigh_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [42]:
### cluster only the neighborhoods in Downtown Toronto and create a new dataframe of the Downtown Toronto data.

toronto_data = neigh_df[neigh_df['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
toronto_data

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,Regent Park / Harbourfront,43.773136,-79.239476
1,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.727929,-79.262029
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.781638,-79.304302
3,M5C,Downtown Toronto,St. James Town,43.77012,-79.408493
4,M5E,Downtown Toronto,Berczy Park,43.739015,-79.506944
5,M5G,Downtown Toronto,Central Bay Street,43.685347,-79.338106
6,M6G,Downtown Toronto,Christie,43.679557,-79.352188
7,M5H,Downtown Toronto,Richmond / Adelaide / King,43.686412,-79.400049
8,M5J,Downtown Toronto,Harbourfront East / Union Station / Toronto Is...,43.650571,-79.384568
9,M5K,Downtown Toronto,Toronto Dominion Centre / Design Exchange,43.653206,-79.400049


In [45]:
toronto_data.loc[3, 'Neighborhood']

'St. James Town'

In [46]:
neighborhood_latitude = toronto_data.loc[3, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = toronto_data.loc[3, 'Longitude'] # neighborhood longitude value

neighborhood_name = toronto_data.loc[3, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of St. James Town are 43.7701199, -79.40849279999999.
