In [80]:
# import necessary libraries
import pandas as pd
import numpy as np
#!conda install urllib3
import urllib3
import requests

In [81]:
# Scrape the Toronto ("M") postal-code table from Wikipedia and load it
# into the DataFrame `df` with columns Postal Code / Borough / Neighborhood.
from bs4 import BeautifulSoup

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging forever on a stalled connection.
res = requests.get(url, timeout=30)
# Stop on an HTTP error instead of parsing an error page as if it were the article.
res.raise_for_status()
soup = BeautifulSoup(res.content, 'lxml')

# find the first table carrying the "wikitable" class within the soup
tdata = soup.find("table", attrs={"class": "wikitable"})
if tdata is None:
    # soup.find() returns None when nothing matches; fail with a readable message
    # rather than an AttributeError on the next line.
    raise ValueError('No table with class "wikitable" found at {}'.format(url))
tdata_rows = tdata.find_all('tr')

# One list per output column, filled row by row from the table.
postal_code_list = []
borough_list = []
Neighborhood_list = []
for tr in tdata_rows:
    td = tr.find_all('td')
    row = [cell.text for cell in td]
    # Rows without <td> cells (e.g. a header row made of <th>) give an empty list.
    # Rows with fewer than three cells cannot supply all three columns, so they
    # are skipped as well instead of raising IndexError.
    if len(row) < 3:
        continue
    postal_code_list.append(row[0])
    borough_list.append(row[1])
    Neighborhood_list.append(row[2])

# Build the DataFrame in one step from the collected columns.
# Cell text is stored exactly as scraped (no stripping is done here).
df = pd.DataFrame({
    'Postal Code': postal_code_list,
    'Borough': borough_list,
    'Neighborhood': Neighborhood_list,
})

In [82]:
# Remove the newline characters carried over from the scraped table cells.
df.replace('\n', '', regex=True, inplace=True)
# Drop the postal codes whose borough is the 'Not assigned' placeholder.
df.drop(df[df['Borough'] == 'Not assigned'].index, inplace=True)
# Handle neighborhoods that still carry the placeholder. The spelling must match
# the one used for the borough filter above ('Not assigned', lowercase "a");
# comparing against 'Not Assigned' can never match that placeholder.
# Any such row falls back to its borough name so the placeholder does not end up
# as a neighborhood label downstream. With no matching rows this is a no-op.
neighborhood_missing = df['Neighborhood'] == 'Not assigned'
df.loc[neighborhood_missing, 'Neighborhood'] = df.loc[neighborhood_missing, 'Borough']
# Renumber the rows from 0 after the drop.
df.reset_index(inplace=True, drop=True)
# description of dataframe
df.describe()

Unnamed: 0,Postal Code,Borough,Neighborhood
count,103,103,103
unique,103,10,99
top,M4R,North York,Downsview
freq,1,24,4


In [83]:
# Preview the first five rows of the cleaned table.
df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [84]:
# (number of rows, number of columns) of the cleaned table.
(len(df.index), len(df.columns))

(103, 3)

# Part 1: Shape of Dataframe: 103 Rows, 3 Columns

# Read Latitude, Longitude csv file

In [85]:
# Download the CSV of coordinates into `latlong`.
latlong = pd.read_csv(filepath_or_buffer='http://cocl.us/Geospatial_data')

In [86]:
# Preview the first five rows of the coordinates table.
latlong.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


# Sort Dataframe based on Postal code for comparison

In [87]:
# Sort `df` in place by postal code (ascending is the default order),
# then preview the first five rows.
df.sort_values('Postal Code', inplace=True)
df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood
6,M1B,Scarborough,"Malvern, Rouge"
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
18,M1E,Scarborough,"Guildwood, Morningside, West Hill"
22,M1G,Scarborough,Woburn
26,M1H,Scarborough,Cedarbrae


In [88]:
# The sort left the old row labels out of order; replace them in place with a
# fresh 0..n-1 index (the old labels are discarded, not kept as a column).
df.reset_index(drop=True, inplace=True)
df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


# Part 2: Merge to create final Dataframe

In [89]:
# Attach Latitude/Longitude to every postal code by joining on 'Postal Code'.
# how='inner' is the default join type, spelled out here: only postal codes
# present in both tables are kept.
df_final = df.merge(latlong, how='inner', on='Postal Code')
df_final.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Exploring the Neighborhoods

In [90]:
#!conda install -c conda-forge geopy
from geopy import geocoders
from geopy.geocoders import Nominatim


In [91]:
#!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library
from folium import map

In [92]:
# Geocode the city name; the resulting coordinates are used to centre the maps.
address = 'Toronto,Canada'
geolocator = Nominatim(user_agent="Tor_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; report that explicitly
# instead of failing with an AttributeError on `.latitude` below.
if location is None:
    raise ValueError('Could not geocode address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [93]:
# Base map centred on the geocoded Toronto coordinates (the variable keeps its
# original name, map_canada).
map_canada = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row of df_final, each with a "neighborhood, borough" popup.
marker_fields = zip(
    df_final['Latitude'],
    df_final['Longitude'],
    df_final['Borough'],
    df_final['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_fields:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html is already set on the Popup above; confirm
        # whether CircleMarker uses this keyword at all.
        parse_html=False,
    )
    marker.add_to(map_canada)

# Last expression of the cell: renders the map.
map_canada

# Looking at Toronto Boroughs

In [94]:
# Keep only the rows whose borough name contains "Toronto" and renumber them from 0.
is_toronto_borough = df_final['Borough'].str.contains('Toronto')
toronto_data = df_final[is_toronto_borough].reset_index(drop=True)
toronto_data.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [95]:
# Base map centred on the geocoded Toronto coordinates, zoomed in further than
# the all-boroughs map.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# One circle marker per row of toronto_data, each with a "neighborhood, borough" popup.
marker_fields = zip(
    toronto_data['Latitude'],
    toronto_data['Longitude'],
    toronto_data['Borough'],
    toronto_data['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_fields:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html is already set on the Popup above; confirm
        # whether CircleMarker uses this keyword at all.
        parse_html=False,
    )
    marker.add_to(map_toronto)

# Last expression of the cell: renders the map.
map_toronto

In [96]:
# (number of rows, number of columns) of the Toronto-only table.
(len(toronto_data.index), len(toronto_data.columns))

(39, 5)

# Toronto postal code areas: 39 (a single row can list several neighborhoods)