In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

# Getting HTML page from Wiki and creationg Beautiful Soup package object
main_file_fromWiki = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup_object = BeautifulSoup(main_file_fromWiki.text, 'lxml')

# Extracting table from Wiki file and saving in lists
data = []
columns = []
table = soup_object.find(class_='wikitable')
for index, tr in enumerate(table.find_all('tr')):
    section = []
    for td in tr.find_all(['th','td']):
        section.append(td.text.rstrip())
    
    #First row is columns headings
    if (index == 0):
        columns = section
    else:
        data.append(section)

#convert list into Pandas DataFrame
df_canada = pd.DataFrame(data = data, columns = columns)
df_canada.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [4]:
#Delete Borough where value is 'Not Assigned'
df_canada = df_canada[df_canada['Borough'] != 'Not assigned']
df_canada.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [7]:
#Groupby Neighborhood according to Postal code and separate it with comma , because one postal code can have multiple neighborhood
df_canada["Neighborhood"] = df_canada.groupby("Postal Code")["Neighborhood"].transform(lambda neigh: ', '.join(neigh))

# Drop duplicate values
df_canada = df_canada.drop_duplicates()

# Update index to be postcode
if(df_canada.index.name != 'Postal Code'):
    df_canada = df_canada.set_index('Postal Code')
    
df_canada.head(15)

Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
M1B,Scarborough,"Malvern, Rouge"
M3B,North York,Don Mills
M4B,East York,"Parkview Hill, Woodbine Gardens"
M5B,Downtown Toronto,"Garden District, Ryerson"


In [8]:
# Replace neighorhood value with borough column  value where there is no value in Neighorhood 
df_canada['Neighborhood'].replace("Not assigned", df_canada["Borough"],inplace=True)
df_canada.head(8)

Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
M1B,Scarborough,"Malvern, Rouge"
M3B,North York,Don Mills


In [9]:
print('Dimension of Dataframe is ',df_canada.shape)

Dimension of Dataframe is  (103, 2)


<h1> Question Second of Assignment

In [10]:
# Combining spatial file and data frame df_canada

geo_toronto = "https://cocl.us/Geospatial_data"

!wget r'toronto_m.geospatial_data.csv' geo_toronto

geo_toronto_data = pd.read_csv(geo_toronto).set_index('Postal Code')

geo_toronto_data.head()

'wget' is not recognized as an internal or external command,
operable program or batch file.


Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


In [12]:
# Joining Canada Dataframe file with GeoSpatial file that we read into geo_toronto_data

combined_dataframe = df_canada.join(geo_toronto_data)

combined_dataframe.head(10)

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
M3B,North York,Don Mills,43.745906,-79.352188
M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [13]:
!conda install -c conda-forge folium=0.5.0 --yes

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [16]:
 !conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # Converting address into geographical data
import requests # Handle requests


import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\ProgramData\Anaconda3

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.22.0-pyh9f0ad1d_0



Downloading and Extracting Packages

geographiclib-1.50   | 34 KB     |            |   0% 
geographiclib-1.50   | 34 KB     | ####7      |  47% 
geographiclib-1.50  

In [17]:
combined_dataframe.head()

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.753259,-79.329656
M4A,North York,Victoria Village,43.725882,-79.315572
M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


<h1> Creationg map of Toronto

In [22]:
toronto_latitude = 43.6532; toronto_longitude = -79.3832
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10.7)

# add markers to map
for lat, lng, borough, neighborhood in zip(combined_dataframe['Latitude'], combined_dataframe['Longitude'], combined_dataframe['Borough'], combined_dataframe['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    

map_toronto.save("my_map1.html" )

from IPython.display import HTML

HTML('<iframe src=my_map1.html width=700 height=450></iframe>')

