In [1]:
#Import libraries
from urllib.request import urlopen
from bs4 import BeautifulSoup
import datetime
import re
import numpy as np
import pandas as pd
import csv

#conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
#from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# !conda install -c conda-forge folium=0.5.0 --yes
# Uncomment the line above if folium is not installed yet (e.g. you haven't completed the Foursquare API lab).
import folium # map rendering library

In [2]:
# URL of the Wikipedia page that lists the Canadian postal codes starting with "M"
quote_page = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Query the website and parse the response while it is still open. The `with`
# block closes the HTTP response afterwards instead of leaving the connection
# open until the object happens to be garbage collected.
with urlopen(quote_page) as page:
    # parse the html using beautiful soup and store in variable `soup`
    soup = BeautifulSoup(page, "html.parser")

In [3]:
def remove_tags(data_arr_list):
    """Strip table-cell markup from every entry of ``data_arr_list`` in place.

    For each entry, the fragments ``<td>``, ``</td>``, newline, ``td>``,
    ``</td`` and ``]]`` are removed in that order. Whenever a removal leaves
    text that still contains ``title="``, the entry is replaced by the text
    between the first ``title="`` and the next ``">``.

    Returns the same list object that was passed in.
    """
    markup_fragments = ("<td>", "</td>", "\n", "td>", "</td", "]]")
    title_marker = 'title="'

    for position, cell in enumerate(data_arr_list):
        for fragment in markup_fragments:
            if fragment not in str(cell):
                continue
            cell = cell.replace(fragment, "")
            # Linked cells: keep only the value of the anchor's title attribute.
            if title_marker in str(cell):
                cell = str(cell).split(title_marker)[1].split('">')[0]
        data_arr_list[position] = cell

    return data_arr_list

In [4]:
def compile_postal(data_arr_list):
    """Merge consecutive records that share the same postal code.

    ``data_arr_list`` is a flat list laid out as repeating triples
    ``[postal code, borough, neighborhood, ...]``. Whenever a record has the
    same postal code as the record immediately after it, the following
    record's borough and neighborhood are appended (comma separated) to the
    current record unless that text is already contained in it, and the
    following record is removed.

    The list is modified in place and the same list object is returned.

    This is a single forward pass rather than a recursive call per merge, so
    the number of merges is not limited by Python's recursion depth and the
    list is not rescanned from the start after every merge.
    """
    i = 0
    while i < len(data_arr_list) - 3:
        if str(data_arr_list[i]) == str(data_arr_list[i + 3]):
            # Add to the current postal code.
            # NOTE: `not in` is a substring test on the accumulated text, so a
            # value that is a substring of an existing entry is not appended.
            if str(data_arr_list[i + 4]) not in data_arr_list[i + 1]:
                data_arr_list[i + 1] = str(data_arr_list[i + 1]) + ", " + str(data_arr_list[i + 4])
            if str(data_arr_list[i + 5]) not in data_arr_list[i + 2]:
                data_arr_list[i + 2] = str(data_arr_list[i + 2]) + ", " + str(data_arr_list[i + 5])

            # Remove the merged record; stay on the current record because the
            # next one may share its postal code as well.
            del data_arr_list[i + 3:i + 6]
        else:
            i += 3

    return data_arr_list

In [5]:
def drop_na_borough(data_arr_list):
    """Remove every record whose borough is 'Not assigned'.

    ``data_arr_list`` is a flat list laid out as repeating triples
    ``[postal code, borough, neighborhood, ...]``; the borough sits at
    indices 1, 4, 7, ... A record whose borough equals ``'Not assigned'`` is
    deleted as a whole (all three fields).

    The list is modified in place and the same list object is returned.

    The records are dropped in one iterative pass instead of one recursive
    call per deletion, so large numbers of unassigned rows cannot exhaust
    Python's recursion limit.
    """
    i = 1
    while i < len(data_arr_list) - 1:
        if str(data_arr_list[i]) == 'Not assigned':
            # Delete postal code, borough and neighborhood; the next record
            # slides into this position, so the index is not advanced.
            del data_arr_list[i - 1:i + 2]
        else:
            i += 3
    return data_arr_list

In [6]:
def neighborhood_borough(data_arr_list):
    """Replace 'Not assigned' neighborhoods with the record's borough.

    ``data_arr_list`` is a flat list laid out as repeating triples
    ``[postal code, borough, neighborhood, ...]``; the neighborhood sits at
    indices 2, 5, 8, ... Each neighborhood equal to ``'Not assigned'`` is
    overwritten with the string form of the borough directly before it.

    The list is modified in place and the same list object is returned.

    Each record is visited exactly once. A record whose borough is itself
    'Not assigned' therefore simply keeps that value, instead of triggering
    endless re-processing of the same entry.
    """
    for i in range(2, len(data_arr_list), 3):
        if str(data_arr_list[i]) == 'Not assigned':
            data_arr_list[i] = str(data_arr_list[i - 1])
    return data_arr_list

In [7]:
# Collect every <table> element found on the page
data = soup.findAll('table')

# Gather, in document order, the string form of each <td> cell whose markup
# contains a bare "<td>" tag
data_arr = [
    str(cell)
    for table in data
    for cell in table.findAll('td')
    if "<td>" in str(cell)
]

# The last collected element is invalid, so discard it
data_arr.pop()

# Clean-up pipeline, applied in order:
#   1. strip the HTML tags
#   2. merge records sharing a postal code
#   3. drop records whose borough is 'Not assigned'
#   4. give 'Not assigned' neighborhoods the name of their borough
for cleanup_step in (remove_tags, compile_postal, drop_na_borough, neighborhood_borough):
    data_arr = cleanup_step(data_arr)

In [8]:
# The flat list repeats (postal code, borough, neighborhood); slice it with a
# stride of 3 so that each field becomes its own column
column_names = ('Postal Code', 'borough', 'Neighborhood')
toronto_dict = {name: data_arr[offset::3] for offset, name in enumerate(column_names)}

# Build the pandas DataFrame from the column dictionary
toronto_df = pd.DataFrame(toronto_dict)

toronto_df.head()

Unnamed: 0,Postal Code,borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront (Toronto), Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park (Toronto),Queen's Park (Toronto)


In [9]:
# Display the (rows, columns) dimensions of the assembled DataFrame
toronto_df.shape

(103, 3)

In [10]:
# Add the coordinate columns, filled with a placeholder until real values are loaded
for coordinate_column in ('Latitude', 'Longitude'):
    toronto_df[coordinate_column] = 'Not Set'

toronto_df.head()

Unnamed: 0,Postal Code,borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,Not Set,Not Set
1,M4A,North York,Victoria Village,Not Set,Not Set
2,M5A,Downtown Toronto,"Harbourfront (Toronto), Regent Park",Not Set,Not Set
3,M6A,North York,"Lawrence Heights, Lawrence Manor",Not Set,Not Set
4,M7A,Queen's Park (Toronto),Queen's Park (Toronto),Not Set,Not Set


In [12]:
# Read the coordinates file once into lookup tables keyed by postal code.
# Rows with fewer than three fields (e.g. blank lines) are skipped instead of
# raising IndexError. A later row for the same postal code overrides an earlier one.
latitude_by_code = {}
longitude_by_code = {}
with open('Geospatial_Coordinates.csv', 'r', newline='') as csvfile:
    geo_reader = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in geo_reader:
        if len(row) < 3:
            continue
        latitude_by_code[row[0]] = row[1]
        longitude_by_code[row[0]] = row[2]

# Fill in the coordinates with one masked assignment per column instead of
# rescanning the whole frame for every CSV row. Postal codes that are absent
# from the file keep whatever value the column already holds. Values are
# stored as the strings read from the file.
postal_codes = toronto_df['Postal Code']
has_coordinates = postal_codes.isin(list(latitude_by_code))
toronto_df.loc[has_coordinates, 'Latitude'] = postal_codes[has_coordinates].map(latitude_by_code)
toronto_df.loc[has_coordinates, 'Longitude'] = postal_codes[has_coordinates].map(longitude_by_code)
    

In [13]:
# Preview the first rows to check the Latitude/Longitude columns after loading the CSV
toronto_df.head()

Unnamed: 0,Postal Code,borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7532586,-79.3296565
1,M4A,North York,Victoria Village,43.7258823,-79.3155716
2,M5A,Downtown Toronto,"Harbourfront (Toronto), Regent Park",43.6542599,-79.3606359
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.4647633
4,M7A,Queen's Park (Toronto),Queen's Park (Toronto),43.6623015,-79.3894938


In [25]:
# Convert the coordinate columns from strings to numbers
# (pd.to_numeric raises if a value cannot be parsed, e.g. a leftover placeholder)
for coordinate_column in ('Latitude', 'Longitude'):
    toronto_df[coordinate_column] = pd.to_numeric(toronto_df[coordinate_column])

# Show the resulting column dtypes
toronto_df.dtypes

Postal Code      object
borough          object
Neighborhood     object
Latitude        float64
Longitude       float64
dtype: object

In [26]:

# create map of Toronto using latitude and longitude values
# (Toronto lies west of the prime meridian, so its longitude is negative)
map_toronto = folium.Map(location=[43.6532, -79.3832], zoom_start=11)

# add one circle marker per row, with the neighborhood name(s) as its popup
for lat, lng, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Neighborhood']):
    # parse_html is an option of Popup, so it is set here rather than on the marker
    popup = folium.Popup(neighborhood, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto