# Notebook for Capstone Project (week 3)

## Installing necessary libraries

In [43]:
!pip install folium

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 2.9 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.11.0


In [44]:
from io import StringIO

import folium
import numpy as np
import pandas as pd
import requests
from geopy.geocoders import Nominatim



## Scraping the Wikipedia page

In [15]:
# Download the Wikipedia list of "M" postal codes and parse its first
# sortable wikitable into a DataFrame named `table`.
WIKI_URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
TABLE_OPEN = '<table class="wikitable sortable">'
TABLE_CLOSE = '</table>'

webPage = requests.get(WIKI_URL, timeout=30)
webPage.raise_for_status()  # fail loudly instead of parsing an HTTP error page
html = webPage.text

tableStart = html.find(TABLE_OPEN)
if tableStart == -1:
    raise ValueError("Could not find the postal-code wikitable in the page")

# Look for the closing tag *after* the opening tag; searching from the start of
# the page could match an unrelated table that appears earlier in the HTML.
tableEnd = html.find(TABLE_CLOSE, tableStart)
if tableEnd == -1:
    raise ValueError("The postal-code wikitable is not terminated by </table>")

# Keep the closing tag so the parser receives a well-formed <table> element.
htmlTable = html[tableStart:tableEnd + len(TABLE_CLOSE)]

# read_html expects a file-like object; passing literal HTML is deprecated.
table = pd.read_html(StringIO(htmlTable), header=0)[0]

table.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


## Preprocessing the table

In [35]:
# Build `df`: one row per postal code with an assigned borough.
# `table` itself is left untouched so this cell can be re-run safely and the
# earlier `table.head(10)` output stays truthful.

# 1. Drop postal codes whose borough is missing or "Not assigned".
has_borough = table["Borough"].notna() & (table["Borough"] != "Not assigned")
assigned = table[has_borough]

# 2. A missing / "Not assigned" neighbourhood takes the name of its borough.
neighbourhood = assigned["Neighbourhood"]
needs_fill = neighbourhood.isna() | (neighbourhood == "Not assigned")
assigned = assigned.assign(
    Neighbourhood=neighbourhood.mask(needs_fill, assigned["Borough"])
)

# 3. Collapse repeated postal codes: keep the first borough and join all the
#    neighbourhood names with ", ". sort=False keeps first-appearance order.
df = (
    assigned
    .groupby("Postal Code", sort=False, as_index=False)
    .agg({"Borough": "first", "Neighbourhood": ", ".join})
)

df.shape

(103, 3)

## Joining Latitude and Longitude

In [34]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


## Mapping Toronto data



In [None]:
# Keep only the rows whose borough name contains "Toronto" and renumber the
# index from zero; then preview the first rows.
in_toronto = df_final['Borough'].str.contains('Toronto')
Toronto = df_final.loc[in_toronto].reset_index(drop=True)
Toronto.head()

In [59]:
## Looking up Longitude and Latitude

In [53]:
# Geocode the city name with Nominatim to get a centre point for the map.
address = 'Toronto'

geolocator = Nominatim(user_agent="Toronto")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; stop here with a clear message
# rather than failing on the attribute access below.
if location is None:
    raise ValueError("Nominatim returned no result for address {!r}".format(address))
latitude = location.latitude
longitude = location.longitude

print("The geographical coordinates of {} are {}, {}.".format(address, latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


## Creating the map using the coordinates found above

In [57]:
# Create a map of Toronto centred on the geocoded latitude/longitude.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one small green circle per row, with the neighbourhood name(s) as popup.
# The rows come from the `Toronto` frame built in the "Mapping Toronto data"
# cell; the previous name `xToronto_data` is not defined in any visible cell,
# so the loop failed on a fresh kernel.
for lat, lng, neighbourhood in zip(Toronto['Latitude'], Toronto['Longitude'], Toronto['Neighbourhood']):
    folium.CircleMarker(
        [lat, lng],
        radius=1,
        # parse_html=True escapes the label text (e.g. the apostrophe in "Queen's Park")
        popup=folium.Popup(neighbourhood, parse_html=True),
        color='green',
        fill=False,
    ).add_to(map_toronto)

map_toronto