In [4]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup # get data from Wikipedia page
!pip install folium
import folium # make map



### Get data from Wikipedia page: https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M and put it into a dataframe

### data.shape, as required in the assignment, is at the bottom of the cell.

In [38]:
# Scrape the Toronto postal-code table from Wikipedia into the `data` frame.
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
COLUMNS = ['PostalCode', 'Borough', 'Neighborhood']

# Fail loudly on HTTP errors instead of silently parsing an error page.
response = requests.get(WIKI_URL, timeout=30)
response.raise_for_status()
link = response.text
soup = BeautifulSoup(link, 'lxml')

# soup.find returns the first <table> on the page; this cell assumes that is
# the postal-code table and that its cells come in groups of three.
table = soup.find('table')
if table is None:
    raise ValueError(f'No <table> element found at {WIKI_URL}')
fields = table.find_all('td')

cells = [td.text.strip() for td in fields]
if len(cells) % len(COLUMNS) != 0:
    # The original loop would have died with an IndexError here; say why instead.
    raise ValueError(
        f'Expected a multiple of {len(COLUMNS)} table cells, got {len(cells)}; '
        'the page layout may have changed.'
    )

# Build all rows first and construct the frame once: DataFrame.append inside a
# loop is quadratic and was removed in pandas 2.0.
records = list(zip(cells[0::3], cells[1::3], cells[2::3]))
data = pd.DataFrame(records, columns=COLUMNS)

# Drop the rows whose borough is 'Not assigned'
# (in the recorded run this took the frame from 180 rows down to 103).
data = data[data['Borough'] != 'Not assigned']

# Per the instructions, several neighborhoods can share one PostalCode/Borough
# pair; join them into a single comma-separated row. reset_index() already
# yields the PostalCode, Borough, Neighborhood columns, so no rename is needed.
data = (
    data.groupby(['PostalCode', 'Borough'])['Neighborhood']
    .apply(', '.join)
    .reset_index()
)

data.shape

(103, 3)

### Merge the data set from above with the latitude/longitude data from http://cocl.us/Geospatial_data

In [56]:
# Load the coordinates here so this cell runs on a fresh kernel; previously it
# relied on a `coords` variable that no earlier cell defines.
coords = pd.read_csv('http://cocl.us/Geospatial_data')

# The CSV header is 'Postal Code' (with a space). rename() returns a new frame,
# so the result has to be assigned back for the new name to stick.
coords = coords.rename(columns={'Postal Code': 'PostalCode'})
coords.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [57]:
# Read the per-postal-code coordinates from the geospatial CSV.
coords = pd.read_csv('http://cocl.us/Geospatial_data')

# The CSV spells the key column 'Postal Code' (with a space); align it with `data`.
# NOTE(review): rename() returns a new frame rather than editing in place, which
# is presumably why the earlier rename attempt seemed to have no effect.
coords = coords.rename(columns={'Postal Code': 'PostalCode'})

# Inner join: keep only postal codes present in both frames.
data2 = data.merge(coords, how='inner', on='PostalCode')
data2.head()




Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Make a map of the Toronto area. The markers are colored red; clicking one shows its borough and neighborhoods.

In [68]:
# Base map centred on the Toronto area.
toronto = folium.Map(location=[43.651070, -79.347011], zoom_start=12)

# One red circle marker per row of data2; the popup names the borough and
# the neighborhoods for that row.
marker_rows = zip(
    data2['Latitude'],
    data2['Longitude'],
    data2['Borough'],
    data2['Neighborhood'],
)
for latitude, longitude, borough, neighborhood in marker_rows:
    popup_text = f'Borough: {borough}\nNeighborhoods: {neighborhood}'
    marker = folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.3,
        parse_html=False,
    )
    marker.add_to(toronto)

# Last expression of the cell: render the map.
toronto