The cell below is importing libraries that I expect to use throughout this lab. 

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Lift the display limits so printed DataFrames show every column and row.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): this import path is deprecated in pandas >= 1.0 in favor of
# `pandas.json_normalize` -- confirm the pandas version this notebook targets.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


The next two cells are importing Beautiful Soup 4 and scraping the Postal Codes from the Wikipedia page.

In [2]:
import bs4

In [3]:
from bs4 import BeautifulSoup
import requests

# Download the Wikipedia page; fail fast on a hung connection or an HTTP error
# instead of silently parsing an error page.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

# Locate the postal-code table; give a clear error if the page layout changed.
table = soup.find('table', class_='wikitable sortable')
if table is None:
    raise ValueError("Could not find a 'wikitable sortable' table on the page.")
table_rows = table.find_all('tr')

# Collect the stripped text of every data cell, one list per table row.
res = []
for tr in table_rows:
    td = tr.find_all('td')
    # Keep empty cells in place so each value stays under its own column;
    # dropping them would shift the remaining values to the left.
    row = [cell.text.strip() for cell in td]
    # Rows with no data cells (e.g. the <th> header row) or only blank cells
    # are skipped.
    if any(row):
        res.append(row)

df = pd.DataFrame(res, columns=["PostalCode", "Borough", "Neighborhood"])
# Persist the scraped table; to_csv returns None when given a path, so the
# result is not assigned.
df.to_csv('export_dataframe.csv', encoding='utf-8', index=False, header=True)
print(df)

    PostalCode           Borough  \
0          M1A      Not assigned   
1          M2A      Not assigned   
2          M3A        North York   
3          M4A        North York   
4          M5A  Downtown Toronto   
5          M5A  Downtown Toronto   
6          M6A        North York   
7          M6A        North York   
8          M7A      Queen's Park   
9          M8A      Not assigned   
10         M9A         Etobicoke   
11         M1B       Scarborough   
12         M1B       Scarborough   
13         M2B      Not assigned   
14         M3B        North York   
15         M4B         East York   
16         M4B         East York   
17         M5B  Downtown Toronto   
18         M5B  Downtown Toronto   
19         M6B        North York   
20         M7B      Not assigned   
21         M8B      Not assigned   
22         M9B         Etobicoke   
23         M9B         Etobicoke   
24         M9B         Etobicoke   
25         M9B         Etobicoke   
26         M9B         Etobi

The cell below is for creating the data frame from the csv that was created during the scrape. 

In [4]:
# Load the table that the scraping cell saved to disk back into a DataFrame.
df2 = pd.read_csv('export_dataframe.csv')
print(df2)

    PostalCode           Borough  \
0          M1A      Not assigned   
1          M2A      Not assigned   
2          M3A        North York   
3          M4A        North York   
4          M5A  Downtown Toronto   
5          M5A  Downtown Toronto   
6          M6A        North York   
7          M6A        North York   
8          M7A      Queen's Park   
9          M8A      Not assigned   
10         M9A         Etobicoke   
11         M1B       Scarborough   
12         M1B       Scarborough   
13         M2B      Not assigned   
14         M3B        North York   
15         M4B         East York   
16         M4B         East York   
17         M5B  Downtown Toronto   
18         M5B  Downtown Toronto   
19         M6B        North York   
20         M7B      Not assigned   
21         M8B      Not assigned   
22         M9B         Etobicoke   
23         M9B         Etobicoke   
24         M9B         Etobicoke   
25         M9B         Etobicoke   
26         M9B         Etobi

The cell below groups the rows by Postal Code and Borough, joining the Neighborhoods in each group into a single comma-separated string.

In [5]:
# One row per (PostalCode, Borough) pair; the neighborhoods of each pair are
# concatenated into a single ", "-separated string.
df_grouped = (
    df2.groupby(['PostalCode', 'Borough'])['Neighborhood']
    .apply(', '.join)
    .reset_index()
)
print(df_grouped)

    PostalCode           Borough  \
0          M1A      Not assigned   
1          M1B       Scarborough   
2          M1C       Scarborough   
3          M1E       Scarborough   
4          M1G       Scarborough   
5          M1H       Scarborough   
6          M1J       Scarborough   
7          M1K       Scarborough   
8          M1L       Scarborough   
9          M1M       Scarborough   
10         M1N       Scarborough   
11         M1P       Scarborough   
12         M1R       Scarborough   
13         M1S       Scarborough   
14         M1T       Scarborough   
15         M1V       Scarborough   
16         M1W       Scarborough   
17         M1X       Scarborough   
18         M1Y      Not assigned   
19         M1Z      Not assigned   
20         M2A      Not assigned   
21         M2B      Not assigned   
22         M2C      Not assigned   
23         M2E      Not assigned   
24         M2G      Not assigned   
25         M2H        North York   
26         M2J        North 

The cell below removes the rows whose Borough is "Not assigned".

In [6]:
# Keep only the rows whose Borough differs from the 'Not assigned' placeholder.
df3 = df_grouped.loc[df_grouped['Borough'].ne('Not assigned')]
print(df3)

    PostalCode           Borough  \
1          M1B       Scarborough   
2          M1C       Scarborough   
3          M1E       Scarborough   
4          M1G       Scarborough   
5          M1H       Scarborough   
6          M1J       Scarborough   
7          M1K       Scarborough   
8          M1L       Scarborough   
9          M1M       Scarborough   
10         M1N       Scarborough   
11         M1P       Scarborough   
12         M1R       Scarborough   
13         M1S       Scarborough   
14         M1T       Scarborough   
15         M1V       Scarborough   
16         M1W       Scarborough   
17         M1X       Scarborough   
25         M2H        North York   
26         M2J        North York   
27         M2K        North York   
28         M2L        North York   
29         M2M        North York   
30         M2N        North York   
31         M2P        North York   
32         M2R        North York   
40         M3A        North York   
41         M3B        North 

The cell below is showing the dimensions of the resulting dataframe. 

In [7]:
# Dimensions of the filtered frame as a (rows, columns) tuple.
df3.shape

(103, 3)

The cell below is reading the Geospatial Coordinates csv file.

In [8]:
# Load the coordinates file from the working directory into a DataFrame.
df_geo = pd.read_csv('Geospatial_Coordinates.csv')
print(df_geo)

    Postal Code   Latitude  Longitude
0           M1B  43.806686 -79.194353
1           M1C  43.784535 -79.160497
2           M1E  43.763573 -79.188711
3           M1G  43.770992 -79.216917
4           M1H  43.773136 -79.239476
5           M1J  43.744734 -79.239476
6           M1K  43.727929 -79.262029
7           M1L  43.711112 -79.284577
8           M1M  43.716316 -79.239476
9           M1N  43.692657 -79.264848
10          M1P  43.757410 -79.273304
11          M1R  43.750072 -79.295849
12          M1S  43.794200 -79.262029
13          M1T  43.781638 -79.304302
14          M1V  43.815252 -79.284577
15          M1W  43.799525 -79.318389
16          M1X  43.836125 -79.205636
17          M2H  43.803762 -79.363452
18          M2J  43.778517 -79.346556
19          M2K  43.786947 -79.385975
20          M2L  43.757490 -79.374714
21          M2M  43.789053 -79.408493
22          M2N  43.770120 -79.408493
23          M2P  43.752758 -79.400049
24          M2R  43.782736 -79.442259
25          

The cell below joins the two dataframes together so that I have the Borough, Neighborhood and Coordinates in one dataframe. 

In [9]:
# Inner-join the borough/neighborhood rows with the coordinate rows on the
# postal code; the key column is named differently in each frame.
df_cd = df3.merge(
    df_geo,
    how='inner',
    left_on='PostalCode',
    right_on='Postal Code',
)
print(df_cd)

    PostalCode           Borough  \
0          M1B       Scarborough   
1          M1C       Scarborough   
2          M1E       Scarborough   
3          M1G       Scarborough   
4          M1H       Scarborough   
5          M1J       Scarborough   
6          M1K       Scarborough   
7          M1L       Scarborough   
8          M1M       Scarborough   
9          M1N       Scarborough   
10         M1P       Scarborough   
11         M1R       Scarborough   
12         M1S       Scarborough   
13         M1T       Scarborough   
14         M1V       Scarborough   
15         M1W       Scarborough   
16         M1X       Scarborough   
17         M2H        North York   
18         M2J        North York   
19         M2K        North York   
20         M2L        North York   
21         M2M        North York   
22         M2N        North York   
23         M2P        North York   
24         M2R        North York   
25         M3A        North York   
26         M3B        North 