# Week 3 Part 3 - Toronto Neighbourhoods

In [11]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


## Import additional web scraping libraries

In [12]:
import urllib.request
import time
from bs4 import BeautifulSoup

print('Libraries imported.')

Libraries imported.


## Load and explore the data

In [13]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)

response

<Response [200]>

### Source the response html code:

In [14]:
soup = BeautifulSoup(response.text, 'html.parser')

# print(soup.prettify())

### Find postcode table in html code:

In [15]:
table_tag = soup.findAll('table')[0]

postal_code_table = soup.find('table',{'class':'wikitable sortable'})

print(postal_code_table)

<table class="wikitable sortable">
<tbody><tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>
<tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>
<tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>
<tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>
<tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>
<tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue, Humber Valley Village
</td></tr>
<tr>
<td>M1B
</td>
<td>Scarborough
</td>
<td>Malvern, Rouge
</td></tr>
<tr>
<td>M2B
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3B
</td>
<td

### Extract rows from table

In [16]:
rows = postal_code_table.find_all('tr')

postal_codes = []
boroughs = []
neighbourhoods = []

for row in rows:
#     print(row)
#     print(" ")
    cells = row.find_all('td')

    if len(cells) > 1:
            postal_code = cells[0]
            borough = cells[1]
            neighbourhood = cells[2]

            # Skip 'Not assigned' Boroughs
            if borough.text.strip()  != 'Not assigned':  
                postal_codes.append(postal_code.text.strip())
                boroughs.append(borough.text.strip())
                
                if neighbourhood.text.strip()  == 'Not assigned':  
                    neighbourhoods.append(borough.text.strip())
                else:
                    neighbourhoods.append(neighbourhood.text.strip())

### Create Pandas Dataframe from lists

In [17]:
postal_code_df = pd.DataFrame(zip(postal_codes, boroughs, neighbourhoods), columns =['Postal Code', 'Borough', 'Neighbourhood'])

# postal_code_df = postal_code_df.sort_values(by = ['Postal Code'], ascending = True)

print(postal_code_df)

    Postal Code           Borough  \
0           M3A        North York   
1           M4A        North York   
2           M5A  Downtown Toronto   
3           M6A        North York   
4           M7A  Downtown Toronto   
5           M9A         Etobicoke   
6           M1B       Scarborough   
7           M3B        North York   
8           M4B         East York   
9           M5B  Downtown Toronto   
10          M6B        North York   
11          M9B         Etobicoke   
12          M1C       Scarborough   
13          M3C        North York   
14          M4C         East York   
15          M5C  Downtown Toronto   
16          M6C              York   
17          M9C         Etobicoke   
18          M1E       Scarborough   
19          M4E      East Toronto   
20          M5E  Downtown Toronto   
21          M6E              York   
22          M1G       Scarborough   
23          M4G         East York   
24          M5G  Downtown Toronto   
25          M6G  Downtown Toronto   
2

### Check dataframe shape

In [18]:
postal_code_df.shape

(103, 3)

## Part 2 - Load geolocation & append dataframe

In [19]:
# import geocoder # import geocoder

# # initialize your variable to None
# lat_lng_coords = None

# latitudes  = []
# longitudes = []
# for pc in postal_codes:
#     # loop until you get the coordinates
#     while(lat_lng_coords is None):
#       g = geocoder.google('{}, Toronto, Ontario'.format(pc))
#       lat_lng_coords = g.latlng

#     latitude = lat_lng_coords[0]
#     longitude = lat_lng_coords[1]
    
#     latitudes.append(latitude)
#     longitudes.append(longitude)


# Read Geospatial_Coordinates.csv
geolocation = pd.read_csv('Geospatial_Coordinates.csv')
print(geolocation)

    Postal Code   Latitude  Longitude
0           M1B  43.806686 -79.194353
1           M1C  43.784535 -79.160497
2           M1E  43.763573 -79.188711
3           M1G  43.770992 -79.216917
4           M1H  43.773136 -79.239476
5           M1J  43.744734 -79.239476
6           M1K  43.727929 -79.262029
7           M1L  43.711112 -79.284577
8           M1M  43.716316 -79.239476
9           M1N  43.692657 -79.264848
10          M1P  43.757410 -79.273304
11          M1R  43.750072 -79.295849
12          M1S  43.794200 -79.262029
13          M1T  43.781638 -79.304302
14          M1V  43.815252 -79.284577
15          M1W  43.799525 -79.318389
16          M1X  43.836125 -79.205636
17          M2H  43.803762 -79.363452
18          M2J  43.778517 -79.346556
19          M2K  43.786947 -79.385975
20          M2L  43.757490 -79.374714
21          M2M  43.789053 -79.408493
22          M2N  43.770120 -79.408493
23          M2P  43.752758 -79.400049
24          M2R  43.782736 -79.442259
25          

### Append postcode dataframe with geolocation

In [21]:
postal_code_wGeocode = pd.concat([postal_code_df, geolocation], axis = 1, join = 'inner')

print(postal_code_wGeocode)

    Postal Code           Borough  \
0           M3A        North York   
1           M4A        North York   
2           M5A  Downtown Toronto   
3           M6A        North York   
4           M7A  Downtown Toronto   
5           M9A         Etobicoke   
6           M1B       Scarborough   
7           M3B        North York   
8           M4B         East York   
9           M5B  Downtown Toronto   
10          M6B        North York   
11          M9B         Etobicoke   
12          M1C       Scarborough   
13          M3C        North York   
14          M4C         East York   
15          M5C  Downtown Toronto   
16          M6C              York   
17          M9C         Etobicoke   
18          M1E       Scarborough   
19          M4E      East Toronto   
20          M5E  Downtown Toronto   
21          M6E              York   
22          M1G       Scarborough   
23          M4G         East York   
24          M5G  Downtown Toronto   
25          M6G  Downtown Toronto   
2

Check if any rows were dropped

In [22]:
print(postal_code_wGeocode.shape)

(103, 6)


In [None]:
## Part 3 - Load geolocation & append dataframe