# Part I: Previously done 

In [5]:
import pandas as pd
import requests
from lxml import html

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = requests.get(url)
tree = html.fromstring(page.content)
tr = tree.xpath('//tr')

codeList = []

for i in tr:
    row =i.text_content().strip().split('\n')
    if len(row) !=3:
        break
    codeList.append(row)

columns = ['PostalCode', 'Borough', 'Neighborhood']
df = pd.DataFrame(codeList[1:], columns=columns)

pattern = 'Not assigned'
bool = df[df['Borough']!=pattern].reset_index()
bool['Neighborhood'][bool.Neighborhood == pattern] = bool['Borough']

postalCodesDf = bool.groupby(['PostalCode', 'Borough']).Neighborhood.unique().apply(lambda x: ', '.join(x)).reset_index()
print('The number of rows: {}'.format(postalCodesDf.shape[0]))
postalCodesDf.head()

columns = ['PostalCode', 'Latitude', 'Longitude']
coordinates = pd.read_csv('Geospatial_Coordinates.csv')
coordinates.columns = columns

neighborhoods = pd.merge(postalCodesDf, coordinates, on = 'PostalCode')
neighborhoods.head()

The number of rows: 103


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Part II: Exploring and clustering the data

###  Dependencies

In [8]:
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from geopy.geocoders import Nominatim
#from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

In [9]:
address = 'Toronto'

geolocator = Nominatim(user_agent='toronto_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


### Create the map of Toronto Postal Code Areas using latitude and longitude values

In [10]:
mapTorontoAreas = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(mapTorontoAreas)  

In [12]:
mapTorontoAreas

##  Selecting boroughs that contain the word Toronto

In [13]:
pattern = 'toronto'
boolean = neighborhoods.Borough.str.contains(pattern, regex=True, case = False)
torontoBoroughs=neighborhoods[boolean].reset_index()

In [14]:
torontoBoroughs

Unnamed: 0,index,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,37,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,43,M4M,East Toronto,Studio District,43.659526,-79.340923
4,44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
7,47,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
9,49,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


In [15]:
len(torontoBoroughs)

38

###  Map of the Selected Postal Areas of Toronto

In [16]:
mapToronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(torontoBoroughs['Latitude'], torontoBoroughs['Longitude'], torontoBoroughs['Borough'], torontoBoroughs['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(mapToronto)  
    
mapToronto