In [15]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


In [16]:
wikilink = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
postcodes = pd.read_html(wikilink, header =0)
toronto_codes = postcodes[0].iloc[1:]

## Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned
tdf = toronto_codes[toronto_codes.Borough != 'Not assigned']

## group by post code and aggregate neighborhoods separated with a comma
gtdf = tdf.groupby(['Postcode','Borough'], as_index=False).agg({'Neighbourhood': ','.join})

## If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough
gtdf.loc[gtdf['Neighbourhood'] == 'Not assigned', 'Neighbourhood'] = gtdf['Borough']

## Load geographical coordinates for neighbourhoods
df = pd.read_csv('https://cocl.us/Geospatial_data')

## Join dataframes
geodf = gtdf.merge(df, left_on='Postcode', right_on='Postal Code')
geodf.drop(columns=['Postal Code'])


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


In [24]:
## cluster central toronto neighborhoods
toronto_data = geodf[geodf['Borough'] == 'Central Toronto'].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,M4N,43.72802,-79.38879
1,M4P,Central Toronto,Davisville North,M4P,43.712751,-79.390197
2,M4R,Central Toronto,North Toronto West,M4R,43.715383,-79.405678
3,M4S,Central Toronto,Davisville,M4S,43.704324,-79.38879
4,M4T,Central Toronto,"Moore Park,Summerhill East",M4T,43.689574,-79.38316


In [25]:
## Use geopy library to get the latitude and longitude values of Central Toronto
address = 'Central Toronto, TO'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Central Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Central Toronto are 43.653963, -79.387207.


In [37]:
# create map of Central Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto