## Clustering Toronto Neighbourhoods - with Geospatial Data and folium
### Import dependencies: numpy, pandas, and supporting libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# None removes the display limit, so every column/row of a displayed frame is rendered
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): pandas 1.0 deprecated this import path in favour of pd.json_normalize;
# confirm it still resolves against the installed pandas version.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported !')

Libraries imported !


## Load the data, selecting the dataframe that contains:
##      Postcode, Borough, Neighbourhood data

In [2]:
# pd.read_html fetches the page (network access required) and returns a list of
# DataFrames, one per HTML table it finds.
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
# Take the first table on the page; the column names printed below are the check
# that it is the postal-code table.
# NOTE(review): selecting by position will pick a different table if the page
# layout changes.
dft = pd.DataFrame(df[0])
# List the column names, one per line.
for column in dft.columns:
    print(column)

Postcode
Borough
Neighbourhood


# Initial dataframe 

In [3]:
# Preview the first 10 rows of the raw table, which still includes 'Not assigned' entries.
dft.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


# Eliminate all rows that have 'Not assigned' value for 'Borough'

In [4]:
# Keep only the rows whose Borough has been assigned.
# .copy() makes dft_filtered an independent frame rather than a slice of dft, so
# later column assignments modify it directly instead of triggering pandas'
# SettingWithCopyWarning.
dft_filtered = dft[dft['Borough'] != 'Not assigned'].copy()

In [5]:
# Preview the filtered rows; the original row labels are kept, so the index is
# no longer contiguous.
dft_filtered.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


# Use numpy 'where' to replace 'Neighbourhood' values of 'Not assigned' with the 'Borough' value.

In [6]:
# Where a neighbourhood is 'Not assigned', fall back to the row's borough name;
# every other neighbourhood value is kept as-is.
# assign() builds a new frame and rebinds the name instead of writing a column
# into a frame derived from a slice of dft, which is what raised
# SettingWithCopyWarning. Re-running the cell yields the same result.
dft_filtered = dft_filtered.assign(
    Neighbourhood=np.where(
        dft_filtered['Neighbourhood'] == 'Not assigned',
        dft_filtered['Borough'],
        dft_filtered['Neighbourhood'],
    )
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [7]:
# Check the replacement: a row that previously had 'Not assigned' as its
# neighbourhood (e.g. M7A) should now show its borough name there.
dft_filtered.head(11)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


# Shape of final Dataframe

In [8]:
# Report (rows, columns) after filtering out unassigned boroughs and
# back-filling neighbourhood names.
print('The shape of the final dataframe is {}'.format(dft_filtered.shape))

The shape of the final dataframe is (211, 3)


## Read geographical coordinates of each postal code

In [9]:
# Download the CSV that maps each postal code to a latitude/longitude pair
# (network access required).
geo_postal_code = pd.read_csv('http://cocl.us/Geospatial_data')
# read_csv already returns a DataFrame; this wraps it again under the name the
# later cells use.
dpost = pd.DataFrame(geo_postal_code)

In [10]:
# Preview the coordinate table: one 'Postal Code' with its Latitude and Longitude per row.
dpost.head(11)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


#   Merge the two dataframes so that the latitude, longitude, and neighbourhood data are combined into a new dataframe

In [11]:
# Attach coordinates to every neighbourhood row by matching on postal code.
# The key is named differently in each frame ('Postcode' vs 'Postal Code'), so
# both key columns appear in the result. With the default inner join, rows
# without a match on the other side are dropped.
geo = dft_filtered.merge(dpost, left_on='Postcode', right_on='Postal Code')
geo.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,M5A,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,M5A,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,M6A,43.718518,-79.464763
5,M6A,North York,Lawrence Manor,M6A,43.718518,-79.464763
6,M7A,Queen's Park,Queen's Park,M7A,43.662301,-79.389494
7,M9A,Etobicoke,Islington Avenue,M9A,43.667856,-79.532242
8,M1B,Scarborough,Rouge,M1B,43.806686,-79.194353
9,M1B,Scarborough,Malvern,M1B,43.806686,-79.194353


In [12]:
# Relabel the 'Postcode' column as 'PostalCode', modifying geo in place.
geo.rename({'Postcode': 'PostalCode'}, axis='columns', inplace=True)

In [13]:
# Remove the right-hand key column left over from the merge; the left-hand key
# column already carries the same postal codes.
# errors='ignore' keeps the cell safe to re-run: `del geo['Postal Code']` raises
# KeyError once the column has already been removed.
geo = geo.drop(columns=['Postal Code'], errors='ignore')

In [14]:
# Confirm the merged frame now has a single postal-code column alongside the coordinates.
geo.head(11)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763
5,M6A,North York,Lawrence Manor,43.718518,-79.464763
6,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
7,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
8,M1B,Scarborough,Rouge,43.806686,-79.194353
9,M1B,Scarborough,Malvern,43.806686,-79.194353


# Rename the 'Neighbourhood' column to 'Neighborhood'

In [15]:
# Switch the column label to the 'Neighborhood' spelling, modifying geo in place.
geo.rename({'Neighbourhood': 'Neighborhood'}, axis='columns', inplace=True)

In [19]:
# Confirm the column is now labelled 'Neighborhood'.
geo.head(11)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763
5,M6A,North York,Lawrence Manor,43.718518,-79.464763
6,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
7,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
8,M1B,Scarborough,Rouge,43.806686,-79.194353
9,M1B,Scarborough,Malvern,43.806686,-79.194353
