# Segmenting and Clustering Neighborhoods in Toronto

In [19]:
import pandas as pd

### Download dataset

In [20]:
# Save the Wikipedia page "List of postal codes of Canada: M" to a local HTML file
# (-q: suppress wget's progress output, -O: name of the output file).
# NOTE(review): the message below is printed unconditionally; wget's exit status is
# not checked, so a failed download would still report success.
!wget -q -O 'List_of_postal_codes_of_Canada.html' https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
print('Data downloaded!')

Data downloaded!


### Convert HTML table to a Dataframe

In [21]:
# Parse every HTML table in the downloaded page, using each table's first row as its header.
tables = pd.read_html('List_of_postal_codes_of_Canada.html', header=0)

headings = ['Postal Code', 'Borough', 'Neighborhood']

# Select the first table whose columns are exactly the expected headings.
for table in tables:
    if list(table.columns) == headings:
        break
else:
    # Without this guard the loop would fall through and `table` would silently
    # refer to the last table on the page, whatever its contents.
    found = [list(candidate.columns) for candidate in tables]
    raise ValueError(
        f'No table with columns {headings} found in the downloaded page; '
        f'tables present have columns: {found}'
    )

# Write the selected columns to disk and reload them, so `df` reflects the CSV snapshot.
csv_path = 'canada_postal_codes.csv'
table[headings].to_csv(csv_path)
df = pd.read_csv(csv_path, index_col=0, sep=',')



### Inspect Dataframe

In [22]:
# Preview the first rows of the table reloaded from the CSV file.
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Drop rows where Borough is "Not assigned"

In [53]:
# Keep only the postal codes that have a borough, then renumber the rows from 0.
has_borough = df['Borough'].ne('Not assigned')
df_with_borough = df.loc[has_borough].reset_index(drop=True)

### Print shape

In [54]:
# (rows, columns) of the table after the "Not assigned" boroughs were removed.
df_with_borough.shape

(103, 3)

# Geocoding

In [85]:
# Remote CSV with one latitude/longitude pair per postal code (needs network access).
geocode_url = 'https://cocl.us/Geospatial_data'
geocode_data = pd.read_csv(geocode_url)
geocode_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [89]:
# Index the coordinates by postal code so they can be looked up for each borough row;
# rows keep the order and index of df_with_borough.
coordinates_by_postal_code = geocode_data.set_index('Postal Code')
toronto_data = df_with_borough.join(coordinates_by_postal_code, on='Postal Code')
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Install map libraries

In [90]:
# Install geopy from the conda-forge channel; --yes answers the confirmation prompt automatically.
# NOTE(review): geopy is not imported by any cell visible in this notebook - confirm it is still needed.
!conda install -c conda-forge geopy --yes 

# libraries for displaying images
# NOTE(review): Image and HTML are not referenced by the visible cells - confirm before removing.
from IPython.display import Image 
from IPython.core.display import HTML 
# Install folium pinned to version 0.5.0 from conda-forge before importing it.
!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

# NOTE(review): the recorded output shows both conda commands being interrupted while
# these messages were still printed - they confirm that the import succeeded, not that
# the installation ran.
print('Folium installed')
print('Libraries imported.')

Solving environment: / ^C
failed

CondaError: KeyboardInterrupt

Solving environment: / ^C
- Folium installed
Libraries imported.


### Cluster neighborhoods by borough

In [114]:
# Every distinct borough is treated as one cluster, in order of first appearance.
clusters = toronto_data.loc[:, 'Borough'].unique()
# Plain-list copy of the same values, used for position lookups.
list_clusters = [borough for borough in clusters]

## Visualize Clusters

In [123]:
import numpy as np # library to handle data in a vectorized manner
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map, centred on the coordinates of the first postal code in the table (M3A, Parkwoods)
map_clusters = folium.Map(location=[43.753259, -79.329656], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow color per borough, as hex strings
colors_array = cm.rainbow(np.linspace(0, 1, clusters.size))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# Resolve each borough's fill color once instead of scanning list_clusters for every marker.
# The "- 1" shift is kept from the original color assignment: the first borough wraps
# around to the last color and every other borough takes the color before its position.
fill_color_by_borough = {
    borough: rainbow[position - 1]
    for position, borough in enumerate(list_clusters)
}
# All markers share the same outline color (the last color of the palette).
outline_color = rainbow[-1]

# add markers to the map: one circle per postal code, with a "<borough> cluster: <neighborhoods>" popup
for lat, lon, neighborhoods, borough in zip(
        toronto_data['Latitude'],
        toronto_data['Longitude'],
        toronto_data['Neighborhood'],
        toronto_data['Borough']):
    label = folium.Popup(borough + ' cluster: ' + neighborhoods, parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=outline_color,
        fill=True,
        fill_color=fill_color_by_borough[borough],
        fill_opacity=0.7).add_to(map_clusters)

# Leave the map as the cell's last expression so the notebook renders it.
map_clusters

## Explore Clusters

In [125]:
# Rows whose borough is the first entry of `clusters`.
in_cluster_0 = toronto_data['Borough'] == clusters[0]
toronto_data.loc[in_cluster_0]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
7,M3B,North York,Don Mills,43.745906,-79.352188
10,M6B,North York,Glencairn,43.709577,-79.445073
13,M3C,North York,Don Mills,43.7259,-79.340923
27,M2H,North York,Hillcrest Village,43.803762,-79.363452
28,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259
33,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
34,M3J,North York,"Northwood Park, York University",43.76798,-79.487262


In [126]:
# Rows whose borough is the second entry of `clusters`.
in_cluster_1 = toronto_data['Borough'] == clusters[1]
toronto_data.loc[in_cluster_1]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
36,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752
42,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576


In [127]:
# Rows whose borough is the third entry of `clusters`.
in_cluster_2 = toronto_data['Borough'] == clusters[2]
toronto_data.loc[in_cluster_2]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
11,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724
17,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201
70,M9P,Etobicoke,Westmount,43.696319,-79.532242
77,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
88,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321
89,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
93,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484
94,M9W,Etobicoke,"Northwest, West Humber - Clairville",43.706748,-79.594054
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944


In [128]:
# Rows whose borough is the fourth entry of `clusters`.
in_cluster_3 = toronto_data['Borough'] == clusters[3]
toronto_data.loc[in_cluster_3]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
18,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
22,M1G,Scarborough,Woburn,43.770992,-79.216917
26,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
32,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
38,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
44,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
51,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
58,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [129]:
# Rows whose borough is the fifth entry of `clusters`.
in_cluster_4 = toronto_data['Borough'] == clusters[4]
toronto_data.loc[in_cluster_4]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
14,M4C,East York,Woodbine Heights,43.695344,-79.318389
23,M4G,East York,Leaside,43.70906,-79.363452
29,M4H,East York,Thorncliffe Park,43.705369,-79.349372
35,M4J,East York,"East Toronto, Broadview North (Old East York)",43.685347,-79.338106


### Thank you for reviewing my work!

This notebook is part of a course on **Coursera** called *Applied Data Science Capstone*.