 Segmenting and Clustering Neighborhoods in Toronto

In [1]:
# all the imports
import pandas as pd
import folium
from geopy.geocoders import Nominatim 


# Scrapping the table with pandas

In [2]:
tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
df = tables[0]
df = df[df.Borough != 'Not assigned']
df.drop_duplicates(inplace=True)
print(df)


    Postcode           Borough  \
2        M3A        North York   
3        M4A        North York   
4        M5A  Downtown Toronto   
5        M5A  Downtown Toronto   
6        M6A        North York   
7        M6A        North York   
8        M7A      Queen's Park   
10       M9A         Etobicoke   
11       M1B       Scarborough   
12       M1B       Scarborough   
14       M3B        North York   
15       M4B         East York   
16       M4B         East York   
17       M5B  Downtown Toronto   
18       M5B  Downtown Toronto   
19       M6B        North York   
22       M9B         Etobicoke   
23       M9B         Etobicoke   
24       M9B         Etobicoke   
25       M9B         Etobicoke   
26       M9B         Etobicoke   
27       M1C       Scarborough   
28       M1C       Scarborough   
29       M1C       Scarborough   
31       M3C        North York   
32       M3C        North York   
33       M4C         East York   
34       M5C  Downtown Toronto   
35       M6C  

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


# Convert table to proper format

In [3]:
series = df.groupby(['Postcode','Borough'])['Neighbourhood'].apply(lambda x: ", ".join(x.astype(str)))
postal_df = series.reset_index()
print(postal_df.head(10))


  Postcode      Borough                                    Neighbourhood
0      M1B  Scarborough                                   Rouge, Malvern
1      M1C  Scarborough           Highland Creek, Rouge Hill, Port Union
2      M1E  Scarborough                Guildwood, Morningside, West Hill
3      M1G  Scarborough                                           Woburn
4      M1H  Scarborough                                        Cedarbrae
5      M1J  Scarborough                              Scarborough Village
6      M1K  Scarborough      East Birchmount Park, Ionview, Kennedy Park
7      M1L  Scarborough                  Clairlea, Golden Mile, Oakridge
8      M1M  Scarborough  Cliffcrest, Cliffside, Scarborough Village West
9      M1N  Scarborough                      Birch Cliff, Cliffside West


In [4]:
print(postal_df.shape)



(103, 3)


# Add latitude longtitude data

In [5]:
lat_long = pd.read_csv('datasets/Geospatial_Coordinates.csv')
lat_long.rename(columns={'Postal Code': 'Postcode'}, inplace=True)
print(lat_long.head())

  Postcode   Latitude  Longitude
0      M1B  43.806686 -79.194353
1      M1C  43.784535 -79.160497
2      M1E  43.763573 -79.188711
3      M1G  43.770992 -79.216917
4      M1H  43.773136 -79.239476


In [7]:
df_merged = postal_df.merge(lat_long, how='left', on='Postcode')
# df_merged = df_merged[['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longtitude']]
print(df_merged.head(10))



  Postcode      Borough                                    Neighbourhood  \
0      M1B  Scarborough                                   Rouge, Malvern   
1      M1C  Scarborough           Highland Creek, Rouge Hill, Port Union   
2      M1E  Scarborough                Guildwood, Morningside, West Hill   
3      M1G  Scarborough                                           Woburn   
4      M1H  Scarborough                                        Cedarbrae   
5      M1J  Scarborough                              Scarborough Village   
6      M1K  Scarborough      East Birchmount Park, Ionview, Kennedy Park   
7      M1L  Scarborough                  Clairlea, Golden Mile, Oakridge   
8      M1M  Scarborough  Cliffcrest, Cliffside, Scarborough Village West   
9      M1N  Scarborough                      Birch Cliff, Cliffside West   

    Latitude  Longitude  
0  43.806686 -79.194353  
1  43.784535 -79.160497  
2  43.763573 -79.188711  
3  43.770992 -79.216917  
4  43.773136 -79.239476  
5  43.7

# Draw a map of Toronto

In [8]:
address = 'Toronto'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))


  


The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [10]:
# create map of Manhattan using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto
