# Segmenting and Clustering Neighborhoods in Toronto

## Task 1

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Fetch every HTML table on the Wikipedia postal-code page and keep the first
# one (index 0), which the rest of this notebook parses.
wiki_url = r"https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wiki_tables = pd.read_html(wiki_url)
data_table = wiki_tables[0]

In [3]:
# Remove the text "assigned" (together with any word characters attached
# directly in front of it) from the string cells; regex=True makes pandas
# treat the pattern as a regular expression instead of an exact cell value.
unassigned_pattern = r'\w*assigned\b'
data = data_table.replace(
    to_replace=unassigned_pattern,
    value="",
    regex=True,
)

In [4]:
# For each table row keep only the cells whose text is longer than 7
# characters.
# NOTE(review): the threshold presumably filters out the cells shortened by
# the "assigned" clean-up step — confirm against the raw table.
place_lst = [
    [cell for cell in row if len(cell) > 7]
    for row in data.values
]

In [5]:
# Hand-written replacements for two parsed cells, keyed by their
# (row, column) position inside place_lst.
# NOTE(review): these positions depend on the table layout at fetch time —
# confirm they still point at the M3C / M3B cells.
manual_overrides = {
    (2, 1): 'M3CNorth York(Don Mills South / Flemingdon Park)',
    (1, 1): 'M3BNorth York(Don Mills North)',
}
for (row_idx, col_idx), cell_text in manual_overrides.items():
    place_lst[row_idx][col_idx] = cell_text

In [6]:
# Split every cell string into [postal code, borough, neighbourhood, ...]:
# the first three characters are the postal code, the final character is
# dropped, and the text in between is split on "(".
# A single flat comprehension avoids re-copying the accumulated list on
# every row, which repeated `list + list` concatenation would do.
# NOTE(review): a cell without "(" yields only two fields and a cell with
# several "(" yields more than three — confirm this matches the table.
place_lst_mdfd = [
    [cell[:3]] + cell[3:-1].split("(")
    for row_cells in place_lst
    for cell in row_cells
]

In [7]:
# Sanity check: number of fields parsed for the third entry.
len(place_lst_mdfd[2])

3

In [8]:
# Build the frame with a spare fourth column (named "None") to absorb rows
# that split into an extra field, drop that column, then replace " /" with
# "," in the neighbourhood text.
# NOTE(review): dropping the spare column silently discards whatever landed
# in it — confirm nothing useful is lost.
column_names = ['PostalCode', 'Borough', 'Neighborhood', "None"]
ans_df = (
    pd.DataFrame(place_lst_mdfd, columns=column_names)
    .drop(columns="None")
    .assign(Neighborhood=lambda frame: frame["Neighborhood"].str.replace(" /", ","))
)
ans_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## Task 2

In [9]:
# Load the postal-code / latitude / longitude lookup from the local CSV file.
geo_csv_path = "Geospatial_Coordinates.csv"
geo_posi = pd.read_csv(geo_csv_path)
geo_posi.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
# Attach coordinates to each postal code. The inner join keeps only codes
# present in both frames; the right-hand key column is redundant after the
# join and is dropped.
merged = pd.merge(
    ans_df,
    geo_posi,
    how='inner',
    left_on="PostalCode",
    right_on="Postal Code",
)
ans_df2 = merged.drop(columns="Postal Code")
ans_df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


## Task 3

In [17]:
# Count neighbourhood rows per borough, smallest count first.
# NOTE(review): the recorded output contains labels such as
# "EtobicokeNorthwest" that look like a borough name fused with extra text —
# the cell-parsing step probably needs a fix.
borough_counts = ans_df2.groupby("Borough")["Neighborhood"].count()
borough_counts.sort_values()

Borough
Downtown TorontoStn A PO Boxes25 The Esplanade                   1
East TorontoBusiness reply mail Processing Centre969 Eastern     1
East YorkEast Toronto                                            1
EtobicokeNorthwest                                               1
MississaugaCanada Post Gateway Processing Centre                 1
East Toronto                                                     4
East York                                                        4
York                                                             5
West Toronto                                                     6
Central Toronto                                                  9
Etobicoke                                                       11
Scarborough                                                     17
Downtown Toronto                                                18
North York                                                      24
Name: Neighborhood, dtype: int64

In [18]:
import folium

In [19]:
from geopy.geocoders import Nominatim

In [20]:
address = "Toronto"

# Look up the city's coordinates through the Nominatim geocoding service.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError("Geocoder returned no result for {!r}".format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'
      .format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [23]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per row, labelled "<neighbourhood>, <borough>"
for lat, lng, borough, neighborhood in zip(ans_df2['Latitude'],
                                           ans_df2['Longitude'],
                                           ans_df2['Borough'],
                                           ans_df2['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html is a Popup option and is set here; CircleMarker itself
    # takes no such argument, so it is not repeated on the marker.
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Last expression of the cell, so the notebook renders the map.
map_toronto