<h1 style="text-align:center">Segmenting and Clustering Neighborhoods in Toronto</h1>
<hr>

<h3>Scraping data from Wikipedia</h3>

In [13]:
import pandas as pd

In [14]:
# Wikipedia article "List of postal codes of Canada: M" -- the page whose tables are read below.
wikipedia_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [15]:
# Parse the HTML tables found at the URL; read_html returns a list of DataFrames.
df_list = pd.read_html(io=wikipedia_url)

<h3>Data wrangling</h3>

In [16]:
# Work on a copy of the first parsed table so df_list itself stays untouched.
# NOTE(review): assumes the postal-code table is the first one read_html returned -- confirm.
df = df_list[0].copy()

# Drop rows whose Borough is 'Not assigned'.
df = df[df['Borough'] != 'Not assigned']

# Collapse rows that share a postal code into a single row.
# Neighborhoods are joined with ", ". The borough is joined de-duplicated
# (order preserved), so a code listed on several rows yields "Scarborough"
# rather than "Scarborough, Scarborough".
df = df.groupby('Postal Code', as_index=False).agg(
    {
        'Borough': lambda names: ", ".join(dict.fromkeys(names)),
        'Neighborhood': ", ".join,
    }
)

# If a cell has a borough but a 'Not assigned' neighborhood, the neighborhood
# becomes the borough. A single .loc assignment is used on purpose: the chained
# form df[col][mask] = ... writes to a temporary object (SettingWithCopyWarning,
# and a silent no-op under copy-on-write).
needs_borough_name = df['Neighborhood'] == 'Not assigned'
df.loc[needs_borough_name, 'Neighborhood'] = df.loc[needs_borough_name, 'Borough']

# Rename by label instead of by position, so a change in column order cannot
# silently mislabel the data.
df = df.rename(columns={'Postal Code': 'PostalCode'})

# printing shape
print(df.shape)

(103, 3)


In [17]:
# Preview the first ten rows of the cleaned table.
df.head(n=10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


<h3>Assigning latitude and longitude</h3>

In [18]:
# Read the coordinates CSV and relabel its columns in one chained expression;
# the first column is named 'PostalCode' and the others 'Latitude'/'Longitude'.
file_path = './Geospatial_Coordinates.csv'
df_coordinates = pd.read_csv(file_path).set_axis(
    ['PostalCode', 'Latitude', 'Longitude'],
    axis='columns',
)

In [19]:
# Attach Latitude/Longitude to each postal-code row. The default inner join is
# spelled out: codes present in only one of the two frames are dropped.
df = pd.merge(df, df_coordinates, how='inner', on='PostalCode')

In [20]:
# Preview the first eight rows after the coordinates have been merged in.
df.head(n=8)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
