# Segmenting and Clustering Neighborhoods in Toronto

## Import the Libraries  

In [1]:
from bs4 import BeautifulSoup
import requests 
import csv
import json
import xml
import pandas as pd
import numpy as np

## Download the Wiki Link

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# A timeout keeps the cell from blocking forever if the server never answers.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of handing an error page to the parser.
response.raise_for_status()
WikiToronto = response.text

## Parse the Downloaded HTML into a BeautifulSoup Object

In [3]:
# Build the parse tree from the downloaded page text using the lxml parser.
Toronto_soup = BeautifulSoup(markup=WikiToronto, features='lxml')


## Finding the Required Table

In [4]:
# Take the first <table> element in the parsed page.
Toronto_table = Toronto_soup.find('table')

# find() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError in a later cell.
if Toronto_table is None:
    raise ValueError('No <table> element found in the downloaded page')


## Find All of the Table Cells

In [5]:
# Collect every <td> cell inside the table, in document order,
# then show how many cells were found.
fields = Toronto_table.find_all(name='td')
len(fields)

864

## Create the DataFrame 

In [6]:


# The flat list of table cells is read as consecutive rows of three values:
# postal code, borough, neighbourhood.
column_names = ['Postalcode', 'Borough', 'Neighborhood']
cells_per_row = len(column_names)

# Strip surrounding whitespace/newlines from every cell once, up front.
cell_texts = [cell.text.strip() for cell in fields]

# A trailing partial row means the table layout is not the expected
# three-column one; report that explicitly instead of hitting an IndexError
# halfway through the data.
if len(cell_texts) % cells_per_row != 0:
    raise ValueError(
        'Expected the number of table cells to be a multiple of '
        f'{cells_per_row}, got {len(cell_texts)}'
    )

# Slice the flat list into one record per table row and build the frame
# directly from those records (no parallel lists, no transpose).
rows = [
    cell_texts[start:start + cells_per_row]
    for start in range(0, len(cell_texts), cells_per_row)
]
df_tor = pd.DataFrame(rows, columns=column_names)
df_tor.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Removing the "Not Assigned" Boroughs from the DataFrame

In [7]:
# Keep only the rows that have a real borough: drop rows whose Borough is
# missing or is the 'Not assigned' placeholder.
#
# The filter is applied to the frame itself and assigned back. Calling
# replace(..., inplace=True) on df_tor['Borough'] acts on a temporary Series,
# which pandas does not guarantee will update df_tor (it does not under
# Copy-on-Write).
has_borough = df_tor['Borough'].notna() & df_tor['Borough'].ne('Not assigned')
# .copy() gives later cells an independent frame they can safely assign into.
df_tor = df_tor.loc[has_borough].copy()
df_tor.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


## Replacing the "Not Assigned" Neighborhoods with the Borough Name

In [8]:
# Show the rows whose Neighborhood starts with the 'Not assigned' placeholder.
unassigned_mask = df_tor['Neighborhood'].str.match('Not assigned')
df_tor.loc[unassigned_mask]


Unnamed: 0,Postalcode,Borough,Neighborhood
8,M7A,Queen's Park,Not assigned


In [9]:
# Give every 'Not assigned' neighborhood the name of its own borough.
# Using the row's Borough (instead of a hard-coded name) keeps this correct
# for any number of affected rows, and assigning through .loc on the frame
# guarantees df_tor itself is updated.
needs_borough_name = df_tor['Neighborhood'] == 'Not assigned'
df_tor.loc[needs_borough_name, 'Neighborhood'] = df_tor.loc[needs_borough_name, 'Borough']

# Verification: this should now display an empty frame.
df_tor[df_tor['Neighborhood'].str.match('Not assigned')]


Unnamed: 0,Postalcode,Borough,Neighborhood


In [10]:
# Preview the first ten rows after cleaning; the original row labels are kept.
df_tor.head(10)


Unnamed: 0,Postalcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


## Grouping the neighborhoods together for the same postal code and Borough 

In [11]:
# One output row per (postal code, borough) pair: the neighborhoods that share
# the pair are concatenated into a single comma-separated string.
group_keys = ['Postalcode', 'Borough']
neighborhoods_by_code = df_tor.groupby(group_keys)['Neighborhood']
tor_df = neighborhoods_by_code.apply(', '.join).reset_index()

tor_df.head(10)


Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


## Display the shape of DataFrame 

In [12]:
# (rows, columns) of the grouped frame.
tor_df.shape


(103, 3)

In [13]:
# Load the CSV of per-postal-code coordinates and preview the first ten rows.
geo_csv_url = 'http://cocl.us/Geospatial_data'
df_geo = pd.read_csv(geo_csv_url)
df_geo.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [14]:
# Rename the coordinate columns so the postal-code key has the same name in
# both frames.
df_geo.columns = ['Postalcode', 'Latitude', 'Longitude']

# Inner join: keep only postal codes present in both the neighborhood table
# and the coordinate table.
df_torcomplete = tor_df.merge(df_geo, how='inner', on='Postalcode')

df_torcomplete.head(10)


Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
