# Segmenting and Clustering Neighborhoods in Toronto

## Importing required libraries

In [30]:
## Import request and Pandas
import requests
import pandas as pd

#### Get the HTML of the Wiki page, convert into a table with help of read_html.
#### As suggested remove cells with a borough that is Not assigned.

In [31]:
wiki = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wikipedia_page = requests.get(wiki)

inp0 = pd.read_html(wikipedia_page.content, header=0)[0]
inp1 = inp0[inp0.Borough != 'Not assigned']

inp1.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


##### Replace "Not assigned" in Neighbourhood with Borough

If we have Neighbourhood Not assigned, we change it with the value of Borough

In [32]:
inp1.Neighbourhood.replace('Not assigned',inp1.Borough,inplace=True)
inp1.head(8)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills


In [33]:
# Checking 'Not assigned' in Neighbourhood
inp1.loc[inp1.Neighbourhood == 'Not assigned']

Unnamed: 0,Postal Code,Borough,Neighbourhood


In [34]:
# Checking value counts for Borough
inp1['Borough'].value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East York            5
East Toronto         5
Mississauga          1
Name: Borough, dtype: int64

In [35]:
# Checking value counts for Neighbourhood
inp1['Neighbourhood'].value_counts()

Downsview                                          4
Don Mills                                          2
Victoria Village                                   1
Northwood Park, York University                    1
India Bazaar, The Beaches West                     1
                                                  ..
Cliffside, Cliffcrest, Scarborough Village West    1
Garden District, Ryerson                           1
First Canadian Place, Underground city             1
Willowdale, Newtonbrook                            1
Humber Summit                                      1
Name: Neighbourhood, Length: 99, dtype: int64

In [36]:
inp1.shape

(103, 3)

##### For making data more presentable and for ease to analyhsis let's group Neighbourhoods with the same Postcode

In [37]:
inp1_toronto = inp1.groupby(['Postal Code', 'Borough'])['Neighbourhood'].apply(lambda x: ', '.join(x))
inp1_toronto = inp1_toronto.reset_index()
inp1_toronto.rename(columns = {'Postal Code':'PostalCode'}, inplace = True)
inp1_toronto.rename(columns = {'Neighbourhood':'Neighborhood'}, inplace = True)
inp1_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [38]:
inp1_toronto.shape

(103, 3)

##### Getting Coordinates (Latitude & Longitude) of each neighborhood

In [39]:
url = 'http://cocl.us/Geospatial_data'
inp1_geo=pd.read_csv(url)
inp1_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [40]:
#Checking shape of the file
inp1_geo.shape

(103, 3)

In [42]:
inp1_toronto = inp1_toronto.join(inp1_geo.set_index('Postal Code'), on='PostalCode')
inp1_toronto.head

<bound method NDFrame.head of     PostalCode      Borough  \
0          M1B  Scarborough   
1          M1C  Scarborough   
2          M1E  Scarborough   
3          M1G  Scarborough   
4          M1H  Scarborough   
..         ...          ...   
98         M9N         York   
99         M9P    Etobicoke   
100        M9R    Etobicoke   
101        M9V    Etobicoke   
102        M9W    Etobicoke   

                                          Neighborhood   Latitude  Longitude  
0                                       Malvern, Rouge  43.806686 -79.194353  
1               Rouge Hill, Port Union, Highland Creek  43.784535 -79.160497  
2                    Guildwood, Morningside, West Hill  43.763573 -79.188711  
3                                               Woburn  43.770992 -79.216917  
4                                            Cedarbrae  43.773136 -79.239476  
..                                                 ...        ...        ...  
98                                          

In [43]:
inp1_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
