# Segmenting and Clustering Neighborhoods in Toronto - Part 2

## Part 1 : Loading Toronto data in dataframe

In [1]:
import numpy as np
import pandas as pd

#### Loading data from Wikipedia

In [2]:
# read_html returns a list with one DataFrame per HTML table on the page;
# the postal-code table is the first entry.
wiki_tables = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
df = wiki_tables[0]

In [3]:
# Show the table as loaded, before any cleaning.
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
...,...,...,...
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West
285,M8Z,Etobicoke,South of Bloor


#### Rename column Neighbourhood to Neighborhood

In [4]:
# Use the American spelling for the column name. Reassigning the result (rather
# than inplace=True) leaves the previously displayed frame object untouched and
# makes the data lineage explicit; re-running the cell is a harmless no-op because
# rename ignores labels that are not present.
df = df.rename(columns={'Neighbourhood': 'Neighborhood'})

#### Removing rows whose Borough is "Not assigned"

In [5]:
# Keep only the rows that have a real borough.
has_borough = df['Borough'].ne('Not assigned')
df = df.loc[has_borough]

In [6]:
# Collapse to one row per postcode: keep the first Borough seen for the postcode
# and join all of its neighborhoods into a single comma-separated string.
df = (
    df.groupby(['Postcode'])
    .agg({'Borough': 'first', 'Neighborhood': ", ".join})
    .reset_index()
)

#### Assigning the borough name to the neighborhood with value "Not assigned"

In [7]:
# List the postcodes whose neighborhood is still the "Not assigned" placeholder.
df.loc[df['Neighborhood'].eq('Not assigned')]

Unnamed: 0,Postcode,Borough,Neighborhood
93,M9A,Queen's Park,Not assigned


In [8]:
# Replace the "Not assigned" placeholder in Neighborhood with the row's Borough.
# One boolean-mask .loc assignment writes into df itself. The earlier
# df['Neighborhood'].iloc[i] = ... form is chained-indexing assignment: pandas
# warns that it may write to a temporary object, and with Copy-on-Write enabled
# it leaves df unchanged.
unassigned = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']

In [9]:
# Re-check row 93, the row the "Not assigned" filter listed, now that the fill step has run.
df.loc[93]

Postcode                 M9A
Borough         Queen's Park
Neighborhood    Queen's Park
Name: 93, dtype: object

#### The size of the dataframe

In [10]:
# (rows, columns) of the cleaned frame, which has one row per postcode.
df.shape

(103, 3)

## Part 2 : Latitude and the longitude coordinates of each neighborhood

#### Loading latitude and longitude from the Geospatial_data CSV file

In [11]:
# Latitude/longitude per postal code, read from a hosted CSV file.
geospatial_csv = 'http://cocl.us/Geospatial_data'
dfc = pd.read_csv(geospatial_csv)

In [12]:
# Show the coordinates table as loaded.
dfc

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


#### Creating a new dataframe by merging df and dfc (adding latitude and longitude to each postcode)

In [13]:
# Attach the coordinates to each postcode row. The key column is named
# differently in the two frames, so both names are given explicitly.
# how='inner' is the pandas default, written out here for clarity.
dfcn = pd.merge(
    df,
    dfc,
    how='inner',
    left_on='Postcode',
    right_on='Postal Code',
)

In [14]:
# Show the merged frame; both key columns ('Postcode' and 'Postal Code') are still present here.
dfcn

Unnamed: 0,Postcode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
...,...,...,...,...,...,...
98,M9N,York,Weston,M9N,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,M9P,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",M9R,43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",M9V,43.739416,-79.588437


In [15]:
# Drop the join key that the merge duplicated. Reassigning (instead of
# inplace=True) together with errors='ignore' keeps the cell safe to re-run:
# a second run finds no 'Postal Code' column and leaves dfcn unchanged rather
# than raising KeyError.
dfcn = dfcn.drop(columns='Postal Code', errors='ignore')

In [16]:
# Show the final frame after the duplicate 'Postal Code' key column was dropped.
dfcn

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437


# The end of part 2

# Thank you