# STEP 2 of Segmenting and Clustering Neighborhoods in Toronto
---

## 2. Build a dataframe with the postal code, borough name, and neighborhood name of each neighborhood, for use with the Foursquare API

## 2.a) Initialization: the 1st step of the assignment

In [2]:
 # Uncomment the line below if the geocoder package is not installed
#!pip install geocoder

In [3]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Download the Wikipedia page listing the Canadian postal codes that start with "M".
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the cell on an HTTP error instead of parsing an error page.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
# NOTE: despite its name, website_url holds the HTML text of the page, not a URL.
website_url = response.text

# Find the table with class 'wikitable sortable'
soup = BeautifulSoup(website_url, 'lxml')
Table_M_Toronto = soup.find('table', {'class': 'wikitable sortable'})
if Table_M_Toronto is None:
    # Fail with a clear message rather than an AttributeError on None below.
    raise ValueError("No table with class 'wikitable sortable' was found on the page")

# All rows (<tr>) under 'wikitable sortable'
datas_rows = Table_M_Toronto.find_all('tr')

# Extract the stripped text of the <td> cells of every row into a list.
# Rows that contain no <td> cell (such as the title row) produce an empty list
# and are skipped, wherever they appear in the table.
list_toronto_M = []
for data_row in datas_rows:
    cols = [cell.text.strip() for cell in data_row.find_all('td')]
    if cols:
        list_toronto_M.append(cols)

# Convert the list into a dataframe
df_toronto_M = pd.DataFrame(list_toronto_M, columns=['PostalCode', 'Borough', 'Neighborhood'])
# Delete all rows where the borough is not assigned
df_toronto_M = df_toronto_M[df_toronto_M.Borough != 'Not assigned']
df_toronto_M = df_toronto_M.reset_index(drop=True)
# Where the neighborhood is not assigned, use the borough name as the neighborhood
df_toronto_M.loc[(df_toronto_M.Neighborhood == 'Not assigned'), 'Neighborhood'] = df_toronto_M.Borough
df_toronto_M.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## 2.b) Latitude and Longitude information
Note: since the geocoder package is unreliable, I used the CSV file from this link: http://cocl.us/Geospatial_data

## Reading the csv file 

In [4]:
# Load Geospatial_Coordinates.csv and, in the same chained expression,
# rename its "Postal Code" column to "PostalCode"
df_coordinate = (
    pd.read_csv('Geospatial_Coordinates.csv')
    .rename(columns={"Postal Code": "PostalCode"})
)

## Setting the data in a dataframe

In [5]:
# Index the coordinates by postal code.
# set_index() names the index 'PostalCode' and keeps every column's dtype,
# so no transpose / dict round trip is needed.
# verify_integrity=True raises a ValueError if a postal code appears more than
# once, instead of silently keeping only one of the duplicated rows.
df_coordinate_dict = df_coordinate.set_index('PostalCode', verify_integrity=True)

# Result
df_coordinate_dict

Unnamed: 0_level_0,Latitude,Longitude
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476
...,...,...
M9N,43.706876,-79.518188
M9P,43.696319,-79.532242
M9R,43.688905,-79.554724
M9V,43.739416,-79.588437


## Setting the postal code as the index of the first dataframe

In [6]:
# Set PostalCode as the index.
# The guard makes this cell safe to re-run: once PostalCode is the index it is
# no longer a column, and calling set_index('PostalCode') again would raise a KeyError.
if 'PostalCode' in df_toronto_M.columns:
    df_toronto_M = df_toronto_M.set_index('PostalCode')
# Result
df_toronto_M

Unnamed: 0_level_0,Borough,Neighborhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...
M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
M4Y,Downtown Toronto,Church and Wellesley
M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


## Merging the 2 dataframes with PostalCode as index key

In [7]:
# Combine the neighborhoods with their Latitude/Longitude, matching rows on
# PostalCode, then turn PostalCode back into a regular column.
# merge() defaults to an inner join, so only postal codes present in both
# dataframes are kept.
mergedDf = df_toronto_M.merge(
    df_coordinate_dict[['Latitude', 'Longitude']],
    on='PostalCode',
).reset_index()

# Display the merged dataframe
mergedDf

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509
