## Segmenting and Clustering Neighborhoods in Toronto - IBM Data Science Capstone Project

### Part 1. Preparing Toronto neighborhood data.

In [1]:
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Lift pandas' display limits so DataFrames are rendered in full:
# None means "no cap" on the number of columns / rows shown.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

#### 1. Obtain Toronto neighborhood data from Wikipedia.

In [2]:
# Download the Wikipedia page that lists the "M" (Toronto) postal codes.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# stops on an HTTP error instead of silently parsing an error page.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
source = response.text

# The code takes the first <table> element on the page as the postal-code table.
soup = BeautifulSoup(source, 'lxml')
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found on the Wikipedia page')

# header=0 promotes the first table row to column labels in a single step, so
# the result does not depend on whether the installed pandas auto-detects the
# <th> header row (the manual "use row 0 as header, then drop row 0" approach
# discards a real data row when it does).
# The markup is wrapped in StringIO because passing literal HTML strings to
# read_html is deprecated in recent pandas releases.
df = pd.read_html(StringIO(str(table)), header=0)[0]

#### 2. Drop rows whose borough is "Not assigned".

In [3]:
# Keep only the rows whose Borough is something other than 'Not assigned'.
df = df.loc[df['Borough'] != 'Not assigned']

#### 3. Combine neighborhoods that share the same postal code and borough.

In [4]:
# Collapse all rows that share a postal code and borough into a single row
# whose Neighbourhood value is the comma-separated list of the group's names.
df = (
    df.groupby(by=['Postcode', 'Borough'])['Neighbourhood']
    .apply(lambda names: ', '.join(names))
    .reset_index()
)

#### 4. If a row has a borough but a "Not assigned" neighborhood, set the neighborhood to the same name as the borough.

In [5]:
# Where the neighbourhood is 'Not assigned', fall back to the borough of the
# same row. mask() aligns the replacement values on the row index, and the
# result is assigned back to the column explicitly: calling
# replace(..., inplace=True) on the selection df['Neighbourhood'] acts on a
# column selection and is not guaranteed to update df itself (it does not
# under pandas Copy-on-Write).
unassigned = df['Neighbourhood'] == 'Not assigned'
df['Neighbourhood'] = df['Neighbourhood'].mask(unassigned, df['Borough'])
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


#### 5. Print the number of rows of the dataframe.

In [6]:
# Number of rows in the cleaned dataframe.
len(df.index)

103

### Part 2. Import geographical coordinates of Toronto and join to neighborhood information.

#### 6. Import geospatial data from a CSV file.

In [7]:
# Read the geospatial CSV (postal code with latitude / longitude columns)
# and preview its first rows.
cd = pd.read_csv(filepath_or_buffer='http://cocl.us/Geospatial_data')
cd.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### 7. Create a dataset that has the geographical coordinates of the neighborhood.

In [8]:
# Index both tables by postal code and left-join the coordinates onto the
# neighbourhood rows, so every neighbourhood row is kept.
dfcd = df.set_index(keys='Postcode').join(
    other=cd.set_index(keys='Postal Code'),
    how='left',
)

In [9]:
# Move the index back into an ordinary column and preview the joined table.
# NOTE(review): re-running this cell on its own resets the index again and
# adds an extra column; re-run the join cell first.
dfcd = dfcd.reset_index(drop=False)
dfcd.head(n=5)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
