# Week 3: Segmenting and Clustering Neighborhoods in Toronto

### 1. Loading libraries

In [38]:
# loading libraries from Lab's notebook

import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

#!conda install -c beautifulsoup4
from bs4 import BeautifulSoup
import requests

print('Libraries imported.')

Libraries imported.


### 2. Scrapping list of neighborhoods from https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

In [39]:
r  = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
data = r.text
soup = BeautifulSoup(data)
soup = BeautifulSoup(str(soup).replace("\n", ""))

In [40]:
Postal = []
Borough = []
Neighbourhood = []

for items in soup.find('table', class_='wikitable').find_all('tr')[1::1]:
    data = items.find_all(['th','td'])
    try:
        Postal.append(data[0].text)
        Borough.append(data[1].text)
        Neighbourhood.append(data[2].text)
    except IndexError:pass


neighbourhood = pd.DataFrame(pd.DataFrame(data={
    'Postal' : Postal,
    'Borough' : Borough,
    'Neighborhood' : Neighbourhood
    }))

### 3. Cleaning and organizing the data

In [41]:
neighbourhood['Neighborhood'].replace("Not assigned", neighbourhood["Borough"],inplace=True)
neighbourhood = neighbourhood[neighbourhood.Borough != 'Not assigned']
neighbourhood = neighbourhood.groupby(['Postal', 'Borough'])['Neighborhood'].apply(', '.join).reset_index()

### 4. Printing head and shape of dataframe

In [42]:
neighbourhood.head(10)

Unnamed: 0,Postal,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [24]:
neighbourhood.shape

(103, 3)

### 5. Adding geo data

In [25]:
geo = pd.read_csv('http://cocl.us/Geospatial_data', names=['Postal', 'Latitude', 'Longitude'])
geo.head()

Unnamed: 0,Postal,Latitude,Longitude
0,Postal Code,Latitude,Longitude
1,M1B,43.8066863,-79.1943534
2,M1C,43.7845351,-79.1604971
3,M1E,43.7635726,-79.1887115
4,M1G,43.7709921,-79.2169174


In [26]:
toronto_df = pd.merge(neighbourhood, geo, on='Postal')
toronto_df["Latitude"] = toronto_df.Latitude.astype(float)
toronto_df["Longitude"] = toronto_df.Longitude.astype(float)
toronto_df.head(10)

Unnamed: 0,Postal,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
