# Explore and cluster the neighborhoods in Toronto.

### Import libraries

In [3]:
## !pip install geopy
## !pip install folium
import pandas as pd
from bs4 import BeautifulSoup
import requests

from geopy.geocoders import Nominatim

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium

You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/4f/86/1ab30184cb60bc2b95deffe2bd86b8ddbab65a4fac9f7313c278c6e8d049/folium-0.9.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 195kB/s eta 0:00:01
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/63/36/1c93318e9653f4e414a2e0c3b98fc898b4970e939afeedeee6075dd3b703/branca-0.3.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.3.1 folium-0.9.1
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


### Parse the Wikipedia page

In [4]:
# Download the Wikipedia "List of postal codes of Canada: M" page.
# The timeout keeps this cell from hanging indefinitely on a stalled
# connection, and raise_for_status() stops here on an HTTP error instead of
# letting an error page be parsed as if it were the article.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
src = response.text

# Parse the downloaded HTML with the lxml parser.
sp = BeautifulSoup(src, 'lxml')

### Find the table on the Wikipedia page and extract the required information

In [5]:
# Locate the first <table> element of the parsed page.
tbl_zipinf = sp.find('table')
if tbl_zipinf is None:
    raise ValueError('No <table> element was found on the page.')

# All data cells of the table and their total count; the count is used below
# to verify that every cell ended up in one of the three output lists.
col_values = tbl_zipinf.find_all('td')
count_elem = len(col_values)

postcode = []
borough = []
neighborhood = []

# Read the table row by row rather than stepping through a flat list of cells
# in threes: a row with a different number of cells can then no longer shift
# every following value into the wrong column.
for row in tbl_zipinf.find_all('tr'):
    cells = row.find_all('td')
    if len(cells) != 3:
        # Rows without exactly three <td> cells (e.g. a header row) are skipped.
        continue
    postcode.append(cells[0].text.strip())
    borough.append(cells[1].text.strip())
    neighborhood.append(cells[2].text.strip())

# Fail loudly if some cells were not consumed, instead of silently building a
# misaligned or truncated data set.
if 3 * len(postcode) != count_elem:
    raise ValueError(
        'Unexpected table layout: {} data cells found, but only {} rows '
        'with exactly 3 cells.'.format(count_elem, len(postcode))
    )

### Create the DataFrame

In [6]:
# Assemble the three parallel lists into a table, one list per named column.
df_postcode = pd.DataFrame(
    {
        'Postcode': postcode,
        'Borough': borough,
        'Neighborhood': neighborhood,
    },
    columns=['Postcode', 'Borough', 'Neighborhood'],
)

### Clean the data

In [7]:
# Remove, in place, every row whose borough is 'Not assigned'.
unassigned_borough_rows = df_postcode.loc[df_postcode['Borough'] == 'Not assigned'].index
df_postcode.drop(index=unassigned_borough_rows, inplace=True)

# Where only the neighborhood is 'Not assigned', reuse the borough name for it.
needs_name = df_postcode['Neighborhood'] == 'Not assigned'
df_postcode.loc[needs_name, 'Neighborhood'] = df_postcode.loc[needs_name, 'Borough']

### Group the data by Postcode and Borough

In [8]:
# Collapse rows sharing a postcode and borough into a single row whose
# Neighborhood value is the comma-separated list of the group's neighborhoods.
group_keys = ['Postcode', 'Borough']
joined_names = df_postcode.groupby(group_keys)['Neighborhood'].apply(', '.join)

# Turn the group keys back into ordinary columns and set the column labels.
df_grp = joined_names.reset_index()
df_grp.columns = group_keys + ['Neighborhood']

### Read the geospatial CSV file

In [9]:
# Load the coordinates table from the remote CSV file, then relabel its
# columns so the key column is called 'Postcode', matching df_grp.
latlng_columns = ['Postcode', 'Latitude', 'Longitude']
df_latlng = pd.read_csv('http://cocl.us/Geospatial_data')
df_latlng.columns = latlng_columns

In [11]:
# Attach coordinates to each grouped postcode row; the inner join keeps only
# postcodes that appear in both tables.
df_join = df_grp.merge(df_latlng, how='inner', on='Postcode')

### Explore and cluster the neighborhoods in Toronto.

In [12]:
# Work on an independent copy holding only the columns used for mapping.
map_columns = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']
neighborhoods = df_join.loc[:, map_columns].copy()

# Preview the first five rows.
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476


In [13]:
# Count the distinct boroughs and the rows of the table, then report both.
borough_count = len(neighborhoods['Borough'].unique())
row_count = len(neighborhoods)
summary = 'The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count)
print(summary)

The dataframe has 11 boroughs and 103 neighborhoods.


In [14]:
address = 'Toronto, Canada'

# Identify this application to the Nominatim service. Constructing the
# geocoder without a user_agent is deprecated in geopy and rejected by newer
# releases.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')
location = geolocator.geocode(address)

# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute lookups below.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [15]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of `neighborhoods`.
# The loop variables deliberately do not reuse the names `borough` and
# `neighborhood`: those are the module-level lists built while parsing the
# table, and overwriting them here would break a later re-run of the cell that
# builds the DataFrame from them.
marker_rows = zip(
    neighborhoods['Latitude'],
    neighborhoods['Longitude'],
    neighborhoods['Borough'],
    neighborhoods['Neighborhood'],
)
for lat, lng, borough_name, neighborhood_name in marker_rows:
    label = '{}, {}'.format(neighborhood_name, borough_name)
    # parse_html=True makes the popup treat the label as text, not markup.
    label = folium.Popup(label, parse_html=True)
    # parse_html is an option of Popup, not of CircleMarker, so it is no
    # longer passed to the marker.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Display the map as the cell output.
map_toronto