# Part 1 of Week 3 Assignment

## Creating a dataframe for all the neighbourhoods in Toronto

Installing necessary packages

In [1]:
#!pip3 install pandas
#!pip3 install bs4
#!pip3 install requests
#!pip3 install lxml

In [2]:
import pandas as pd
import numpy as np
import requests

from bs4 import BeautifulSoup

Using BeautifulSoup to parse the contents of the Wikipedia page that lists all the neighbourhoods.

In [3]:
# Single source of truth for the page being scraped.
wikipedia_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Fail fast on a hung connection or an HTTP error response instead of
# silently handing an error page to the parser.
response = requests.get(wikipedia_url, timeout=30)
response.raise_for_status()
wiki_text = response.text

soup = BeautifulSoup(wiki_text, 'html.parser')

Reading through the table and organizing its contents into tabular data

In [4]:
# Build one record per assigned postal-code cell of the first table on the page.
# The postal code is the first three characters of the cell's <p> text; the
# borough and neighbourhood list come from the cell's <span> text, which has
# the form "Borough(Neighbourhood / Neighbourhood ...)".
table_contents = []
table = soup.table
if table is None:
    raise ValueError('No <table> element found in the downloaded page')

for cell_tag in table.find_all('td'):
    # Skip cells that do not carry the expected <p>/<span> markup.
    if cell_tag.p is None or cell_tag.span is None:
        continue

    description = cell_tag.span.text
    if description == 'Not assigned':
        continue

    pieces = description.split('(')
    # Text between the first and second '('; empty when there are no parentheses.
    raw_neighborhood = pieces[1] if len(pieces) > 1 else ''

    table_contents.append({
        'PostalCode': cell_tag.p.text[:3],
        'Borough': pieces[0],
        # " /" separators become commas; leftover ")" and padding are trimmed.
        'Neighborhood': raw_neighborhood.strip(')').replace(' /', ',').replace(')', ' ').strip(' '),
    })

Cleaning up the data and replacing some of the borough names with a more readable format.

In [5]:
# Turn the scraped records into a frame and rewrite the borough names whose
# text ran together during scraping into readable labels.
df = pd.DataFrame(table_contents).assign(
    Borough=lambda frame: frame['Borough'].replace({
        'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto Stn A',
        'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto Business',
        'EtobicokeNorthwest': 'Etobicoke Northwest',
        'East YorkEast Toronto': 'East York/East Toronto',
        'MississaugaCanada Post Gateway Processing Centre': 'Mississauga',
    })
)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [6]:
# (rows, columns) of the cleaned neighbourhood table
df.shape

(103, 3)

# Part 2 of Week 3 Assignment

## Adding LatLong to the neighbourhoods

In [7]:
# Load the coordinates table from a CSV given by a relative path, so the file
# must be present in the notebook's working directory.
latlong = pd.read_csv('Geospatial_Coordinates.csv')
latlong.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
# Attach coordinates by matching postal codes, then drop the now-redundant
# 'PostalCode' key so only the 'Postal Code' column from the CSV remains.
df = (
    df.merge(latlong, left_on='PostalCode', right_on='Postal Code')
      .drop(columns='PostalCode')
)
df.head()

Unnamed: 0,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,North York,Parkwoods,M3A,43.753259,-79.329656
1,North York,Victoria Village,M4A,43.725882,-79.315572
2,Downtown Toronto,"Regent Park, Harbourfront",M5A,43.65426,-79.360636
3,North York,"Lawrence Manor, Lawrence Heights",M6A,43.718518,-79.464763
4,Queen's Park,Ontario Provincial Government,M7A,43.662301,-79.389494


# Part 3 of Week 3 Assignment

## Analyzing Toronto

Visualizing all Toronto neighbourhoods

In [9]:
#!pip3 install folium
import folium

In [10]:
def draw_map(df, location=None, zoom_start=10):
    """Plot one circle marker per row of ``df`` on a folium map.

    Parameters
    ----------
    df : pandas.DataFrame (or any mapping of column name -> sequence)
        Must provide 'Latitude', 'Longitude', 'Borough' and 'Neighborhood'.
    location : sequence of two floats, optional
        Map centre as ``[lat, lng]``. Defaults to ``[43.651070, -79.347015]``,
        the centre this function previously hard-coded.
    zoom_start : int, optional
        Initial zoom level of the map (default 10).

    Returns
    -------
    folium.Map
        The map with one marker added per row; each popup reads
        "<Neighborhood>, <Borough>".
    """
    if location is None:
        location = [43.651070, -79.347015]

    toronto = folium.Map(location=list(location), zoom_start=zoom_start)
    for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
        label = '{}, {}'.format(neighborhood, borough)
        # parse_html belongs to the Popup (it escapes the label text); it is
        # not a CircleMarker option, so it is no longer passed there.
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
        ).add_to(toronto)
    return toronto

In [11]:
# Build the map for every row of df; the bare name on the last line renders it.
map_toronto = draw_map(df)
map_toronto

Selecting Downtown Toronto only for further analysis

In [12]:
# Keep only the Downtown Toronto rows. Take an explicit copy so that later
# in-place changes to downtown_toronto are independent of df.
downtown_toronto = df[df['Borough'] == 'Downtown Toronto'].copy()
downtown_toronto.head()

Unnamed: 0,Borough,Neighborhood,Postal Code,Latitude,Longitude
2,Downtown Toronto,"Regent Park, Harbourfront",M5A,43.65426,-79.360636
9,Downtown Toronto,"Garden District, Ryerson",M5B,43.657162,-79.378937
15,Downtown Toronto,St. James Town,M5C,43.651494,-79.375418
20,Downtown Toronto,Berczy Park,M5E,43.644771,-79.373306
24,Downtown Toronto,Central Bay Street,M5G,43.657952,-79.387383


In [13]:
# Same map helper, restricted to the Downtown Toronto rows.
downtown_map = draw_map(downtown_toronto)
downtown_map

In [14]:
#!pip3 install sklearn
from sklearn.cluster import KMeans

Clustering the Downtown Toronto neighbourhoods into 4 clusters based on their geographic location

In [15]:
# Number of clusters to fit.
k = 4

# Cluster on the coordinates only. Selecting the two columns by name (instead
# of dropping the others with a positional axis argument) works on current
# pandas and keeps a previously inserted label column out of the features.
toronto_clustering = downtown_toronto[['Latitude', 'Longitude']]

# n_init is pinned so the result does not depend on the installed
# scikit-learn version's default; random_state makes the run repeatable.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(toronto_clustering)

# Remove labels left by an earlier run of this cell so it can be re-executed,
# then put the fresh labels in the first column.
downtown_toronto = downtown_toronto.drop(columns='Cluster Labels', errors='ignore')
downtown_toronto.insert(0, 'Cluster Labels', kmeans.labels_)
downtown_toronto.head()

Unnamed: 0,Cluster Labels,Borough,Neighborhood,Postal Code,Latitude,Longitude
2,1,Downtown Toronto,"Regent Park, Harbourfront",M5A,43.65426,-79.360636
9,1,Downtown Toronto,"Garden District, Ryerson",M5B,43.657162,-79.378937
15,1,Downtown Toronto,St. James Town,M5C,43.651494,-79.375418
20,1,Downtown Toronto,Berczy Park,M5E,43.644771,-79.373306
24,1,Downtown Toronto,Central Bay Street,M5G,43.657952,-79.387383


In [16]:
#!pip3 install matplotlib
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

Visualizing clusters on the map

In [17]:
# Base map with the same centre and zoom used by the other maps in this notebook.
map_clusters = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

# One evenly spaced rainbow colour per cluster, converted to hex strings.
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# One marker per neighbourhood, coloured by its cluster label.
for lat, lon, cluster in zip(downtown_toronto['Latitude'], downtown_toronto['Longitude'], downtown_toronto['Cluster Labels']):
    label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    # NOTE(review): with 0-based labels, label 0 indexes rainbow[-1] (the last
    # colour) through negative indexing. Every cluster still gets a distinct
    # colour, so the mapping is left unchanged.
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

**Note: For some reason GitHub doesn't render the folium maps. You can clone the repo locally and run the notebook to view the map. I have included a screenshot below:**

![alt text](map_screenshot.png)