# Coursera Capstone Project  
## Week 3 Project

Segmenting and Clustering Neighborhoods in Toronto

All three questions have been combined into one notebook. We start by importing the required libraries.

In [1]:
#Importing libraries
import pandas as pd
import numpy as np
import requests
import geocoder
import geopy
import folium

We now pull the table from the wiki link using *pd.read_html*, which returns a list of the 3 tables found on the page; we then select the first table from that list.

In [2]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.content
# read_html returns a list of DataFrames, one per table found; keep the first.
df_list = pd.read_html(html)
df = df_list[0]
df.head()

Unnamed: 0,0,1,2
0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


* The header of the table is blank and the first row contains the header information, so we use the first row as the column names and keep only the rows from the second row onwards as data
* We also notice that missing values are recorded as *'Not assigned'*, so we replace them with NaN values, which makes it easier to drop those rows

In [3]:
# The scraped table has no real header: its first row carries the column names.
# NOTE: df is overwritten below, so re-running this cell on its own would take
# the first data row as the header; re-run the download cell first.
headers = df.iloc[0]
# Rebuild the frame from the remaining rows under those names ...
df = pd.DataFrame(data=df.values[1:], columns=headers)
# ... then turn the 'Not assigned' placeholder into NaN so it counts as missing.
df = df.replace(to_replace='Not assigned', value=np.nan)

As required by the assignment, we drop all rows with a missing value in the Borough column; in the remaining rows, we replace missing Neighbourhood values with the corresponding Borough value

In [4]:
# Keep only the rows that have a borough. The explicit .copy() makes the result
# an independent frame, so the column assignment below does not trigger pandas'
# SettingWithCopyWarning (assigning into a filtered slice of another frame).
has_borough = df['Borough'].notna()
df = df.loc[has_borough].copy()
# A neighbourhood that is still missing takes the name of its borough.
df['Neighbourhood'] = df['Neighbourhood'].fillna(df['Borough'])
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


We tried using geocoder, but unfortunately it was not returning any values, so that code has been commented out

In [5]:
# Geocoder not responding, so the lookup below is disabled.
# The triple-quoted text is a plain string expression, not a comment: nothing
# inside it executes, and because it is the last expression in the cell its
# repr is echoed as the cell output.
# NOTE(review): as written, the loop would retry without any limit while
# g.latlng is None, and it formats df.columns[0] (a column label) into the
# query rather than a postal-code value -- fix both before re-enabling.
"""
lat_lng_coords = None

while(lat_lng_coords is None):
    g = geocoder.google('{}, Toronto, Ontario'.format(df.columns[0]))
    lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

"""

"\nlat_lng_coords = None\n\nwhile(lat_lng_coords is None):\n    g = geocoder.google('{}, Toronto, Ontario'.format(df.columns[0]))\n    lat_lng_coords = g.latlng\n\nlatitude = lat_lng_coords[0]\nlongitude = lat_lng_coords[1]\n\n"

We therefore use the alternative solution: reading the coordinates from the CSV file available at the link below

In [6]:
# Fallback source: a CSV of postal codes with their latitude and longitude.
url_2 = 'http://cocl.us/Geospatial_data'
df_2 = pd.read_csv(filepath_or_buffer=url_2)
df_2.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


We now merge the second dataframe into the first one on the postal code using a left join, drop the redundant 'Postal Code' column from the merged result, and rename the columns to match the screenshot given in the assignment

In [7]:
# Attach coordinates to every neighbourhood row. The left join keeps all rows of
# df, even when a postal code has no match in df_2.
coords = df_2[['Postal Code', 'Latitude', 'Longitude']]
df_3 = (
    df.merge(coords, how='left', left_on='Postcode', right_on='Postal Code')
      .drop(columns='Postal Code')  # df_2's join key is not needed after the merge
)
# Positional rename: the list order must match the merged column order.
df_3.columns = ['PostalCode', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']
df_3.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763


In [8]:
# Keep only the rows whose borough name contains "Toronto".
is_toronto = df_3['Borough'].str.contains("Toronto")
df_4 = df_3[is_toronto]
df_4.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
13,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
14,M5B,Downtown Toronto,Garden District,43.657162,-79.378937
27,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418


In [9]:
# Size of the Toronto-only frame as a (rows, columns) tuple.
df_4.shape

(74, 5)

In [10]:
# Create a map of Toronto, centred on the coordinates below.
latitude, longitude = 43.6532, -79.3832
map_toronto = folium.Map(
    location=[latitude, longitude],
    zoom_start=10,
)
map_toronto  # last expression, so the map is rendered as the cell output

In [11]:
# Add one circle marker per row of df_4 to the map.
marker_rows = zip(df_4['Latitude'], df_4['Longitude'], df_4['Borough'], df_4['Neighbourhood'])

for lat, lng, borough, neighborhood in marker_rows:
    # Popup text has the form "<neighbourhood>,<borough>".
    popup = folium.Popup('{},{}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)

map_toronto