# Segmenting and Clustering Neighborhoods in the city of Toronto, Canada

## Part 1 Solution

In [17]:
# importing requests & pandas & folium & geopy
import requests
import pandas as pd
import folium
from geopy.geocoders import Nominatim 

# installing beautifulsoup4
!pip install beautifulsoup4

#importing beautiful soup
from bs4 import BeautifulSoup


print ('Required packages imported!')

Required packages imported!


In [2]:
# Download the Wikipedia page listing the Canadian postal codes starting with "M".
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook on an HTTP error instead of letting an
# error page be parsed as if it were the article.
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
response.raise_for_status()

# NOTE: despite its name, `url` holds the page's HTML text (not a URL); the
# name is kept because the parsing cell reads it.
url = response.text


In [3]:
# Parse the downloaded HTML with Python's built-in parser.
soup = BeautifulSoup(url, "html.parser")

# Locate the postal-code table by its class attribute "wikitable sortable".
table = soup.find("table", {"class": "wikitable sortable"})

# find() returns None when nothing matches; fail here with a clear message
# rather than with an AttributeError in the cell that walks the table rows.
if table is None:
    raise ValueError('No table with class "wikitable sortable" found in the page')


In [4]:
# Walk every <tr> of the table: a row that contains <th> cells supplies the
# column names, every other row contributes one list of stripped <td> texts.
data = []
rows = table.find_all("tr")
for tr in rows:
    if tr.find("th"):
        # header row -> column names
        header = [th.text.strip() for th in tr.find_all("th")]
        continue
    # body row -> one record
    data.append([td.text.strip() for td in tr.find_all("td")])

# Report the dimensions of the scraped table (width taken from the first record).
row_count = len(data)
column_count = len(data[0])
print("Table has {} rows and {} columns".format(row_count, column_count))


Table has 180 rows and 3 columns


In [5]:
# Turn the scraped records into a DataFrame, using the scraped header row
# as the column names.
df = pd.DataFrame(data=data, columns=header)

# Report the frame's dimensions, followed by a blank spacer line.
n_rows, n_cols = df.shape
print("Dataframe has {} rows and {} columns".format(n_rows, n_cols))
print('\n')

# Preview the first 12 rows.
df.head(12)

Dataframe has 180 rows and 3 columns




Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


In [6]:
# Keep only the postal codes that have a borough assigned.
df = df[df['Borough'] != 'Not assigned']

# Each postal code is expected to appear exactly once, so no rows need to be
# combined. Verify that instead of assuming it: the previous duplicate lookup
# was a bare expression in the middle of the cell, so its result was discarded.
assert df['Postal Code'].is_unique, "Duplicate postal codes found; rows must be combined"

# Updated DataFrame size
print ("Updated Dataframe has {} rows and {} columns".format(df.shape[0],df.shape[1]))

Updated Dataframe has 103 rows and 3 columns


---
## Part 2 Solution

In [7]:
# Read the latitude/longitude of each postal code from a CSV file.
# (The geocoder package was tried first, but its results were inconsistent.)
geospatial_csv = "http://cocl.us/Geospatial_data"
df_ll = pd.read_csv(geospatial_csv)

# Preview the first rows of the coordinates table.
df_ll.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
# Attach the coordinates to the borough/neighborhood table by joining on the
# shared 'Postal Code' column (inner join, the pandas default).
df_new = pd.merge(df, df_ll, on='Postal Code', how='inner')

# Preview the first 12 rows of the merged table.
df_new.head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


---
## Part 3 Solution

In [21]:
# Getting Toronto coordinates from the Nominatim geocoding service

geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode('Toronto, Ontario, Canada')

# geocode() returns None when the query has no match; stop with a clear
# message instead of an AttributeError on `location.latitude`.
if location is None:
    raise ValueError("Geocoding 'Toronto, Ontario, Canada' returned no result")

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [22]:
# Base map centred on the Toronto coordinates obtained above.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Columns consumed per marker, in the order the loop unpacks them.
marker_columns = ['Latitude', 'Longitude', 'Postal Code', 'Borough', 'Neighborhood']

# One circle marker per row of df_new, with a "postal code, borough,
# neighborhood" popup.
for point_lat, point_lng, postal_code, borough_name, hood in zip(
        *(df_new[column] for column in marker_columns)):
    popup_text = '{}, {}, {}'.format(postal_code, borough_name, hood)
    popup = folium.Popup(popup_text, parse_html=True)
    marker = folium.CircleMarker(
        [point_lat, point_lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

# Last expression of the cell: renders the map.
map_toronto