<a href="https://colab.research.google.com/github/klfontus/Coursera_Capstone/blob/main/Week_3_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# <center>Segmenting and Clustering Toronto Neighborhoods</center>

## Creating a DataFrame of Toronto Neighborhoods and their Postal Codes

##### Import necessary libraries

In [2]:
#!pip install bs4
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests


##### URL for Toronto postal codes

In [3]:
# Wikipedia page ("List of postal codes of Canada: M") whose table holds the
# postal codes of Toronto neighborhoods; it is downloaded and parsed in the cells below.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

##### Scraping data from Wikipedia page

In [4]:
# Download the page. The timeout prevents the cell from hanging forever on a
# stalled connection, and raise_for_status() turns an HTTP error (4xx/5xx) into
# an exception instead of letting an error page be parsed as if it were the data.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.text

# Parse the HTML with the html5lib parser into a navigable tree.
soup = BeautifulSoup(data, 'html5lib')

##### Locating the table and isolating its rows

In [5]:
# Column names of the neighborhood DataFrame that is assembled from these rows.
cols = ['PostalCode', 'Borough', 'Neighborhood']

# Take the first <table> element of the page; find() returns None when the page
# has no table, so fail with a clear message rather than an AttributeError.
table = soup.find('table')
if table is None:
    raise ValueError("No <table> element found in the downloaded page")

# Every <tr> of that table; each row's <td> cells are parsed individually later.
table_rows = table.find_all('tr')

##### Initializing the data frame and adding table data

In [34]:
# Scraped borough strings in which extra text is run together with the borough
# name, mapped to the readable label that should be stored instead.
borough_fixes = {
    'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto Stn A',
    'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto Business',
    'EtobicokeNorthwest': 'Etobicoke Northwest',
    'East YorkEast Toronto': 'East York/East Toronto',
    'MississaugaCanada Post Gateway Processing Centre': 'Mississauga',
}

# Collect one [postal code, borough, neighborhood] record per assigned cell and
# build the DataFrame once at the end (appending rows to a DataFrame inside a
# loop copies the frame on every insertion).
records = []
for row in table_rows:
    for cell in row.find_all('td'):
        # The parsing below needs both a <span> and a <p>; skip cells that
        # lack either instead of failing with an AttributeError.
        if cell.span is None or cell.p is None:
            continue

        cell_text = cell.span.text
        if cell_text == 'Not assigned':
            continue

        # The first three characters of the paragraph text are the postal code.
        postal_code = cell.p.text[:3]

        # The span text has the form "Borough(Hood A / Hood B)".
        parts = cell_text.split('(')
        borough = parts[0]
        if len(parts) > 1:
            neighborhood = (
                parts[1]
                .strip(')')
                .replace(' /', ',')   # " / " separators become ", "
                .replace(')', ' ')
                .strip(' ')
            )
        else:
            # No parenthesised neighborhood list: fall back to the borough
            # text so the row is kept rather than raising an IndexError.
            neighborhood = borough

        records.append([postal_code, borough, neighborhood])

neighborhood_data = pd.DataFrame(records, columns=cols)
neighborhood_data['Borough'] = neighborhood_data['Borough'].replace(borough_fixes)

# Store all three text columns with the pandas nullable string dtype.
neighborhood_data = neighborhood_data.astype('string')

neighborhood_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [7]:
# Sanity check: (number of rows, number of columns) of the scraped DataFrame.
neighborhood_data.shape

(103, 3)

## Getting Geo Data for Toronto Neighborhoods

##### Getting Postal Code Geo Data

In [36]:
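# NOTE: the geocoder package would not work here, so its install/import stay
# disabled and the coordinates are loaded from a CSV file in the next cell instead.
#!pip install geocoder
#import geocoder 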

In [9]:
# CSV with one row per postal code and its latitude/longitude.
GEO_CSV_URL = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv'

# Read the CSV directly from the URL with pandas instead of shelling out to
# `wget -q`: this works where wget is not installed and a failed download
# raises immediately instead of leaving an empty file behind.
# The postal code column is read with the nullable string dtype.
postal_geo = pd.read_csv(GEO_CSV_URL, dtype={'Postal Code': 'string'})

# Keep a local copy under the same file name the download used to create.
postal_geo.to_csv('postal_geo.csv', index=False)

postal_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


##### Extracting Latitude and Longitude from Geo Data and Adding Them to the Neighborhood Data Frame





In [45]:
# Build a postal code -> (latitude, longitude) lookup in one pass over the geo
# table. When a code appears more than once, the first row wins.
geo_unique = postal_geo.drop_duplicates(subset="Postal Code", keep="first")
coords_by_code = {
    code: (lat, lng)
    for code, lat, lng in zip(geo_unique["Postal Code"],
                              geo_unique["Latitude"],
                              geo_unique["Longitude"])
}

# Fail with a message that names the offending codes if any scraped postal
# code has no coordinates, rather than an out-of-bounds IndexError.
missing_codes = [code for code in neighborhood_data["PostalCode"]
                 if code not in coords_by_code]
if missing_codes:
    raise KeyError("No coordinates found for postal codes: {}".format(missing_codes))

# Latitudes / longitudes in the same order as the rows of neighborhood_data.
df_lat = [coords_by_code[code][0] for code in neighborhood_data["PostalCode"]]
df_long = [coords_by_code[code][1] for code in neighborhood_data["PostalCode"]]

In [46]:
# Attach the coordinate lists (one entry per row, in row order) as two new columns.
neighborhood_data = neighborhood_data.assign(
    Latitude=df_lat,
    Longitude=df_long,
)
neighborhood_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


In [47]:
# Print column dtypes and non-null counts to confirm every row received coordinates.
neighborhood_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 103 entries, 0 to 102
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   PostalCode    103 non-null    string 
 1   Borough       103 non-null    string 
 2   Neighborhood  103 non-null    string 
 3   Latitude      103 non-null    float64
 4   Longitude     103 non-null    float64
dtypes: float64(2), string(3)
memory usage: 4.8 KB


## Exploring Toronto Neighborhoods

In [35]:
#!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [51]:
address = 'Toronto, Canada'

# Geocode the city to get the coordinates the map is centred on.
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when the service finds no match; stop here with a
    # clear message instead of an AttributeError on `.latitude`.
    raise ValueError("Could not geocode address: {!r}".format(address))
latitude = location.latitude
longitude = location.longitude

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per neighborhood row, with a "Borough: Neighborhood" popup.
for lat, lng, borough, neighborhood in zip(neighborhood_data["Latitude"],
                                           neighborhood_data["Longitude"],
                                           neighborhood_data["Borough"],
                                           neighborhood_data["Neighborhood"]):
    label_text = '{}: {}'.format(borough, neighborhood)
    label = folium.Popup(label_text, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Last expression of the cell: render the map inline.
map_toronto