# Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto

## Question 1

In [1]:
# Let's import all relevant packages...

from bs4 import BeautifulSoup
import requests
import pandas as pd

In [46]:
# Now let's search for an url to extract the data from. Please NOTE, the newest version of the WikiPedia Page was probably not the most easy one
# to extract data from, that's why I chose for an older version, which provides me with the same data, but in better format.

url = "https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=1011037969"
toronto = pd.read_html(requests.get(url).text)
toronto

[    Postal Code           Borough  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                          Neighbourhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

In [47]:
# As we only need the first table, let's make sure we only extract that one...

toronto = toronto[0]
toronto

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [48]:
# As we have to get rid of the Not Assigned boroughs, let's drop them right away! Also, to have a clear overview, I will group by Postal Code.

toronto = toronto[toronto['Borough'] != 'Not assigned']
toronto = toronto.groupby(['Postal Code']).head()
toronto

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [49]:
# To make sure we're ready to go, let's check if there is still a Not Assigned value in the Neighborhood Column...

toronto.Neighbourhood.str.count('Not assigned').sum()

0

In [51]:
# It seems like we have no more work to do for the Not Assigned values, then let's finish with the shape of the table!

toronto.shape

(103, 3)

### The answer to question 1 is (103, 3)

## Question 2

In [53]:
!pip install geocoder

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 11.2 MB/s eta 0:00:01
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [55]:
import geocoder # import geocoder

# initialize your variable to None
lat_lng_coords = None
postal_code = '###'

# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

KeyboardInterrupt: 

### The above code was running for such a long time without any response, I decided to stop it and go forward with the csv

In [67]:
# Let's start with reading the csv provided by Courseran and check data types for merging...

Data_Coursera = pd.read_csv('https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv')
Data_Coursera.dtypes

Postal Code     object
Latitude       float64
Longitude      float64
dtype: object

In [68]:
# Great, we're all set, let's merge both tables on the Postal Code, as this value is in both tables!

toronto_complete = toronto.join(Data_Coursera.set_index('Postal Code'), on = 'Postal Code', how = 'inner', sort = 'False')
toronto_complete

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
9,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
18,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
27,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
36,M1G,Scarborough,Woburn,43.770992,-79.216917
45,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
107,M9P,Etobicoke,Westmount,43.696319,-79.532242
116,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
143,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [69]:
# I always like to group my data, therefore I again group the table on Postal Code...

toronto_complete = toronto_complete.groupby(['Postal Code']).head()
toronto_complete

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
9,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
18,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
27,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
36,M1G,Scarborough,Woburn,43.770992,-79.216917
45,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
107,M9P,Etobicoke,Westmount,43.696319,-79.532242
116,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
143,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
