# Coursera Capstone Week #3 - Clustering Neighborhoods in Toronto

First, I'm importing all the required libraries

In [10]:
from bs4 import BeautifulSoup as bsp
import requests as req
import pandas as pd
import geocoder
import os

In [11]:
# Wikipedia's list of Canadian postal codes beginning with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# The timeout stops the cell from hanging indefinitely on a stalled connection;
# raise_for_status() reports an HTTP error here instead of letting an error
# page flow into the parsing cells.
page = req.get(url, timeout=30)
page.raise_for_status()

In [12]:
# Parse the downloaded HTML and take the first <table> element in the document.
soup = bsp(page.text, 'html.parser')
table = soup.find('table')
if table is None:
    # find() returns None when nothing matches; stop here with a clear message
    # rather than failing with an AttributeError when the table is iterated.
    raise ValueError('No <table> element found in the downloaded page')

# Scraping the data from the table and formatting it
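Here I use bs4 to scrape the table and sort it into a list of dictionaries, one per assigned postal code, holding its postal code, borough, and neighborhood(s).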

In [13]:
# Build one record (dict) per table cell, skipping cells marked "Not assigned".
table_contents = []
for data in table.find_all('td'):  # find_all replaces the deprecated findAll alias
    # Skip cells that lack the <p>/<span> children the parsing below relies on.
    if data.p is None or data.span is None:
        continue

    span_text = data.span.text
    if span_text == "Not assigned":
        continue

    # The span text is expected to look like "Borough(Neighborhood / Neighborhood)".
    parts = span_text.split('(')
    borough = parts[0]
    if len(parts) > 1:
        neighborhood = (
            parts[1]
            .strip(')')          # drop the closing parenthesis
            .replace(' /', ',')  # "A / B" -> "A, B"
            .replace(')', ' ')   # any stray ")" left inside the text
            .strip(' ')
        )
    else:
        # No parenthesised neighborhood list in this cell: reuse the borough
        # name instead of failing with an IndexError.
        neighborhood = borough

    table_contents.append({
        'Postal Code': data.p.text[:3],  # first three characters of the <p> text
        'Borough': borough,
        'Neighborhood': neighborhood,
    })


### Converting into dataframe
Next, I'm going to convert this table into a dataframe

In [14]:
# Turn the scraped records into a DataFrame. Naming the columns explicitly keeps
# the expected schema even if the scrape produced no records, so later cells
# that index df['Postal Code'] do not fail with a KeyError.
df = pd.DataFrame(table_contents, columns=['Postal Code', 'Borough', 'Neighborhood'])
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


## Next steps
Now that we have our data in a dataframe, everything is almost complete.
However, we still have to remove one postal code because, according to Wikipedia, <u>postal code M7R is not included in the city of Toronto</u>.

In [15]:
# Keep every row except postal code M7R, which is not part of the city of Toronto.
# A boolean filter into a fresh copy does this in one step, without the
# intermediate index lookup and without mutating the frame in place.
df = df.loc[df['Postal Code'] != 'M7R'].copy()

In [16]:
# Show the (rows, columns) size of the frame at this point.
df.shape

(102, 3)

In [17]:
# Preview the first five rows of the frame.
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


 # <strike> Using Geocoder </strike>
<strike> Now, we're going to import the geocoder library (`import geocoder`) and use it to assign latitudes and longitudes to the neighborhoods. </strike>
    
    
 # Using Weather API
 
 Since Geocoder seemed to be down for whatever reason, I'm going to use the Weather API (weatherapi.com) instead to look up the latitude and longitude of each postal code.
 


In [20]:
# The Weather API key is kept out of the notebook: it is read from the
# WEATHER_API_KEY environment variable (and is None when that variable is unset).
key = os.getenv('WEATHER_API_KEY')

In [48]:
def getLatLong(postal_code):
    """Look up the latitude and longitude of a postal code via the Weather API.

    Calls the ``current.xml`` endpoint with the module-level ``key`` and reads
    the ``<lat>`` and ``<lon>`` elements from the XML response.

    Parameters
    ----------
    postal_code : str
        Value sent to the API as the ``q`` query parameter.

    Returns
    -------
    list
        ``[latitude, longitude]`` exactly as the text found in the response
        (strings, not floats).

    Raises
    ------
    RuntimeError
        If no API key has been loaded into ``key``.
    requests.exceptions.HTTPError
        If the API responds with an error status code.
    ValueError
        If the response contains no ``<lat>`` or ``<lon>`` element.
    """
    if not key:
        # Without this check the literal text "None" would be sent as the key.
        raise RuntimeError('WEATHER_API_KEY is not set; cannot query the weather API')

    # Let requests build the query string so both values are URL-encoded, use
    # HTTPS so the key is not sent in clear text, and bound the wait time.
    page = req.get(
        'https://api.weatherapi.com/v1/current.xml',
        params={'key': key, 'q': postal_code},
        timeout=30,
    )
    page.raise_for_status()

    soup = bsp(page.text, 'xml')
    lat = soup.find('lat')
    lon = soup.find('lon')
    if lat is None or lon is None:
        # find() returns None when the element is absent (e.g. an error payload).
        raise ValueError('No <lat>/<lon> in weather API response for {!r}'.format(postal_code))
    return [lat.text, lon.text]
    

In [54]:
# Query the API once per distinct postal code, then fill both coordinate
# columns in a single pass instead of re-scanning the whole frame twice per row.
latitudes, longitudes = {}, {}
for postal_code in df['Postal Code'].unique():
    latitudes[postal_code], longitudes[postal_code] = getLatLong(postal_code)

# getLatLong returns the coordinates as text; store them as numbers so they can
# be used directly in numeric work. to_numeric raises if a value is not a number.
df['Latitude'] = pd.to_numeric(df['Postal Code'].map(latitudes))
df['Longitude'] = pd.to_numeric(df['Postal Code'].map(longitudes))
    

In [55]:
# Preview the first five rows, which now include the Latitude and Longitude columns.
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.76,-79.32
1,M4A,North York,Victoria Village,43.73,-79.31
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.66,-79.37
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72,-79.45
4,M7A,Queen's Park,Ontario Provincial Government,43.66,-79.39
