In [None]:
# Part 2 of Segmenting and Clustering Neighborhoods in Toronto:


#With the postal codes, Boroughs and Neighborhoods created, the next step is to get the latitude and the longitude coordinates of each neighborhood.

#OSituation: 

#In an older version of this course, we were leveraging the Google Maps Geocoding API to get the latitude and the longitude coordinates of each neighborhood. However, recently Google started charging for their API: #http://geoawesomeness.com/developers-up-in-arms-over-google-maps-api-insane-price-hike/, so we will use the Geocoder Python package instead: https://geocoder.readthedocs.io/index.html.

#The problem with this Package is you have to be persistent sometimes in order to get the geographical coordinates of a given postal code. So you can make a call to get the latitude and longitude coordinates of a given postal code and the #result would be None, and then make the call again and you would get the coordinates. So, in order to make sure that you get the coordinates for all of our neighborhoods, you can run a while loop for each postal code. Taking postal code #M5G as an example, your code would look something like this:

#import geocoder # import geocoder

# initialize your variable to None
# lat_lng_coords = None

# loop until you get the coordinates
#while(lat_lng_coords is None):
#  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#  lat_lng_coords = g.latlng

#latitude = lat_lng_coords[0]
#longitude = lat_lng_coords[1]

#Given that this package can be very unreliable, in case you are not able to get the geographical 
#coordinates of the neighborhoods using the Geocoder package, here is a link to a csv file that has the geographical coordinates of each postal code: http://cocl.us/Geospatial_data

In [88]:
#import beautifulsoup, requests and pandas
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [89]:
#set up to get neighborhood table from wikipedia
Info_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
source = requests.get(Info_url).text
soup = BeautifulSoup(source, 'xml')
n_table=soup.find('table')

In [90]:
#Establish neighborhood dataframe has three columns: PostalCode, Borough, and Neighborhood
column_names = ['Postalcode','Borough','Neighborhood'] # add the two columns for the locations
df=''; join_df=''; geo_df=''
df = pd.DataFrame(columns = column_names)
df.head()

Unnamed: 0,Postalcode,Borough,Neighborhood


In [91]:
# Find postcode, borough, neighborhood in xml code
for tr_value in n_table.find_all('tr'):
    row_info=[]
    for td_value in tr_value.find_all('td'):
        row_info.append(td_value.text.strip())
    if len(row_info)==3:
        df.loc[len(df)] = row_info

In [72]:
#Remove the not assigned values
df=df[df['Borough']!='Not assigned']

In [92]:
#Create a new dataframe that has consolidated neighborhood in a Borough
new_df=df.groupby('Postalcode')['Neighborhood'].apply(lambda x: "%s" % ', '.join(x))
new_df=new_df.reset_index(drop=False)
new_df.rename(columns={'Neighborhood':'Neighborhood_joined'},inplace=True)
new_df.head()

Unnamed: 0,Postalcode,Neighborhood_joined
0,M1A,
1,M1B,"Malvern, Rouge"
2,M1C,"Rouge Hill, Port Union, Highland Creek"
3,M1E,"Guildwood, Morningside, West Hill"
4,M1G,Woburn


In [94]:
#Join the dataframes with new neighborhood values
join_df = pd.merge(df, new_df, on='Postalcode')
join_df.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Neighborhood_joined
0,M1A,Not assigned,,
1,M2A,Not assigned,,
2,M3A,North York,Parkwoods,Parkwoods
3,M4A,North York,Victoria Village,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront","Regent Park, Harbourfront"


In [95]:
#Drop old Neighborhood column, values in data frame that are duplicate and rename new Neighorhood column
join_df.drop(['Neighborhood'],axis=1,inplace=True)
join_df.drop_duplicates(inplace=True)
join_df.rename(columns={'Neighborhood_joined':'Neighborhood'},inplace=True)

In [96]:
join_df.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [97]:
join_df.shape

(180, 3)

#Part 2 - getting the values for Latitude and Longitude

In [98]:
#Attempted to use this but could not recognize geocoder so used csv file instead
import geocoder # import geocoder

postal_code = 'M5A'

# initialize your variable to None
lat_lng_coords = None

# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

print(latitude,longitude)

ModuleNotFoundError: No module named 'geocoder'

In [99]:
#Get the Geospacial data fro the csv file
geo_df=pd.read_csv('http://cocl.us/Geospatial_data')

In [100]:
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [101]:
#Add columns to the geo_df data frame and create a new dataframe resulting from join_df and geo_df using postal co
geo_df.rename(columns={'Postal Code':'Postalcode'},inplace=True)
join_geo = pd.merge(join_df,geo_df, on='Postalcode')
#geo_data=join_geo[['Postalcode','Borough','Neighbourhood','Latitude','Longitude']]

In [102]:
join_geo.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
