In [6]:
#!pip install lxml
#!pip install html5lib
#!pip install requests



### Activity 1 is to use Webscrapping methods and get the data for Canada Postal Codes from the wiki page

In [90]:
#Import the required libraries
import numpy as np
import pandas as pd

from bs4 import BeautifulSoup
import requests
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

In [91]:
#Define the url and get the table data from the website url
#The data will be stored in My_table
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(website_url,'lxml')
My_table = soup.find('table',{'class':'wikitable sortable'})

In [92]:
#Define the dataframe and extract the data from html table into the dataframe
column_names = ['PostalCode','Borough', 'Neighborhood'] 
CAdf=pd.DataFrame(columns=column_names)

rows=My_table.find_all('tr')
res=[]
for row in rows:
    cells= row.find_all('td')
    res.append(cells)

for i in range(len(res)):
    if (i!=0) :
        CAdf = CAdf.append({'PostalCode': res[i][0].text,'Borough': res[i][1].text,'Neighborhood': res[i][2].text}, ignore_index=True)

In [93]:
#Remove the extra characters 
CAdf['Neighborhood']=CAdf['Neighborhood'].str.replace('\n','')
#filter the dataset and choose only those data that have a definite Borough, filter out rows which are not equal to 'Not assigned'
CAdf_filtered=CAdf[CAdf['Borough']!='Not assigned']    
#Incase when Neibhborhood ='Not assigned', use the corresponding Borough Name as Neighborhood
CAdf_filtered['Neighborhood']=np.where((CAdf_filtered['Neighborhood']=='Not assigned'),CAdf_filtered['Borough'],CAdf_filtered['Neighborhood'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [87]:
CAdf_final=CAdf_filtered.groupby('PostalCode').agg(lambda x: ', '.join(set(x))).reset_index()

In [94]:
CAdf_final.shape

(103, 3)

### The following is for Activity 2 , which is to download  the lat and long values for the Postal codes and merge the data into a final data frame

In [95]:
# Get the geospatial data which has Canada Postal Codes and their Long and Lat values
gsd=pd.read_csv("http://cocl.us/Geospatial_data")

In [96]:
# Merge the Cleaned data from the website and Lat long data 
CAdf_merge= pd.merge(CAdf_final,gsd[['Postal Code','Latitude','Longitude']],left_on='PostalCode',right_on='Postal Code',how='left')

In [97]:
#Delete the duplicate columns
CAdf_merge.drop('Postal Code',axis=1, inplace=True)


In [98]:
# Create a dataframe which has only those Borough which has Toronto in their name
CAdf_Toronto=CAdf_merge[CAdf_merge['Borough'].str.contains('Toronto')].reset_index(drop=True)

### Nieghborhood for Toronto

In [99]:

address = 'Toronto'

geolocator = Nominatim(user_agent="CA_Exploror")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [100]:
# create map of Toronto using latitude and longitude values
map_Toronto= folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(CAdf_Toronto['Latitude'], CAdf_Toronto['Longitude'], CAdf_Toronto['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto