In [92]:

import pandas as pd  
import numpy as np  
import random  
from bs4 import BeautifulSoup
import requests

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                       

In [56]:
#Send the get request
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(source,'lxml')


In [77]:
# create three lists to store table data
postalCodeList = []
boroughList = []
neighborhoodList = []

In [78]:
#adding the data to the lists
for row in soup.find('table').find_all('tr'):
    cells = row.find_all('td')
    if len(cells)>0:
        postalCodeList.append(cells[0].text)
        boroughList.append(cells[1].text)
        neighborhoodList.append(cells[2].text.rstrip('\n'))

In [79]:
#creating a dataframe using the lists
toronto_df = pd.DataFrame({"PostalCode" : postalCodeList,
                          "Borough" : boroughList,
                          "Neighborhood" : neighborhoodList})
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A\n,Not assigned\n,
1,M2A\n,Not assigned\n,
2,M3A\n,North York\n,Parkwoods
3,M4A\n,North York\n,Victoria Village
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront"


In [80]:
#removing the 'not assighned' Borough
toronto_df = toronto_df[toronto_df.Borough != "Not assigned\n"].reset_index(drop=True)
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A\n,North York\n,Parkwoods
1,M4A\n,North York\n,Victoria Village
2,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront"
3,M6A\n,North York\n,"Lawrence Manor, Lawrence Heights"
4,M7A\n,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government"


In [81]:
#grouping neighborhoods in the same borough
toronto_df_grouped = toronto_df.groupby(["PostalCode", "Borough"], as_index=False).agg(lambda x: ", ".join(x))
toronto_df_grouped.head()


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B\n,Scarborough\n,"Malvern, Rouge"
1,M1C\n,Scarborough\n,"Rouge Hill, Port Union, Highland Creek"
2,M1E\n,Scarborough\n,"Guildwood, Morningside, West Hill"
3,M1G\n,Scarborough\n,Woburn
4,M1H\n,Scarborough\n,Cedarbrae


In [82]:
toronto_df_grouped.shape

(103, 3)

In [83]:
#reading the csv file for the coordinates
csv_path = "https://cocl.us/Geospatial_data"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [84]:
df.rename(columns={"Postal Code":"PostalCode"},inplace = True)
df.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [85]:
#modifying the 1st dataframe
toronto_df_grouped['Neighborhood'] = toronto_df_grouped['Neighborhood'].str.replace(' /', ',')
toronto_df_grouped['PostalCode'] = toronto_df_grouped['PostalCode'].str.replace('\n', '')
toronto_df_grouped['Borough'] = toronto_df_grouped['Borough'].str.replace('\n', '')
toronto_df_grouped.head() 


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [87]:
#checking whether the column to be merged are equal
toronto_df_grouped.iloc[0,0] == df.iloc[0,0]

True

In [95]:
#mergibg the two tables
toronto_df_new = pd.merge(left=toronto_df_grouped,right=df,left_on='PostalCode', right_on='PostalCode')
toronto_df_new.head() 

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [96]:
toronto_df_new.shape

(103, 5)

In [94]:
# @hiddencell

CLIENT_ID = '1H2WWX1TJLXM4SLAEDM3PIA4L1S2401DBIYXCFR5FRZTVDR2' # your Foursquare ID
CLIENT_SECRET = 'V5LNSSKJZSAJJJD5CSHAVZFVLE5N0BTLO1SPV5L44LFMIQMQ' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30

In [106]:
#creating a table for boroughs that contain the word Toronto 
toronto_df_new2 = toronto_df_new[toronto_df_new.Borough == "%Toronto"]
toronto_df_new2.head()
        
        

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
