# Installing necessary libraries for web scraping

In [1]:
#Installing necessary libraries for web scraping
!pip install selenium
!pip install BeautifulSoup4



# Importing necessary libraries

In [2]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

# Web scraping the data

In [3]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Request the page; a timeout keeps the cell from hanging indefinitely on a
# stalled connection.
r = requests.get(url, timeout=30)

# Fail here on an HTTP error instead of silently parsing an error page below.
r.raise_for_status()

# Store the parsed HTML in 'soup', a BeautifulSoup object. The parser is named
# explicitly so the parse does not depend on which optional parsers happen to
# be installed (and so BeautifulSoup does not warn about guessing one).
soup = BeautifulSoup(r.content, 'html.parser')

# Assigning the scraped cells to a data frame

In [4]:
# Text of every <td> cell on the page, in document order.
cell_texts = [td.get_text() for td in soup.find_all('td')]

# Consecutive cells are grouped three at a time into
# (PostalCode, Borough, Neighborhood) rows; an incomplete trailing group is
# discarded.
columns = ['PostalCode', 'Borough', 'Neighborhood']
n_cols = len(columns)
rows = [
    cell_texts[start:start + n_cols]
    for start in range(0, len(cell_texts) - n_cols + 1, n_cols)
]

# Build the frame in one step; appending with df.loc[len(df)] inside the loop
# grows the frame one row at a time and gets slower with every row.
df = pd.DataFrame(rows, columns=columns)
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"
5,M6A\n,North York\n,"Lawrence Manor, Lawrence Heights\n"
6,M7A\n,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government\n"
7,M8A\n,Not assigned\n,Not assigned\n
8,M9A\n,Etobicoke\n,"Islington Avenue, Humber Valley Village\n"
9,M1B\n,Scarborough\n,"Malvern, Rouge\n"


# Cleaning data

In [5]:
# Remove the newline characters carried over from the scraped cell text.
df2 = df.replace('\n', '', regex=True)

# The scrape collects every <td> on the page, so some rows do not come from
# the postal-code table. Keep only rows whose first column looks like a
# postal code of this list (letter M, a digit, a letter). This replaces a
# hard-coded drop of index labels 180-190 (presumably those stray rows), which
# would raise a KeyError or remove real data if the page layout changed.
is_postal_code = df2['PostalCode'].str.match(r'^M\d[A-Z]$')
df2 = df2[is_postal_code]

# Discard postal codes that have no borough assigned.
df2 = df2[df2['Borough'] != 'Not assigned']

# Renumber the surviving rows from 0.
df2 = df2.reset_index(drop=True)
df2

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


# Checking whether any 'Not assigned' values remain in the Neighborhood column

In [6]:
# Show the rows whose Neighborhood is still 'Not assigned'.
df2[df2['Neighborhood'].eq('Not assigned')]

Unnamed: 0,PostalCode,Borough,Neighborhood


In [7]:
# Display the (rows, columns) dimensions of the cleaned frame.
df2.shape

(103, 3)

# Installing Geocoder

In [8]:
!pip install geocoder



# Importing Geocoder

In [21]:
import geocoder

# Getting the latitude and longitude of each Neighborhood

In [46]:
# Read the coordinates table and order it by postal code, then order the
# cleaned postal-code frame the same way.
lat_lang_df = pd.read_csv('https://cocl.us/Geospatial_data').sort_values('Postal Code')
df2 = df2.sort_values('PostalCode')

In [49]:
# Look coordinates up by postal code instead of assigning the columns
# directly. A direct assignment aligns on index labels, and after sort_values
# the labels of the two frames are not guaranteed to refer to the same postal
# code, so rows could receive another postal code's coordinates.
coords_by_code = lat_lang_df.set_index('Postal Code')

# NOTE(review): 'Latitidue' looks like a typo for 'Latitude'; the name is kept
# so that any later cell reading this column keeps working.
df2['Latitidue'] = df2['PostalCode'].map(coords_by_code['Latitude'])
df2['Longitude'] = df2['PostalCode'].map(coords_by_code['Longitude'])

# Renumber rows from 0 now that the frame is ordered by postal code.
df2 = df2.reset_index(drop=True)
df2

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitidue,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
