# Scraping Data From Wikipedia

In [1]:
# Import the core data libraries (pandas, numpy)
import pandas as pd
import numpy as np
# Install the third-party packages needed for scraping
!pip install requests
!pip install beautifulsoup4



In [11]:
from bs4 import BeautifulSoup
from urllib.request import urlopen as up

In [12]:
# Address of the Wikipedia article "List of postal codes of Canada: M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Open the page with urllib's urlopen (imported as `up`).
# NOTE(review): no cell visible in this notebook reads `page` afterwards — confirm
# whether this request is still needed.
page = up(url=url)

## Get all Column Values

In [114]:
# Download the page and turn its postal-code table into a list of
# [postalcode, borough, neighborhood] records stored in `lst`.
#
# The HTML is fetched with `up` (urllib's urlopen, imported earlier in this
# notebook). The previous `requests.get(url)` call raised NameError on a fresh
# kernel because `requests` is never imported. The `with` block also closes the
# HTTP response as soon as its body has been read.
with up(url) as response:
    soup = BeautifulSoup(response.read(), 'html.parser')

table = soup.find('table', { 'class' : 'wikitable sortable' })
if table is None:
    # Fail with a clear message rather than an AttributeError on None below.
    raise ValueError("No 'wikitable sortable' table found at " + url)

# Collect the <td> cells of every table row; rows without any <td> are skipped.
trs = table.find_all('tr')
rows = []
for tr in trs:
    cells = tr.find_all('td')
    if cells:
        rows.append(cells)

lst = []
for row in rows:
    # The first three cells are read as postal code, borough and neighborhood;
    # trailing whitespace (such as the cell's final newline) is removed.
    postalcode = row[0].text.rstrip()
    borough = row[1].text.rstrip()
    neighborhood = row[2].text.rstrip()
    # Rows whose borough is 'Not assigned' are dropped entirely.
    if borough != 'Not assigned':
        # A missing neighborhood falls back to the borough name.
        if neighborhood == 'Not assigned':
            neighborhood = borough
        lst.append([postalcode, borough, neighborhood])

In [115]:
# Assemble the scraped records into a DataFrame and report its dimensions.
cols = ['PostalCode', 'Borough', 'Neighborhood']
df = pd.DataFrame(data=lst, columns=cols)
print(df.shape)

(211, 3)


In [116]:
# Collapse rows that share a postal code: keep the first borough seen and
# join all of that code's neighborhoods into one comma-separated string.
aggregations = {'Borough': 'first', 'Neighborhood': ', '.join}
# as_index=False keeps 'PostalCode' as a regular column in the result.
df = df.groupby('PostalCode', as_index=False).agg(aggregations)

In [117]:
# Preview the first five rows of the grouped table.
df.head(n=5)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [113]:
# (rows, columns) of the grouped table: one row per distinct postal code.
df.shape

(103, 3)

In [118]:
# Load the coordinates CSV and rename its 'Postal Code' column to
# 'PostalCode' so it matches the key column of `df`.
dfgeo = (
    pd.read_csv("http://cocl.us/Geospatial_data")
    .rename(columns={'Postal Code': 'PostalCode'})
)

### Merge the coordinate values with location values

In [119]:
# Left-join the coordinates onto the postal-code table so every row of `df`
# is kept, whether or not `dfgeo` has a matching PostalCode.
df_new = df.merge(dfgeo, how='left', on="PostalCode")

In [121]:
# Show the merged row(s) for postal code M5G.
df_new[df_new['PostalCode'].eq('M5G')]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
57,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


### Verify the coordinates for a particular location

In [122]:

# Show the merged row(s) for postal code M9V.
df_new[df_new['PostalCode'].eq('M9V')]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437
