## Analyzing Canada's Neighborhood

### 1- Import the libraries

In [167]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy
import requests

### 2- Scraping the web page 

In [168]:
# Download the Wikipedia page listing the "M" postal codes.
# The timeout stops the cell from hanging indefinitely on a stalled
# connection, and raise_for_status() aborts here on a 4xx/5xx response so
# that later cells never try to parse an error page.
site = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
site.raise_for_status()

#### A status code that starts with 2 (for example 200) means the page was fetched successfully

In [169]:
# Show the HTTP status code of the response stored in `site`.
site.status_code

200

#### Create a BeautifulSoup object

In [170]:
# Parse the raw response body with Python's built-in HTML parser.
soup = BeautifulSoup(markup=site.content, features='html.parser')

#### Check the title of the web page

In [563]:
# Print the document's <title> element as a quick sanity check.
print(soup.find('title'))

<title>List of postal codes of Canada: M - Wikipedia</title>


#### Find the table and extract its cells into a list

In [569]:
# Take the first <table> element of the page and print its header cells.
table_html = soup.find('table')
print(table_html.find_all(name='th'))

[<th>Postcode</th>, <th>Borough</th>, <th>Neighbourhood
</th>]


In [629]:
# Text of every <td> cell of the table, in document order.
table = [cell.get_text() for cell in table_html.find_all(name='td')]

In [630]:
# Peek at the first three extracted cell texts.
table[:3]

['M1A', 'Not assigned', 'Not assigned\n']

#### Create separate lists for Postcode, Borough and Neighbourhood

In [631]:
# The cells repeat in groups of three; entries 0, 3, 6, ... are postcodes.
postcode = table[0::3]
postcode[:3]

['M1A', 'M2A', 'M3A']

In [632]:
# Entries 1, 4, 7, ... of the flat cell list are boroughs.
borough = table[1::3]
borough[:3]

['Not assigned', 'Not assigned', 'North York']

In [633]:
# Entries 2, 5, 8, ... are neighbourhoods. Strip the newline characters and,
# where the text says 'Not assigned', substitute the borough of the same row.
neighbourhood = [
    cell.replace('\n', '').replace('Not assigned', borough[idx])
    for idx, cell in enumerate(table[2::3])
]
neighbourhood[:3]

['Not assigned', 'Not assigned', 'Parkwoods']

#### Create a dictionary

In [634]:
# Column name -> list of values, one entry per table row.
dict_table = dict(
    Postcode=postcode,
    Borough=borough,
    Neighbourhood=neighbourhood,
)

#### Group the neighbourhoods that share a Postcode

In [635]:
def merge_adjacent_postcodes(columns):
    """Collapse consecutive rows that share a postcode into a single row.

    Parameters
    ----------
    columns : dict
        Mapping with the keys 'Postcode', 'Borough' and 'Neighbourhood',
        each holding a list. The three lists are read in parallel.

    Returns
    -------
    dict
        A new mapping with the same three keys. Every run of consecutive
        equal postcodes becomes one row whose neighbourhood names are joined
        with ', '. The input lists are not modified.

    Notes
    -----
    The merge is done in one pass, so a run of any length is fully
    collapsed. Within a merged row the later row's neighbourhood is placed
    first and the later row's borough is kept.
    """
    merged = {'Postcode': [], 'Borough': [], 'Neighbourhood': []}
    rows = zip(columns['Postcode'], columns['Borough'], columns['Neighbourhood'])
    for code, boro, hood in rows:
        if merged['Postcode'] and merged['Postcode'][-1] == code:
            # Same postcode as the row emitted last: fold this row into it.
            merged['Borough'][-1] = boro
            merged['Neighbourhood'][-1] = hood + ', ' + merged['Neighbourhood'][-1]
        else:
            # A new postcode starts a new output row.
            merged['Postcode'].append(code)
            merged['Borough'].append(boro)
            merged['Neighbourhood'].append(hood)
    return merged


# Rebind dict_table to the merged columns. Re-running this cell is harmless
# because already-merged data contains no adjacent duplicates.
dict_table = merge_adjacent_postcodes(dict_table)

### 3- Creating a Pandas DataFrame

In [636]:
# Build the DataFrame from the column lists, fixing the column order explicitly.
df_table = pd.DataFrame(
    data=dict_table,
    columns=['Postcode', 'Borough', 'Neighbourhood'],
)

#### Drop the rows whose Borough is 'Not assigned'

In [637]:
# Index labels of the rows whose Borough is the 'Not assigned' placeholder.
not_assign = df_table.index[df_table['Borough'] == 'Not assigned']

In [638]:
# Remove the rows selected in `not_assign`. The result is rebound instead of
# dropping in place, and errors='ignore' skips labels that are already gone,
# so re-running this cell is a no-op rather than a KeyError.
# The original index labels are kept (no reset).
df_table = df_table.drop(index=not_assign, errors='ignore')

#### Displaying the final result

In [639]:
# Display the cleaned table via the DataFrame's `.style` accessor.
df_table.style

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,Queen's Park
8,M9A,Queen's Park,Queen's Park
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills North
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [648]:
# (rows, columns) of the cleaned table.
df_table.shape

(103, 3)

In [646]:
# Save the cleaned table to the current working directory.
# The index is written as the first column (pandas default).
df_table.to_csv(path_or_buf='neighbor.csv')

In [647]:
# List the working directory to confirm that neighbor.csv was written.
!ls

coursera_capstone.ipynb     LICENSE	  neighborhoods_toronto.ipynb
Geospatial_Coordinates.csv  neighbor.csv  README.md
