# Data Science Capstone Project (Coursera)

### This notebook will contain a data science project on location data and will use the Foursquare API

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Print a greeting message.
greeting = "Hello Capstone Project Course!"
print(greeting)

Hello Capstone Project Course!


In [3]:
from bs4 import BeautifulSoup
import requests

# Extracting the table from the Wikipedia page using the BeautifulSoup library

In [4]:
# Download the Wikipedia list of Canadian postal codes beginning with "M".
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of letting an error page
# be parsed as if it were the article.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
web_page = response.text
soup = BeautifulSoup(web_page, 'lxml')

In [5]:
# Locate the postal-code table by its CSS classes and collect its <tr> rows.
can_table = soup.find('table', {'class': 'wikitable sortable'})
if can_table is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError("No table with class 'wikitable sortable' was found; the page layout may have changed.")
# find_all is the current name of the legacy findAll alias.
tr = can_table.find_all('tr')

### Creating a list of rows from the Wikipedia table

In [6]:
# One inner list per table row, holding the text of each <td> cell in that row.
list_of_postal_codes = [
    [cell.text for cell in table_row.findAll('td')]
    for table_row in tr
]

In [7]:
# Discard rows that produced no <td> cells (presumably the header row, which
# the original removed by position) and trim surrounding whitespace, including
# the trailing newline on the last cell.
# Both steps are idempotent, so re-running this cell no longer drops another
# data row or chops another character off each neighbourhood name.
list_of_postal_codes = [
    [cell.strip() for cell in row]
    for row in list_of_postal_codes
    if row
]

### Converting it into a pandas DataFrame

In [8]:
# Build a three-column frame from the scraped rows.
column_names = ['Postcode', 'Borough', 'Neighbourhood']
canada_postal_codes = pd.DataFrame(data=list_of_postal_codes, columns=column_names)

In [9]:
# Display (rows, columns) of the freshly built frame.
canada_postal_codes.shape

(288, 3)

### Removing rows with a 'Not assigned' Borough

In [10]:
# Keep only rows whose borough is assigned, then renumber the index from 0.
has_borough = canada_postal_codes['Borough'].ne('Not assigned')
canada_postal_codes = canada_postal_codes.loc[has_borough].reset_index(drop=True)

### Concatenating neighbourhoods that share a postcode

In [11]:
# Collapse to one row per postcode: keep the first borough seen and join all
# of that postcode's neighbourhoods into a single comma-separated string.
aggregations = {'Borough': 'first', 'Neighbourhood': ','.join}
rows_by_postcode = canada_postal_codes.groupby('Postcode')
canada_postal_codes = rows_by_postcode.agg(aggregations)

In [12]:
# groupby('Postcode') leaves the postcode in the index; move it back to a column.
# The guard keeps the cell safe to re-run: an unconditional reset_index on the
# already-reset frame would insert a spurious 'index' column.
if canada_postal_codes.index.name == 'Postcode':
    canada_postal_codes = canada_postal_codes.reset_index()

In [13]:
# Preview the first rows of the frame.
canada_postal_codes.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Copying the borough value into neighbourhoods marked 'Not assigned'

In [14]:
# Where the neighbourhood is 'Not assigned', fall back to the borough name.
# Columns are addressed by label: with the column order Postcode, Borough,
# Neighbourhood, the previous positional lookups ([1] and [0]) tested Borough
# and copied Postcode instead. The previous chained `.loc[i][...] = ...`
# assignment is also not guaranteed to write back to the frame.
# A boolean mask replaces the hard-coded row count of 103.
unassigned = canada_postal_codes['Neighbourhood'] == 'Not assigned'
canada_postal_codes.loc[unassigned, 'Neighbourhood'] = canada_postal_codes.loc[unassigned, 'Borough']

In [15]:
# Display (rows, columns) of the frame at this point.
canada_postal_codes.shape

(103, 3)

In [16]:
# Preview the last rows of the frame.
canada_postal_codes.tail()

Unnamed: 0,Postcode,Borough,Neighbourhood
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie..."
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam..."
102,M9W,Etobicoke,Northwest


### Adding Geospatial data

In [17]:
# Load the coordinates file from the notebook's working directory.
geospatial_csv = 'Geospatial_Coordinates.csv'
df = pd.read_csv(geospatial_csv)

In [18]:
# Rename 'Postcode' to 'Postal Code', the key the merge with `df` joins on.
canada_postal_codes = canada_postal_codes.rename(columns={'Postcode': 'Postal Code'})

In [19]:
# Inner-join the postal-code frame with `df` on 'Postal Code';
# rows without a match on both sides are dropped.
canada_postal_codes = pd.merge(
    left=canada_postal_codes,
    right=df,
    how='inner',
    on='Postal Code',
)

In [20]:
# Preview the first rows of the merged frame.
canada_postal_codes.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
