## This notebook contains the content of the Capstone Project.


In [37]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import csv
import random
import requests
import folium
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# library for transforming a JSON file into a pandas DataFrame
from pandas.io.json import json_normalize


### Getting the Wikipedia Page 

In [3]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
wiki_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(wiki_url, timeout=30)
# Stop here on an HTTP error status instead of handing an error page to the parser.
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

### Applying code to scrape the required data from wikipedia

In [4]:
# Locate the postal-code table on the parsed page.
my_table = soup.find('table', {'class': 'wikitable sortable'})
if my_table is None:
    # Without this guard the next line fails with an opaque AttributeError on None.
    raise ValueError("No table with class 'wikitable sortable' was found on the page")

table_rows = my_table.find_all("tr")

# Collect the stripped, non-empty <td> texts of every row; rows that yield no
# such text are skipped.
# NOTE(review): because empty cells are filtered out, a row with a blank cell
# would have its remaining values shifted one column to the left.
res = []
for tr in table_rows:
    td = tr.find_all('td')
    cell_texts = (cell.text.strip() for cell in td)
    row = [text for text in cell_texts if text]
    if row:
        res.append(row)

df = pd.DataFrame(res, columns=["Postcode", "Borough", "Neighbourhood"])
# to_csv() is called with its defaults, so the row index is written as well.
df.to_csv("Scraped_file.csv")

### Fetching the Newly saved CSV file

In [5]:
# Reload the scraped table from disk. The file was written together with its
# row index, which comes back here as an extra "Unnamed: 0" column.
df=pd.read_csv("Scraped_file.csv")

### Data Analysis and Feature Engineering

In [6]:
df.head()

Unnamed: 0.1,Unnamed: 0,Postcode,Borough,Neighbourhood
0,0,M1A,Not assigned,Not assigned
1,1,M2A,Not assigned,Not assigned
2,2,M3A,North York,Parkwoods
3,3,M4A,North York,Victoria Village
4,4,M5A,Downtown Toronto,Harbourfront


In [7]:
df.shape

(288, 4)

In [8]:
df.size


1152

In [9]:
# Remove the index column that came back from the CSV round trip.
# Re-binding the result (instead of inplace=True) together with
# errors="ignore" lets this cell be re-run without a KeyError once the column
# is already gone.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

df

In [10]:
df.isnull().sum()

Postcode         0
Borough          0
Neighbourhood    0
dtype: int64

In [11]:
# Count the rows whose Borough is the "Not assigned" placeholder using a
# vectorised comparison instead of a Python-level loop over the column.
# int() keeps `count` a plain Python integer, as the loop version produced.
count = int((df["Borough"] == "Not assigned").sum())
print(count)



77


### The 77 rows labelled 'Borough = Not assigned' shall be removed

In [12]:
# Keep only the rows whose Borough has actually been assigned; the surviving
# rows keep their original index labels.
has_borough = df['Borough'] != "Not assigned"
df = df[has_borough].copy()

In [13]:
df

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [14]:
# Renumber the rows from 0. reset_index() is called without drop=True, so the
# old row labels are kept as a new "index" column, which the next cell drops.
df=df.reset_index()

In [15]:
# Discard the old row labels that reset_index() stored in the "index" column.
# errors="ignore" plus re-binding keeps the cell safe to re-run: a second run
# finds no "index" column and simply leaves the frame unchanged.
df = df.drop(columns=["index"], errors="ignore")

In [16]:
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Not assigned
7,M9A,Etobicoke,Islington Avenue
8,M1B,Scarborough,Rouge
9,M1B,Scarborough,Malvern


In [17]:
# Collapse the table to one row per (Postcode, Borough) pair, joining the
# neighbourhood names of each pair into a single comma-separated string.
neighbourhoods_by_postcode = df.groupby(['Postcode', 'Borough'])['Neighbourhood']
df = neighbourhoods_by_postcode.agg(', '.join).reset_index()

### Grouping together neighbourhoods on the basis of unique Postcode

In [18]:
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [19]:
df.shape

(103, 3)

### Accessing the GeoLocation of Neighbourhoods

In [20]:
# Load the coordinates table from a local CSV file; its columns are
# "Postal Code", "Latitude" and "Longitude".
ds=pd.read_csv("Geospatial_Coordinates.csv")

In [21]:
ds.shape

(103, 3)

In [22]:
ds.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [23]:
# Give the key column the same name as in df so both frames can be merged on it.
postcode_column_names = {"Postal Code": "Postcode"}
ds = ds.rename(columns=postcode_column_names)

In [24]:
ds

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


### Merging the 2 dataframes

In [25]:
# Attach latitude/longitude to every postcode row by inner-joining the two
# frames on their shared "Postcode" column.
final_df = df.merge(ds, how="inner", on="Postcode")

In [26]:
final_df

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


### Let's first work on the rows whose Borough name contains "Toronto"

In [30]:
# Keep only the rows whose Borough name contains "Toronto".
# The filter reads from final_df rather than from toronto_df itself, so the
# cell works on a fresh kernel and gives the same result when it is re-run.
# reset_index() is called without drop=True, so the old row labels are kept as
# an "index" column, which a later cell removes.
is_toronto_borough = final_df.Borough.str.contains("Toronto")
toronto_df = final_df[is_toronto_borough].reset_index()

In [32]:
toronto_df.shape

(38, 6)

In [33]:
toronto_df.columns

Index(['index', 'Postcode', 'Borough', 'Neighbourhood', 'Latitude',
       'Longitude'],
      dtype='object')

In [34]:
# Discard the old row labels that reset_index() stored in the "index" column.
# errors="ignore" plus re-binding keeps the cell safe to re-run: a second run
# finds no "index" column and simply leaves the frame unchanged.
toronto_df = toronto_df.drop(columns=["index"], errors="ignore")

In [36]:
toronto_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [41]:
# Look up the coordinates used to centre the map.
address = 'toronto'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when the service finds no match; fail with a clear
    # message instead of an AttributeError on the attribute access below.
    raise ValueError("Could not geocode address: " + repr(address))
latitude = location.latitude
longitude = location.longitude
print("The Geolocation of Toronto are:")
print(latitude, longitude)

The Geolocation of Toronto are:
43.653963 -79.387207


In [43]:
### Map of Toronto and Neighbourhood

In [44]:
# Base map centred on the coordinates obtained from the geocoder.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Draw one circle marker per row of toronto_df; its popup reads
# "<neighbourhood>, <borough>".
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighbourhood']
for lat, lng, borough, neighborhood in toronto_df[marker_columns].itertuples(index=False, name=None):
    popup = folium.Popup(f'{neighborhood}, {borough}', parse_html=True)
    # NOTE(review): parse_html is a Popup option; confirm that CircleMarker
    # does anything with it before keeping or removing it.
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='yellow',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

### Define Foursquare Credentials and Version

In [42]:
import os

# Read the Foursquare credentials from the environment so that no secret is
# stored in the notebook source.
# NOTE(review): any key that has ever been saved in plain text in this
# notebook (source or output) should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
if not CLIENT_ID or not CLIENT_SECRET:
    # Fail early with a clear message rather than sending empty credentials later.
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET environment variables'
    )
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Never echo the secret itself; only show that one was loaded.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: AQE5YEFAL1RDS15DE31KBHJWQJ1QMWKHRNHMOYV3KVO5YUBX
CLIENT_SECRET:W0PLVAXKOU5IHVE3YD3KEXOFW1JPV0ZXROBRFYX05AIHSBJY
