# Capstone Project Part-1

In [142]:
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
from urllib.request import urlopen

### Using BeautifulSoup library to scrap data from Wikipedia

In [443]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html = urlopen(url) 
soup = BeautifulSoup(html, 'html.parser')

In [444]:
tables = soup.find_all('table')

### Create array to hold the data we extract in three lists

In [445]:
postal_codes = []
boroughs = []
neighbourhoods = []

for table in tables:
    rows = table.find_all('tr')
    
    for row in rows[1:]:
        cells = row.find_all('td')
        if len(cells) > 1:
            postal_code = cells[0]
            postal_codes.append(postal_code.text.strip())
            
            borough = cells[1]
            boroughs.append(borough.text.strip())
            
            neighbourhood = cells[2]
            neighbourhoods.append(neighbourhood.text.strip())

### Creating dataframe using the lists

In [446]:
df1 = pd.DataFrame(postal_codes, columns = ['PostalCode'])

df1['Borough']=boroughs
df1['Neighbourhood']=neighbourhoods
df1.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Removing the 'Not assigned' values from the table

In [447]:
df1=df1[df1.Borough != 'Not assigned']

In [448]:
df1.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [449]:
'Not assigned' in df1

False

### Assigning value of correspondig 'Borough' to ' Neighbourhood' with 'Not assigned' value

In [450]:
for i in range(0,len(df1)):
    if df1.iloc[i,2]=='Not assigned':
        df1.iloc[i,2]=df1.iloc[i,1]

In [451]:
'Not assigned' in df1

False

### Resetting the Index numbers

In [452]:
df1.reset_index(inplace=True)
df1.tail(10)

Unnamed: 0,index,PostalCode,Borough,Neighbourhood
97,157,M5X,Downtown Toronto,"First Canadian Place, Underground city"
98,160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,165,M4Y,Downtown Toronto,Church and Wellesley
100,168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."
102,178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."
103,180,NL\n\nNS\n\nPE\n\nNB\n\nQC\n\nON\n\nMB\n\nSK\n...,NL,NS
104,181,NL,NS,PE
105,182,A,B,C
106,183,A,B,C


In [453]:
df1=df1[['PostalCode','Borough','Neighbourhood']]
df1.tail()

Unnamed: 0,PostalCode,Borough,Neighbourhood
102,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."
103,NL\n\nNS\n\nPE\n\nNB\n\nQC\n\nON\n\nMB\n\nSK\n...,NL,NS
104,NL,NS,PE
105,A,B,C
106,A,B,C


### Removing rows other table from Dataframe *(df1)*

In [454]:
df1=df1[0:-4]
df1

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [455]:
df1.shape

(103, 3)

# Capstone Project- Part 2 mapping longitude and latitude using postal codes

In [456]:
df2=pd.read_csv('Geospatial_Coordinates.csv')
df2.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [457]:
latitude=[]
longitude=[]

In [458]:
for i in range(0,len(df1)):
        for j in range(0,len(df2)):
            if df1.iloc[i:i+1,0:1].values== df2.iloc[j:j+1,0:1].values:
                lat=df2.iloc[j:j+1,1:2].values
                long=df2.iloc[j:j+1,2:3].values
                lat = np.array(lat, dtype=float)
                long = np.array(long, dtype=float)
                latitude.append(lat)
                longitude.append(long)

In [459]:
df1['Latitude']=latitude
df1['Longitude']=longitude
df1.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,[[43.7532586]],[[-79.3296565]]
1,M4A,North York,Victoria Village,[[43.725882299999995]],[[-79.31557159999998]]
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",[[43.6542599]],[[-79.3606359]]
3,M6A,North York,"Lawrence Manor, Lawrence Heights",[[43.718517999999996]],[[-79.46476329999999]]
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",[[43.6623015]],[[-79.3894938]]


In [460]:
df1['Latitude']=df1['Latitude'].astype(float)

In [461]:
df1['Longitude']=df1['Longitude'].astype(float)

## Output dataframe

In [462]:
df1.head(12)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


# Capstone Project- Part 3 Exploring and clustering the neighborhoods in Toronto using *Borough*

In [393]:
import re

In [408]:
df1=df1[df1.Borough.str.contains('Toronto', regex= True, na=False)]

In [441]:
df1.head(12)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [417]:
lat_m=df1['Latitude'].mean()
lat_m

43.66713498717949

In [418]:
long_m=df1['Longitude'].mean()
long_m

-79.38987324871795

In [410]:
from IPython.display import Image 
from IPython.core.display import HTML 
import folium

## Output Map of cluster the neighborhoods in Toronto

In [440]:
venues_map = folium.Map(location=[lat_m,long_m], zoom_start=12)
for la, ln, label in zip(df1.Latitude, df1.Longitude, df1.Borough):
    if label=='Downtown Toronto':
        folium.features.CircleMarker(
            [la, ln],
            radius=5,
            color='blue',
            popup=label,
            fill = True,
            fill_color='blue',
            fill_opacity=0.6
            ).add_to(venues_map)
    elif label=='Central Toronto':
        folium.features.CircleMarker(
            [la, ln],
            radius=5,
            color='red',
            popup=label,
            fill = True,
            fill_color='red',
            fill_opacity=0.6
            ).add_to(venues_map)
    elif label=='East Toronto':
        folium.features.CircleMarker(
            [la, ln],
            radius=5,
            color='green',
            popup=label,
            fill = True,
            fill_color='green',
            fill_opacity=0.6
            ).add_to(venues_map)
    elif label=='West Toronto':
        folium.features.CircleMarker(
            [la, ln],
            radius=5,
            color='black',
            popup=label,
            fill = True,
            fill_color='black',
            fill_opacity=0.6
            ).add_to(venues_map)

venues_map