### Import all the libaries

In [28]:
#import all libaries
from bs4 import BeautifulSoup
import requests
import pandas as pd

### Download the page

In [29]:
page = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
page

<Response [200]>

### Parse Page with beautifulsoup

In [30]:
canada_postal_codes = []
soup = BeautifulSoup(page.content, 'html.parser')
table = soup.find('table')
table_body = table.find('tbody')

rows = table_body.findAll('tr')
for row in rows:
    cols = row.findAll('td')    
    cols = [ele.text.strip() for ele in cols]
    canada_postal_codes.append(cols)

# Remove the empty array from front
canada_postal_codes = canada_postal_codes[1:]
canada_postal_codes[0:5]

[['M1A', 'Not assigned', 'Not assigned'],
 ['M2A', 'Not assigned', 'Not assigned'],
 ['M3A', 'North York', 'Parkwoods'],
 ['M4A', 'North York', 'Victoria Village'],
 ['M5A', 'Downtown Toronto', 'Harbourfront']]

### Convert the list to pandas Dataframe

In [31]:
columns = ["Postcode", "Borough", "Neighbourhood"]
canada_df = pd.DataFrame(canada_postal_codes, columns = columns)
canada_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Cleaning Data

In [32]:
# Drop rows with Not assigned
canada_df_cleaned = canada_df[canada_df.Borough != 'Not assigned']
canada_df_cleaned.reset_index(drop = True, inplace=True)
canada_df_cleaned.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


### Merge Neighbourhood with same post codes

In [33]:
canada_grouped = canada_df_cleaned.groupby('Postcode', as_index = False).agg(lambda x: ', '.join(set(x)))
canada_grouped.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"West Hill, Morningside, Guildwood"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Dealing with Not assigned neighborhood

In [34]:
canada_grouped[canada_grouped.Neighbourhood == 'Not assigned']

Unnamed: 0,Postcode,Borough,Neighbourhood
85,M7A,Queen's Park,Not assigned


In [35]:
for row in canada_grouped.itertuples():
    if row.Neighbourhood == 'Not assigned':
        canada_grouped.at[row.Index,'Neighbourhood'] = row.Borough        

In [36]:
canada_grouped.iloc[85]

Postcode                  M7A
Borough          Queen's Park
Neighbourhood    Queen's Park
Name: 85, dtype: object

In [37]:
canada_grouped.shape

(103, 3)

### Add Geocoding data

In [38]:
!conda install -c conda-forge geopy 
!conda install -c conda-forge geocoder

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.18.1                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geocoder                  1.38.1                     py_0    conda-forge


In [39]:
import geocoder # import geocoder

# initialize your variable to None
lat_lng_coords = None

g = geocoder.google("Santa Cruz", components="country:ES")
lat_lng_coords = g.latlng
print(g.latlng)

# Unable to get data from geocoder, Download and read csv

None


### Download the csv provided to continue

In [40]:
canada_geo_data = pd.read_csv("http://cocl.us/Geospatial_data")
canada_geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [41]:
canada_geo_data_full = pd.concat([canada_geo_data, canada_grouped], axis=1)
canada_geo_data_full.head()

Unnamed: 0,Postal Code,Latitude,Longitude,Postcode,Borough,Neighbourhood
0,M1B,43.806686,-79.194353,M1B,Scarborough,"Malvern, Rouge"
1,M1C,43.784535,-79.160497,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,43.763573,-79.188711,M1E,Scarborough,"West Hill, Morningside, Guildwood"
3,M1G,43.770992,-79.216917,M1G,Scarborough,Woburn
4,M1H,43.773136,-79.239476,M1H,Scarborough,Cedarbrae


### Rearrange The columns

In [42]:
cols = ['Postal Code', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude', 'Postcode']
canada_geo_data_full = canada_geo_data_full[cols]

#Drop last PostCode column
canada_geo_data_full.drop(['Postcode'], axis=1, inplace=True)

#Rename "Postal Code" to PostalCode
canada_geo_data_full.rename(index=str, columns={"Postal Code": "PostalCode"}, inplace=True)

canada_geo_data_full.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"West Hill, Morningside, Guildwood",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Explore and cluster the neighborhoods in Toronto

In [43]:
#Filter the data for toronto
toronto_df = canada_geo_data_full[canada_geo_data_full.Borough.str.contains("Toronto")]
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"Riverdale, The Danforth West",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


### Install Folium package

In [47]:
!conda install -c conda-forge folium

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.8.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  16.46 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  37.17 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  40.92 MB/s
folium-0.8.0-p 100% |################################| Time: 0:00:00  49.92 MB/s


### Toronto map

In [52]:
from geopy.geocoders import Nominatim

address = 'Toronto, CAN'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.8184944, -79.3309468.


In [56]:
import folium
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, label in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(toronto_map)  
    
toronto_map