# **Note: this notebook will be used for all 3 parts of the assignment**

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

# Get wikipedia page which contains postal codes for Toronto

In [2]:
# Download the Wikipedia page listing the "M" postal codes. A timeout keeps the
# notebook from hanging on a stalled connection, and raise_for_status() stops
# the run on an HTTP error instead of handing an error page to the parser.
page = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
page.raise_for_status()


# Create a BeautifulSoup object which will be used for parsing postal codes from web page

In [3]:
# Parse the response body with Python's built-in 'html.parser' backend.
page_html = page.text
soup = BeautifulSoup(page_html, 'html.parser')

# extract the postal code, borough and neighborhood from the table cells on the page

In [4]:
# The first <table> on the page is taken to be the postal code table.
table_Toronto_zipinfo = soup.find('table')
if table_Toronto_zipinfo is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise ValueError('No <table> element found on the postal code page')

# Flat list of every <td> cell in the table, read in document order.
colvals = table_Toronto_zipinfo.find_all('td')
elem_cnt = len(colvals)

# Cells are consumed three at a time: postcode, borough, neighborhood.
# zip() stops at the last complete triple, so a trailing partial group of
# cells is ignored instead of raising IndexError.
cell_triples = zip(colvals[0::3], colvals[1::3], colvals[2::3])
parsed_rows = [tuple(cell.text.strip() for cell in triple) for triple in cell_triples]

postcode = [row[0] for row in parsed_rows]
borough = [row[1] for row in parsed_rows]
neighborhood = [row[2] for row in parsed_rows]

# build dataframe with only the required columns

In [5]:
# Assemble one row per (postcode, borough, neighborhood) triple and label the
# three columns in that order.
df_Toronto_postcodes = pd.DataFrame(
    list(zip(postcode, borough, neighborhood)),
    columns=['Postcode', 'Borough', 'Neighborhood']
)

In [6]:
#df_Toronto_postcodes

# clean the data by dropping rows where 'Borough' is not assigned

In [7]:
# Collect the index labels of the rows whose borough is 'Not assigned', then
# remove exactly those rows from the frame in place.
borough_unassigned = (df_Toronto_postcodes['Borough'] == 'Not assigned').values
unassigned_rows = df_Toronto_postcodes.index[borough_unassigned]
df_Toronto_postcodes.drop(unassigned_rows, inplace=True)


# if a Borough exists but its Neighborhood is not assigned, set the Neighborhood to the same value as the Borough

In [8]:
# For rows with no neighborhood assigned, copy the borough name into the
# 'Neighborhood' column (values are matched row-by-row on the index).
missing_neighborhood = df_Toronto_postcodes['Neighborhood'] == 'Not assigned'
df_Toronto_postcodes.loc[missing_neighborhood, 'Neighborhood'] = df_Toronto_postcodes.loc[missing_neighborhood, 'Borough']

In [9]:
#df_Toronto_postcodes

# group data where more than one Neighborhood has the same Postal Code

In [10]:
# A postal code can appear on several rows, one per neighborhood. Collapse them
# into a single row per (Postcode, Borough) with the names joined by ', '.
neighborhoods_by_code = df_Toronto_postcodes.groupby(['Postcode', 'Borough'])['Neighborhood']
df_group_Toronto = neighborhoods_by_code.apply(lambda names: ', '.join(names)).reset_index()
  

In [11]:
# Show a 10-row preview rather than dumping the whole frame into the output.
df_group_Toronto.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [12]:
# Report the (rows, columns) of the grouped frame. The bare `.shape` expression
# that used to precede this line displayed nothing, because only the last
# expression of a cell is echoed.
print('The shape of this dataframe is',df_group_Toronto.shape)

The shape of this dataframe is (103, 3)


In [13]:
# Download the postal code coordinates file. Using requests (already imported)
# instead of a quiet shell `wget` means a failed download raises here, so the
# success message below is only printed when the file was actually written.
geo_response = requests.get('http://cocl.us/Geospatial_data', timeout=30)
geo_response.raise_for_status()
with open('Geospatial_Coordinates.csv', 'wb') as geo_file:
    geo_file.write(geo_response.content)
print('Data downloaded!')


Data downloaded!


In [14]:
# Load the coordinates file saved by the download cell.
coordinates_csv = 'Geospatial_Coordinates.csv'
geospatial_data = pd.read_csv(coordinates_csv)

#geospatial_data.head()


In [15]:
# Rename the first column of the coordinates frame to 'Postcode' so it shares
# a key column name with the grouped neighborhood frame.
postal_code_column = geospatial_data.columns[0]
geospatial_data = geospatial_data.rename(columns={postal_code_column: "Postcode"})

# Join the coordinates onto the neighborhood data by postal code. This uses
# merge's default inner join, so only codes present in both frames are kept.
df_group_Toronto_with_coord = pd.merge(df_group_Toronto, geospatial_data, on='Postcode')

In [16]:
# Show a 10-row preview rather than dumping the whole frame into the output.
df_group_Toronto_with_coord.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [17]:
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.19.0                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge
Libraries imported.


In [18]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import pandas as pd # library for data analsysis
# Lift pandas' display limits so neither columns nor rows are elided.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Determine Latitude and Longitude of Toronto

In [19]:
# Look up the coordinates of Toronto; they are used to centre the map.
address = 'Toronto, Ontario, Canada'

geolocator = Nominatim(user_agent="Toronto Details")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when the service finds no match; stop with a clear
    # message instead of an AttributeError on `.latitude` below.
    raise ValueError('Could not geocode address: {}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


# Create map of Toronto with Neighborhoods

In [20]:
# Base map centred on the coordinates geocoded for Toronto.
map_of_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Walk the four columns in lockstep and drop one circle marker per postal code
# row; the popup reads "<neighborhood(s)>, <borough>".
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
marker_values = zip(*(df_group_Toronto_with_coord[column] for column in marker_columns))

for lat, long, bor, neigh in marker_values:
    label = folium.Popup('{}, {}'.format(neigh, bor), parse_html=True)
    # NOTE(review): parse_html is a Popup option; its effect when passed to
    # CircleMarker is unclear -- kept as-is to preserve behavior, confirm.
    marker = folium.CircleMarker(
        location=[lat, long],
        radius=7,
        popup=label,
        color='red',
        fill=True,
        fill_color='white',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_of_toronto)

# Last expression of the cell: renders the interactive map.
map_of_toronto