In [3]:
# Optional one-time environment setup, left commented out. Uncomment if the
# HTML parser packages are missing (presumably they are what pd.read_html
# needs later in the notebook -- confirm).
#!conda install -c anaconda beautifulsoup4 --yes
#!conda install -c anaconda lxml --yes
import numpy as np 
import pandas as pd
# Confirmation message so the cell output shows that the imports succeeded.
print('Libraries imported.')

Libraries imported.


In [4]:
# Part 1
# Download the Wikipedia page and parse its HTML tables.
URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# pd.read_html returns a list of DataFrames, one per table found on the page;
# the postal-code table is the first entry.
df = pd.read_html(io=URL)
df[0].head(5)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [5]:
# Preparation of data
# Work on a copy of the scraped table. Assigning new column labels to df[0]
# itself would silently rename the raw frame that the previous cell displayed,
# leaving that output stale.
df_new = df[0].copy()
df_new.columns = ['PostalCode', 'Borough', 'InitialNeighborhood']

# Drop the rows whose Borough is the placeholder 'Not assigned'.
df_new = df_new[df_new['Borough'] != 'Not assigned']
df_new.head(5)


Unnamed: 0,PostalCode,Borough,InitialNeighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [6]:
# Grouping by postal code: one row per code, with the distinct values of each
# remaining column joined into a single comma-separated string.

def _join_unique(values):
    """Join the distinct items of ``values`` with ', ' in first-seen order.

    dict.fromkeys removes duplicates while keeping the order of appearance.
    A plain set() would also de-duplicate, but its iteration order for strings
    can change between kernel sessions, which makes the joined text
    non-reproducible.
    """
    return ', '.join(dict.fromkeys(values))


clean_df = df_new.groupby("PostalCode").agg(_join_unique)
clean_df.head(5)

Unnamed: 0_level_0,Borough,InitialNeighborhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern, Rouge"
M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [7]:
# Cleaning 'Not assigned' in the neighborhood column: where the neighborhood
# is the placeholder, fall back to the borough name; otherwise keep it.
neighborhood = clean_df['InitialNeighborhood'].where(
    clean_df['InitialNeighborhood'] != 'Not assigned',
    clean_df['Borough'],
)

# Build the result as a new frame instead of dropping the column from clean_df
# in place. clean_df is left untouched, so this cell can be re-run safely
# (an in-place drop would make a second run fail on the missing column).
clean_df2 = (
    clean_df
    .assign(Neighborhood=neighborhood)
    .drop(columns=['InitialNeighborhood'])
    .reset_index()  # turn the PostalCode index back into a regular column
)
clean_df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [8]:
# Report the dimensions of the cleaned table: the (rows, columns) tuple first,
# then the row count on its own.
print('Shape of data frame: ', clean_df2.shape)
print('The number of rows in data frame : ', len(clean_df2))

Shape of data frame:  (103, 3)
The number of rows in data frame :  103


In [9]:
# Part 2
# Load the geolocation CSV into a dataframe.
URL_geo = 'https://cocl.us/Geospatial_data'
df_geo = pd.read_csv(filepath_or_buffer=URL_geo)

# Print the size of the loaded table, then preview its first rows.
print('Shape of data frame: ', df_geo.shape)
df_geo.head(5)


Shape of data frame:  (103, 3)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
# Give the geodata key column the same name as in the cleaned postal-code table.
df_geo_new = df_geo.rename(columns={'Postal Code': 'PostalCode'})

# Index both tables by PostalCode, attach the geodata columns to the
# postal-code rows (DataFrame.join keeps every left-hand row), then turn the
# index back into an ordinary column.
codes_indexed = clean_df2.set_index('PostalCode')
geo_indexed = df_geo_new.set_index('PostalCode')
df_total = codes_indexed.join(geo_indexed).reset_index()

# Preview the merged dataframe.
df_total.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [11]:
# Part 3: Exploring and clustering the neighborhoods in Toronto.

# library to handle requests
import requests 
# NOTE(review): newer pandas releases expose this as pd.json_normalize and
# deprecate this import path -- confirm against the installed pandas version.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe
# Shell escape: installs the pinned folium release (0.5.0) from the conda-forge
# channel; --yes answers the confirmation prompt automatically.
!conda install -c conda-forge folium=0.5.0 --yes 

# map rendering library
import folium 
# Confirmation message so the cell output shows that the imports succeeded.
print('Libraries imported')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.6.16          |           py36_1         149 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.3 MB

The following NEW packages will be 

In [20]:
# Latitude and longitude (decimal degrees) used as the initial centre of the
# folium map drawn in the next cell.
latitude_toronto, longitude_toronto = 43.653908, -79.384293


In [21]:
# Selecting only boroughs that contain the word Toronto
df3 = df_total[df_total.Borough.str.contains('Toronto')]

# Create a map centred on the Toronto coordinates defined above.
map_canada = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=10)

# Add one circle marker per row of df3. Every column is read from df3:
# zip() pairs values purely by position, so combining df3's postal codes with
# columns of the unfiltered df_total would draw each Toronto postal code at the
# coordinates (and with the borough/neighborhood) of an unrelated row.
for pc, borough, neighborhood, lat, lng in zip(
        df3['PostalCode'],
        df3['Borough'],
        df3['Neighborhood'],
        df3['Latitude'],
        df3['Longitude']):
    label_text = '{}, {}, {}'.format(pc, neighborhood, borough)
    # parse_html=True is passed so the label is handled as text, not raw markup.
    popup = folium.Popup(label_text, parse_html=True)
    # NOTE(review): parse_html looks like a Popup option and appears to have no
    # effect on CircleMarker -- confirm before removing it.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_canada)

# Last expression of the cell: renders the map inline.
map_canada