# PART 1

### Web Scraping


In [1]:
import requests
import pandas as pd

# Source page: Wikipedia's list of Canadian postal codes starting with "M".
url ='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Browser-like User-Agent sent with the request below.
headers = {'User-Agent': 'Mozilla/5.0'}

# Pass the headers (they were previously defined but never sent), bound the
# wait with a timeout, and fail loudly on an HTTP error instead of trying to
# parse an error page as if it were the postal-code table.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

html = response.content.decode("utf-8")

# read_html returns one DataFrame per <table> element found in the page.
tables = pd.read_html(html)

# The first table on the page is taken as the postal-code table.
Postal_code_DF = tables[0]

### Removing the rows that have Borough as 'Not Assigned' 

In [2]:
# Keep only the rows whose Borough differs from the 'Not assigned' placeholder.
Postal_code_DF = Postal_code_DF.loc[Postal_code_DF['Borough'].ne('Not assigned')]

### Grouping the Neighbourhoods by the Postcode and Borough columns

In [3]:
# Collapse all rows sharing a (Postcode, Borough) pair into a single row whose
# Neighbourhood value is the comma-joined string of the group's entries.
Grouped_DF = (
    Postal_code_DF
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(lambda names: ','.join(map(str, names)))
    .reset_index()
)


### Checking for the 'Not Assigned' Neighbourhoods

In [4]:
# Select the rows whose Neighbourhood is exactly the placeholder string.
Grouped_DF_NA = Grouped_DF.loc[Grouped_DF['Neighbourhood'].eq("Not assigned")]
Grouped_DF_NA

Unnamed: 0,Postcode,Borough,Neighbourhood
85,M7A,Queen's Park,Not assigned


### Replacing 'Not assigned' neighbourhoods with the Borough name

In [5]:
# Where the Neighbourhood is the 'Not assigned' placeholder, copy the Borough
# name from the same row.
# This is done with an explicit boolean mask and a .loc assignment on the frame
# itself: an in-place replace() on a column pulled out via attribute access is
# chained assignment, which does not update the frame when pandas
# Copy-on-Write is active.
unassigned = Grouped_DF['Neighbourhood'] == "Not assigned"
Grouped_DF.loc[unassigned, 'Neighbourhood'] = Grouped_DF.loc[unassigned, 'Borough']

### Checking for the 'Not Assigned' Neighbourhoods again

In [6]:
# Re-run the placeholder check; an empty result means no row still carries it.
Grouped_DF_NA = Grouped_DF.query('Neighbourhood == "Not assigned"')
Grouped_DF_NA

Unnamed: 0,Postcode,Borough,Neighbourhood


### Shape of the DF

In [7]:
# Display the (row count, column count) tuple of the grouped frame.
Grouped_DF.shape

(103, 3)

# PART 2

### Hidden cell to load Co-ordinates data

In [8]:
{
    "tags": [
        "hide_input",
    ]
}
import os
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in IBM Cloud Object Storage.
# The API key is read from the IBM_COS_API_KEY environment variable instead of
# being embedded in the notebook, so the notebook can be shared safely. Export
# the variable before starting the kernel; a missing variable raises KeyError.
# NOTE(review): the key that used to be hard-coded in this cell has been
# exposed in the notebook's history and should be revoked and rotated.
client_11c0d074f52c40ada664d6a8d2bec017 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=os.environ['IBM_COS_API_KEY'],
    ibm_auth_endpoint="https://iam.ng.bluemix.net/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com')

# Fetch the coordinates CSV object; 'Body' is the response's streaming payload.
body = client_11c0d074f52c40ada664d6a8d2bec017.get_object(Bucket='test-donotdelete-pr-zz3wumueodxn9y',Key='Geospatial_Coordinates.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_1 = pd.read_csv(body)
df_data_1.head()



Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Renaming the 'Postal Code' column and joining on the Postcode column

In [9]:
# Align the coordinate table's key column name with the grouped frame's.
df_data_1 = df_data_1.rename(columns={'Postal Code': 'Postcode'})

# Inner join: keep only postcodes present in both frames.
Joined_Code_Coordinates = Grouped_DF.merge(df_data_1, how='inner', on='Postcode')

In [10]:
# Spot-check the joined frame: show the rows at positions 84, 85 and 86.
Joined_Code_Coordinates.iloc[84:87]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
84,M6S,West Toronto,"Runnymede,Swansea",43.651571,-79.48445
85,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
86,M7R,Mississauga,Canada Post Gateway Processing Centre,43.636966,-79.615819


# PART 3

### Importing Folium

In [16]:
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium

### Map for all the data

In [38]:
def build_marker_map(frame, location=None, zoom_start=10):
    """Build a folium map with one circle marker per row of ``frame``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide the columns 'Latitude', 'Longitude', 'Borough' and
        'Neighbourhood'.
    location : list of two floats, optional
        [latitude, longitude] on which to centre the map. Defaults to the
        median latitude and longitude of ``frame``.
    zoom_start : int, optional
        Initial zoom level passed to ``folium.Map``.

    Returns
    -------
    folium.Map
        The map with a marker added for every row; each marker's popup reads
        "<Neighbourhood>, <Borough>".
    """
    if location is None:
        location = [frame['Latitude'].median(), frame['Longitude'].median()]

    marker_map = folium.Map(location=location, zoom_start=zoom_start)

    for lat, lng, borough, neighborhood in zip(frame['Latitude'], frame['Longitude'], frame['Borough'], frame['Neighbourhood']):
        # parse_html=True belongs on the Popup; it is not a CircleMarker
        # option, so it is no longer passed to the marker.
        label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7).add_to(marker_map)

    return marker_map


# Centre the map on the median coordinate of all joined postcodes.
latitude = Joined_Code_Coordinates['Latitude'].median()
longitude = Joined_Code_Coordinates['Longitude'].median()

map_canada = build_marker_map(Joined_Code_Coordinates, location=[latitude, longitude])

map_canada

### Observations from above graph

#### 1. The above graph has points all over the City of Toronto including Suburbs

#### 2. Upper Rouge, Scarborough is the farthest neighbourhood from the downtown

## Map for the Boroughs that have the word Toronto in them

In [35]:
# Keep only the rows whose Borough name contains the substring "Toronto".
Toronto_Boroughs = Joined_Code_Coordinates.loc[
    lambda frame: frame['Borough'].str.contains("Toronto")
]

In [37]:
# Centre the map on the median coordinate of the Toronto-named boroughs.
latitude = Toronto_Boroughs['Latitude'].median()
longitude = Toronto_Boroughs['Longitude'].median()

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Appearance options shared by every circle marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One marker per row, with a "<Neighbourhood>, <Borough>" popup.
marker_columns = Toronto_Boroughs[['Latitude', 'Longitude', 'Borough', 'Neighbourhood']]
for lat, lng, borough, neighborhood in marker_columns.itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_toronto)

map_toronto

### Observations from above graph
##### 1. The closest major airport to downtown Toronto is Billy Bishop Airport
##### 2. All the Boroughs with Toronto in their name could be clustered effectively