## 1. Repeat steps to segment neighbourhoods by postcode

In [7]:
import pandas as pd
import numpy as np

#Instead of using Beautiful Soup, read_html from the pandas library can provide a way to read all the tables in the webpage.

big_df = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",flavor="bs4",header=0)

#The first data set in big_df is the main table in the wikipedia article, which is the table we are interested in using for our geographical data.

To_postcodes = big_df[0]
To_postcodes.shape

#The columns where the Borough is not assigned are to be removed

To_postcodes = To_postcodes[~To_postcodes['Borough'].isin(['Not assigned'])]
To_postcodes.shape

#Where Neighbourhood is unassigned, the value the string from Borough is duplicated in this cell

To_postcodes['Neighbourhood'].replace("Not assigned",To_postcodes['Borough'], inplace=True)

#Finally, the data needs to be grouped so any duplicated postcodes are removed, and added additional data is joined in the Neighbourhood column. a join command is used to add the values from Neighbourhood.
#The index is reset to clean up the indexing.

To_postcodes_grouped = To_postcodes.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index()
To_postcodes_grouped.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## 2. Attaining geospatial co-ordinates by postcode for the above data

In [8]:
postcode_coordinates = pd.read_csv('http://cocl.us/Geospatial_data')
postcode_coordinates.columns = ['Postcode', 'Latitude', 'Longitude']
postcode_coordinates.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


The table required for the assessment is shown below

In [9]:
#the merge command in the pandas library is similar to the join command used in SQL, and can be used to combine the data using the common link of postcode in both tables.
To_postcodes_coords = pd.merge(To_postcodes_grouped, postcode_coordinates, on=['Postcode'], how='inner')
To_postcodes_coords.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## 3. Exploring Toronto with this data

In [10]:
#Lets look at just Toronto to make the data set smaller

Toronto_data = To_postcodes_coords[To_postcodes_coords['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
Toronto_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
1,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
3,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
4,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


In [12]:
# set the centre point (i.e. Toronto) for the visualisation

from geopy.geocoders import Nominatim
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [15]:
#map plotting package required to plot on maps
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Folium imported.')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  44.76 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  21.83 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  22.59 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  25.42 MB/s
Folium imported.


In [17]:
# create map to visualise Downtown Toronto
Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(Toronto_data['Latitude'], Toronto_data['Longitude'], Toronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(Toronto_map)  
    
Toronto_map