### Importing the libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Passing None removes the display limit, so DataFrames are shown with every column and row.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): newer pandas releases expose this as `pandas.json_normalize`; confirm this import path still works on the target pandas version.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### Loading the dataset

In [2]:
# Read the postal code / borough / neighbourhood table from a CSV file in the working directory.
data = pd.read_csv('postalcode_canada.csv')
# Preview the first five rows.
data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M1B,Scarborough,Rouge
2,M1B,Scarborough,Malvern
3,M1C,Scarborough,Highland Creek
4,M1C,Scarborough,Rouge Hill


### Find the number of rows and columns of the dataset

In [3]:
# (rows, columns) of the table as loaded from the CSV.
data.shape

(287, 3)

### Removing rows with an unassigned borough

In [4]:
# Index labels of every row whose Borough holds the placeholder value 'Not assigned'.
nonassigned = data[data['Borough']=='Not assigned'].index

In [5]:
# Display the collected index labels.
nonassigned

Int64Index([  0,  38,  39,  40,  41,  42,  43,  44,  57,  58,  59,  60,  61,
             62,  63,  68,  69,  80,  81,  82,  83,  84,  85,  86,  87,  88,
            117, 157, 158, 187, 188, 189, 190, 191, 192, 194, 195, 196, 197,
            198, 199, 200, 201, 202, 203, 204, 206, 207, 208, 209, 210, 212,
            213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225,
            226, 227, 259, 260, 261, 262, 263, 273, 274, 284, 285, 286],
           dtype='int64')

In [6]:
# Keep only the rows whose borough is assigned.
# A boolean-mask filter is used instead of dropping previously collected index
# labels in place, so this cell is safe to re-run: a second in-place drop of
# the same labels raises KeyError, and once the index has been reset those
# stale labels would point at unrelated rows.
# .copy() yields an independent frame, so later assignments into `data` do not
# trigger SettingWithCopyWarning.
data = data[data['Borough'] != 'Not assigned'].copy()
data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1B,Scarborough,Rouge
2,M1B,Scarborough,Malvern
3,M1C,Scarborough,Highland Creek
4,M1C,Scarborough,Rouge Hill
5,M1C,Scarborough,Port Union


### The number of rows and columns after removing rows where borough is not assigned

In [7]:
# (rows, columns) now that the rows with a 'Not assigned' borough are gone.
data.shape

(210, 3)

### Merging rows that share a postal code but list different neighbourhoods

In [8]:
# Collapse the table to one row per postal code, joining the neighbourhood
# names of rows that share a code into a single comma-separated string.
# Grouping on 'Postcode' replaces a per-postcode .loc loop whose `size > 3`
# check only worked while the frame had exactly three columns.
separator = ', '
data = (
    data
    # sort=False keeps postal codes in order of first appearance;
    # as_index=False keeps 'Postcode' as a regular column.
    .groupby('Postcode', sort=False, as_index=False)
    # The borough comes from the first row of each group.
    .agg({'Borough': 'first', 'Neighbourhood': separator.join})
)

# Drop postal codes whose neighbourhood is still just the placeholder value,
# then renumber the rows from 0.
data = data[data['Neighbourhood'] != 'Not assigned'].reset_index(drop=True)
data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [9]:
# (rows, columns) after collapsing to one row per postal code.
data.shape

(103, 3)

### Loading geospatial data

In [10]:
# Read the postal code -> latitude/longitude table from a CSV file in the working directory.
geodata = pd.read_csv('Geospatial_Coordinates.csv')
# Preview the first five rows.
geodata.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merging the two datasets

In [11]:
# Give the coordinates table the same key column name as the borough table
# ('Postal Code' -> 'Postcode'); rename returns a new frame, leaving `geodata` as loaded.
column_renames = {'Postal Code': 'Postcode'}
geodf = geodata.rename(column_renames, axis='columns')
geodf.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
# Join the cleaned borough table with the coordinates on the shared 'Postcode'
# column. The join is an inner join (pandas' default), so only postal codes
# present in both tables are kept.
dataset = data.merge(geodf, how='inner', on='Postcode')
dataset.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [13]:
# (rows, columns) of the merged table.
dataset.shape

(103, 5)

### Getting the latitude and longitude of Toronto using the geopy library

In [14]:
# Look up the coordinates of Toronto through the Nominatim geocoding service.
address = 'Toronto'

geolocator = Nominatim(user_agent="tr_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


### Create a map of Toronto with neighborhoods superimposed on top.

In [15]:
# create map of toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
# The loop variable is called `neighbourhood` so that it does not rebind
# `dataset` (the DataFrame being iterated) to a string; shadowing it would
# break re-running this cell and every later use of `dataset`.
for lat, lng, borough, neighbourhood in zip(dataset['Latitude'], dataset['Longitude'], dataset['Borough'], dataset['Neighbourhood']):
    # Popup text: "<neighbourhood(s)>, <borough>"
    label = '{}, {}'.format(neighbourhood, borough)
    popup = folium.Popup(label, parse_html=True)
    # NOTE(review): parse_html is forwarded to CircleMarker exactly as before;
    # confirm whether CircleMarker actually uses it.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

map_toronto