In [7]:
#!conda install -c conda-forge geopy --yes 
#!conda install -c conda-forge folium=0.5.0 --yes

import json
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans



print ("Project Libraries Imported")

Project Libraries Imported


### Part 1 of Peer Assignment
#### Download Toronto Postal Codes from Wikipedia link and load initial dataframe

In [8]:
# read_html returns every table found on the page; the postal-code table is the first one.
wiki_tables = pd.read_html("http://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
PSHTML = wiki_tables[0]
print(PSHTML.shape)
PSHTML.head()

(287, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


#### Data Processing and Cleansing

In [9]:
# Build the cleaned postal-code table from the scraped frame without mutating PSHTML.
# rename() returns a new frame, so the raw table stays available for re-runs.
PSCodes = PSHTML.rename(columns={'Postcode': 'PostalCode'})

# Keep only rows whose borough has been assigned.
PSCodes = PSCodes[PSCodes['Borough'] != 'Not assigned'].copy()

# Defensive fallback: if a row still has an unassigned neighbourhood, use its
# borough name rather than carrying the 'Not assigned' placeholder forward.
unnamed = PSCodes['Neighbourhood'] == 'Not assigned'
PSCodes.loc[unnamed, 'Neighbourhood'] = PSCodes.loc[unnamed, 'Borough']

# Collapse rows sharing a postal code and borough into one row whose
# neighbourhood field is the comma-joined list of all their neighbourhoods.
PSCodes['Neighbourhood'] = (
    PSCodes
    .groupby(['PostalCode', 'Borough'])['Neighbourhood']
    .transform(lambda names: ','.join(names))
)
PSCodes = (
    PSCodes
    .drop_duplicates(keep='first')
    .sort_values('PostalCode')
    .reset_index(drop=True)  # fresh 0..n-1 index, no leftover 'index' column
)

# Check data
print(PSCodes.shape)
PSCodes.head()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Part 2 of Peer Assignment
#### Update Latitude and Longitude from Geospatial Data

In [10]:

# Load the postal-code coordinates and align the key column name with PSCodes.
GeoData = (
    pd.read_csv('http://cocl.us/Geospatial_data')
    .rename(columns={'Postal Code': 'PostalCode'})
)
print(GeoData.shape)

# Remove any columns a previous run of this cell already merged in, so that
# re-running it does not produce suffixed duplicates (e.g. Latitude_x / Latitude_y).
geo_value_columns = [col for col in GeoData.columns if col != 'PostalCode']
PSCodes = (
    PSCodes
    .drop(columns=geo_value_columns, errors='ignore')
    .merge(GeoData, on='PostalCode')
)
PSCodes.head()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Part 3 of Peer Assignment
### Data Clustering

#### Get the latitude and longitude values of Toronto & Create a map of Toronto with neighborhoods superimposed on top.

In [14]:
# Geocode the city once; the resulting coordinates centre the maps drawn below.
TorAddr = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="canada_explorer")
location = geolocator.geocode(TorAddr)
if location is None:
    # geocode() yields None when the service finds no match; fail with a clear
    # message instead of an AttributeError on the attribute access below.
    raise ValueError('Could not geocode address: {!r}'.format(TorAddr))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [62]:
# Base map centred on the geocoded Toronto coordinates.
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per postal-code row; the popup reads "<neighbourhoods>, <borough>".
marker_rows = zip(
    PSCodes['Latitude'],
    PSCodes['Longitude'],
    PSCodes['Borough'],
    PSCodes['Neighbourhood'],
)
for lat, lng, borough, neighborhood in marker_rows:
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_Toronto)

map_Toronto

#### Get nearby venues for Toronto Postal Codes using the Foursquare API

In [81]:
# Foursquare credentials are read from the environment so that secrets are never
# stored in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel. Any keys that were previously committed in this
# notebook should be treated as leaked and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')

VERSION = '20180605' # Foursquare API version
limit = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect Foursquare "explore" venues around each given location.

    Parameters
    ----------
    names : iterable
        Label stored with every venue found for the matching location
        (postal codes in this notebook).
    latitudes, longitudes : iterable
        Coordinates of each location, aligned with ``names``.
    radius : int, default 500
        Value sent as the ``radius`` query parameter of the request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'PostalCode',
        'PostalCode Latitude', 'PostalCode Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``limit`` values at call time.
    """
    venue_columns = ['PostalCode',
                     'PostalCode Latitude',
                     'PostalCode Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string; the timeout keeps a
        # stalled connection from hanging the notebook.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
            },
            timeout=30,
        )
        # Surface API errors directly instead of failing later on a missing
        # 'response' key in the error payload.
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category gets None rather than raising IndexError
                categories[0]['name'] if categories else None,
            ))

    # Passing the columns to the constructor keeps the schema valid for an empty result.
    nearby_venues = pd.DataFrame(venue_rows, columns=venue_columns)

    return(nearby_venues)

# Fetch venues for every postal code; the function's default radius is used.
toronto_venues = getNearbyVenues(
    names=PSCodes['PostalCode'],
    latitudes=PSCodes['Latitude'],
    longitudes=PSCodes['Longitude'],
)

M1B
M1C
M1E
M1G
M1H
M1J
M1K
M1L
M1M
M1N
M1P
M1R
M1S
M1T
M1V
M1W
M1X
M2H
M2J
M2K
M2L
M2M
M2N
M2P
M2R
M3A
M3B
M3C
M3H
M3J
M3K
M3L
M3M
M3N
M4A
M4B
M4C
M4E
M4G
M4H
M4J
M4K
M4L
M4M
M4N
M4P
M4R
M4S
M4T
M4V
M4W
M4X
M4Y
M5A
M5B
M5C
M5E
M5G
M5H
M5J
M5K
M5L
M5M
M5N
M5P
M5R
M5S
M5T
M5V
M5W
M5X
M6A
M6B
M6C
M6E
M6G
M6H
M6J
M6K
M6L
M6M
M6N
M6P
M6R
M6S
M7A
M7R
M7Y
M8V
M8W
M8X
M8Y
M8Z
M9A
M9B
M9C
M9L
M9M
M9N
M9P
M9R
M9V
M9W


In [82]:
# Overall size of the venue table and the number of distinct venue categories.
print(toronto_venues.shape)
category_count = len(toronto_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

# Row count of this grouped frame = number of postal codes with at least one venue.
toronto_venues.groupby('PostalCode').count().shape


(2231, 7)
There are 266 uniques categories.


(99, 6)

#### Process venues and one-hot encode categories to prepare the data for clustering

In [90]:
# One indicator column per venue category (no prefix, so columns carry the category name).
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Append the postal code, then rotate that last column to the front.
toronto_onehot['PostalCode'] = toronto_venues['PostalCode']
onehot_columns = list(toronto_onehot.columns)
fixed_columns = onehot_columns[-1:] + onehot_columns[:-1]
toronto_onehot = toronto_onehot[fixed_columns]

# Mean of the indicators per postal code = share of that postal code's venues in each category.
toronto_grouped = toronto_onehot.groupby('PostalCode').mean().reset_index()
print(toronto_grouped.shape)

def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the largest values in ``row``.

    The first entry of ``row`` is skipped (this notebook passes rows whose
    first entry is the postal code). The remaining entries are sorted in
    descending order and the labels of the first ``num_top_venues`` are
    returned as an array; if fewer entries exist, all of them are returned.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# Column headers: "1st", "2nd", "3rd", then "<n>th" for every later rank.
# The suffix is chosen with an explicit bounds check rather than a bare
# `except`, which would also have hidden unrelated errors.
columns = ['PostalCode']
for ind in np.arange(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per postal code in toronto_grouped
PSCodes_venues_sorted = pd.DataFrame(columns=columns)
PSCodes_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']

# fill the ranked category names row by row
for ind in np.arange(toronto_grouped.shape[0]):
    PSCodes_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

PSCodes_venues_sorted.head()

(99, 267)


Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Dessert Shop,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop
1,M1C,Bar,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore,Farmers Market
2,M1E,Rental Car Location,Moving Target,Spa,Breakfast Spot,Intersection,Medical Center,Mexican Restaurant,Electronics Store,Coworking Space,Discount Store
3,M1G,Coffee Shop,Korean Restaurant,Convenience Store,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
4,M1H,Fried Chicken Joint,Gas Station,Bank,Hakka Restaurant,Caribbean Restaurant,Athletics & Sports,Thai Restaurant,Bakery,Dim Sum Restaurant,Diner


#### K Means Clustering

In [91]:
# KMeans Clustering
# Set number of clusters
# Set number of clusters
kclusters = 5

# Features for clustering: every category-frequency column, without the postal code.
# (The keyword form is used because a positional axis argument is rejected by pandas 2.)
toronto_grouped_clustering = toronto_grouped.drop(columns='PostalCode')

# run k-means clustering; n_init is pinned so results do not change with the
# library's default for this parameter
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
print(kmeans.labels_)

# Put the labels in the first column. Dropping a label column left over from a
# previous run keeps this cell re-runnable (insert() raises on an existing name).
PSCodes_venues_sorted = PSCodes_venues_sorted.drop(columns='Clusters', errors='ignore')
PSCodes_venues_sorted.insert(0, 'Clusters', kmeans.labels_)

# Attach labels and top venues to the postal-code table. Postal codes without
# venue data have no match and are removed by dropna(), after which the label
# column can be cast to an integer type.
toronto_merged = (
    PSCodes
    .join(PSCodes_venues_sorted.set_index('PostalCode'), on='PostalCode')
    .dropna()
    .assign(Clusters=lambda merged: merged['Clusters'].astype('int32'))
)

toronto_merged.head()


[0 1 1 1 1 3 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 3 1 3 1 1 1 1 3 1 1 1 1 1 1 1 1
 1 3 1 1 1 3 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1
 1 1 1 3 1 4 1 1 1 1 1 1 1 1 1 2 1 1 4 2 3 4 4 1 1]


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Clusters,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,0,Fast Food Restaurant,Dessert Shop,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,1,Bar,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore,Farmers Market
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,1,Rental Car Location,Moving Target,Spa,Breakfast Spot,Intersection,Medical Center,Mexican Restaurant,Electronics Store,Coworking Space,Discount Store
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1,Coffee Shop,Korean Restaurant,Convenience Store,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1,Fried Chicken Joint,Gas Station,Bank,Hakka Restaurant,Caribbean Restaurant,Athletics & Sports,Thai Restaurant,Bakery,Dim Sum Restaurant,Diner


#### Create a map of Toronto with neighborhoods superimposed on top with cluster based color coding

In [93]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map, coloured by cluster label
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Clusters'].astype('int32')):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters - 1, so they index the palette directly
    # (no reliance on negative-index wraparound for label 0).
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Cluster 1

In [104]:
# Rows with k-means label 0 (shown under the "Cluster 1" heading).
# Columns kept: positions 1 and 2, plus everything from position 6 onwards.
cluster_columns = toronto_merged.columns[[1, 2] + list(range(6, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Clusters'] == 0, cluster_columns]

Unnamed: 0,Borough,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,"Rouge,Malvern",Fast Food Restaurant,Dessert Shop,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop


### Cluster 2

In [105]:
# Rows with k-means label 1 (shown under the "Cluster 2" heading).
# Columns kept: positions 1 and 2, plus everything from position 6 onwards.
cluster_columns = toronto_merged.columns[[1, 2] + list(range(6, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Clusters'] == 1, cluster_columns]

Unnamed: 0,Borough,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,"Highland Creek,Rouge Hill,Port Union",Bar,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore,Farmers Market
2,Scarborough,"Guildwood,Morningside,West Hill",Rental Car Location,Moving Target,Spa,Breakfast Spot,Intersection,Medical Center,Mexican Restaurant,Electronics Store,Coworking Space,Discount Store
3,Scarborough,Woburn,Coffee Shop,Korean Restaurant,Convenience Store,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
4,Scarborough,Cedarbrae,Fried Chicken Joint,Gas Station,Bank,Hakka Restaurant,Caribbean Restaurant,Athletics & Sports,Thai Restaurant,Bakery,Dim Sum Restaurant,Diner
6,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",Discount Store,Department Store,Coffee Shop,Chinese Restaurant,Convenience Store,Bus Station,Eastern European Restaurant,Electronics Store,Dumpling Restaurant,Dim Sum Restaurant
7,Scarborough,"Clairlea,Golden Mile,Oakridge",Bakery,Bus Line,Soccer Field,Park,Bus Station,Intersection,Ice Cream Shop,Dog Run,Discount Store,Distribution Center
8,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",Motel,American Restaurant,Department Store,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Yoga Studio
9,Scarborough,"Birch Cliff,Cliffside West",College Stadium,Café,Skating Rink,General Entertainment,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
10,Scarborough,"Dorset Park,Scarborough Town Centre,Wexford He...",Indian Restaurant,Chinese Restaurant,Pet Store,Vietnamese Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
11,Scarborough,"Maryvale,Wexford",Sandwich Place,Auto Garage,Shopping Mall,Breakfast Spot,Bakery,Donut Shop,Discount Store,Distribution Center,Dog Run,Doner Restaurant


### Cluster 3

In [106]:
# Rows with k-means label 2 (shown under the "Cluster 3" heading).
# Columns kept: positions 1 and 2, plus everything from position 6 onwards.
cluster_columns = toronto_merged.columns[[1, 2] + list(range(6, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Clusters'] == 2, cluster_columns]

Unnamed: 0,Borough,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
91,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout...",Baseball Field,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore,Farmers Market
97,North York,"Emery,Humberlea",Furniture / Home Store,Baseball Field,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Yoga Studio,Dessert Shop


### Cluster 4

In [108]:
# Rows with k-means label 3 (shown under the "Cluster 4" heading).
# Columns kept: positions 1 and 2, plus everything from position 6 onwards.
cluster_columns = toronto_merged.columns[[1, 2] + list(range(6, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Clusters'] == 3, cluster_columns]

Unnamed: 0,Borough,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,Scarborough Village,Playground,Yoga Studio,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
14,Scarborough,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Park,Bakery,Playground,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
23,North York,York Mills West,Park,Bank,Convenience Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dessert Shop,Donut Shop
25,North York,Parkwoods,Park,Food & Drink Shop,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant
30,North York,"CFB Toronto,Downsview East",Park,Snack Place,Airport,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
40,East York,East Toronto,Park,Convenience Store,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Department Store,Donut Shop
44,Central Toronto,Lawrence Park,Park,Bus Line,Swim School,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop,Dessert Shop
50,Downtown Toronto,Rosedale,Park,Trail,Playground,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
74,York,Caledonia-Fairbanks,Park,Market,Women's Store,Golf Course,Gluten-free Restaurant,Electronics Store,Eastern European Restaurant,Gourmet Shop,Dumpling Restaurant,Drugstore
79,North York,"Downsview,North Park,Upwood Park",Basketball Court,Park,Bakery,Construction & Landscaping,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop


### Cluster 5

In [109]:
# Rows with k-means label 4 (shown under the "Cluster 5" heading).
# Columns kept: positions 1 and 2, plus everything from position 6 onwards.
cluster_columns = toronto_merged.columns[[1, 2] + list(range(6, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Clusters'] == 4, cluster_columns]

Unnamed: 0,Borough,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
81,York,"The Junction North,Runnymede",Convenience Store,Grocery Store,Bus Line,Pizza Place,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
96,North York,Humber Summit,Empanada Restaurant,Pizza Place,Yoga Studio,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Donut Shop
99,Etobicoke,Westmount,Pizza Place,Chinese Restaurant,Coffee Shop,Intersection,Sandwich Place,Yoga Studio,Dim Sum Restaurant,Diner,Discount Store,Distribution Center
100,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie...",Bus Line,Pizza Place,Sandwich Place,Mobile Phone Shop,Yoga Studio,Distribution Center,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant
