In [1]:
from bs4 import BeautifulSoup
import requests
import csv
import pandas as pd
import json
from pandas.io.json import json_normalize
import numpy as np

<h3>Pt 1: Data Scaping</h3>

In [2]:
# send the GET request
data = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

# parse data from the html into a Beautifulsoup object
soup = BeautifulSoup(data, 'lxml')

table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['Postal Code'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

In [3]:
toronto_df=pd.DataFrame(table_contents)
toronto_df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East TorontoBusiness reply mail Processing Cen...,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [4]:
toronto_df['Borough']=toronto_df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

<h3>Pt 2: Adding Latitude and Longitude to the dataframe</h3>

In [5]:
import os, types
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

#@hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.

if os.environ.get('RUNTIME_ENV_LOCATION_TYPE') == 'external':
    endpoint_ = 'https://s3.ap-geo.objectstorage.softlayer.net'
else:
    endpoint_ = 'https://s3.ap-geo.objectstorage.service.networklayer.com'

client_ = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='',
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url=)

body = client_.get_object(Bucket='myfirstnotebook-donotdelete-pr-',Key='Geospatial_Coordinates.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

longlat_df= pd.read_csv(body)
longlat_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


<h4>Merge the two dataframes</h4>

In [6]:
dataframe = pd.merge(toronto_df, longlat_df, on='Postal Code')
dataframe.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


In [7]:
print('The dataframe has {} boroughs'.format(len(dataframe['Borough'].unique())))

The dataframe has 15 boroughs


In [8]:
dataframe.shape 

(103, 5)

<h4> Use geopy library to get the latitude and longitude values of Toronto City </h4>

In [9]:
!pip install folium



In [10]:
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

address = 'Toronto , Ontario'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto City are 43.6534817, -79.3839347.


<h4>Create a map of Toronto with neighborhoods superimposed on top</h4>

In [11]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(dataframe['Latitude'], dataframe['Longitude'], dataframe['Borough'], dataframe['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [12]:
#get the boroughs of Toronto
print(dataframe.Borough.unique())

['North York' 'Downtown Toronto' "Queen's Park" 'Etobicoke' 'Scarborough'
 'East York' 'York' 'East Toronto' 'West Toronto' 'East York/East Toronto'
 'Central Toronto' 'Mississauga' 'Downtown Toronto Stn A'
 'Etobicoke Northwest' 'East Toronto Business']


<h4>We segment and cluster only the neighborhoods in Etobicoke</h4>

In [13]:
etobicoke_data = dataframe[dataframe['Borough'] == 'Etobicoke'].reset_index(drop=True)
etobicoke_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201
3,M9P,Etobicoke,Westmount,43.696319,-79.532242
4,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724


In [14]:
address = 'Etobicoke, Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
elocation = geolocator.geocode(address)
elatitude = location.latitude
elongitude = location.longitude
print('The geograpical coordinates of Etobicoke are {}, {}.'.format(elatitude, elongitude))

The geograpical coordinates of Etobicoke are 43.6534817, -79.3839347.


In [15]:
# create map of Manhattan using latitude and longitude values
map_etobicoke = folium.Map(location=[elatitude, elongitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(etobicoke_data['Latitude'], etobicoke_data['Longitude'], etobicoke_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_etobicoke)  
    
map_etobicoke

<h4>Define Foursquare Credentials and Version</h4>

In [None]:
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

<h4>Let's explore the first neighborhood in our dataframe.</h4>

In [17]:
etobicoke_data.loc[0, 'Neighborhood']

'Islington Avenue'

In [18]:
neighborhood_latitude = etobicoke_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = etobicoke_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = etobicoke_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Islington Avenue are 43.6678556, -79.53224240000002.


<h4> Now, let's get the top 100 venues that are in Islington Avenue within a radius of 1000 meters </h4>

In [19]:
LIMIT = 100 
radius = 1000

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '605f66f2920e0a3ea896f5b8'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Edenbridge - Humber Valley',
  'headerFullLocation': 'Edenbridge - Humber Valley, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 12,
  'suggestedBounds': {'ne': {'lat': 43.676855609000015,
    'lng': -79.51982358836784},
   'sw': {'lat': 43.65885559099999, 'lng': -79.54466121163219}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bfd53764cf820a13849ecf4',
       'name': "Java Joe's Village Cafe",
       'location': {'address': '1500 Islington Ave',
        'crossStreet': 'at Rathburn Rd',
        'lat': 43.662460906352436,
        'lng': -7

In [20]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

<h4>Clean the json and structure it into a pandas dataframe</h4>

In [21]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,Java Joe's Village Cafe,Café,43.662461,-79.532054
1,St Georges Golf and Country Club,Golf Course,43.674395,-79.537142
2,TD Canada Trust,Bank,43.662545,-79.531749
3,Shoppers Drug Mart,Pharmacy,43.663067,-79.531753
4,COBS Bread,Bakery,43.66494,-79.520485


In [22]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

12 venues were returned by Foursquare.


<h4> Function to repeat the same process to all the neighborhoods in Etobicoke </h4>

In [23]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [24]:
etobicoke_venues = getNearbyVenues(names=etobicoke_data['Neighborhood'],
                                   latitudes=etobicoke_data['Latitude'],
                                   longitudes=etobicoke_data['Longitude']
                                  )

Islington Avenue
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Westmount
Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens
New Toronto, Mimico South, Humber Bay Shores
South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens
Alderwood, Long Branch
The Kingsway, Montgomery Road, Old Mill North
Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East
Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West


In [25]:
print(etobicoke_venues.shape)
etobicoke_venues.head()

(255, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Islington Avenue,43.667856,-79.532242,Java Joe's Village Cafe,43.662461,-79.532054,Café
1,Islington Avenue,43.667856,-79.532242,St Georges Golf and Country Club,43.674395,-79.537142,Golf Course
2,Islington Avenue,43.667856,-79.532242,TD Canada Trust,43.662545,-79.531749,Bank
3,Islington Avenue,43.667856,-79.532242,Shoppers Drug Mart,43.663067,-79.531753,Pharmacy
4,Islington Avenue,43.667856,-79.532242,COBS Bread,43.66494,-79.520485,Bakery


<h4>No. of venues were returned for each neighborhood</h4>

In [26]:
etobicoke_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alderwood, Long Branch",27,27,27,27,27,27
"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",17,17,17,17,17,17
Islington Avenue,12,12,12,12,12,12
"Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens",16,16,16,16,16,16
"Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West",62,62,62,62,62,62
"New Toronto, Mimico South, Humber Bay Shores",19,19,19,19,19,19
"Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East",8,8,8,8,8,8
"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens",19,19,19,19,19,19
"The Kingsway, Montgomery Road, Old Mill North",46,46,46,46,46,46
"West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale",13,13,13,13,13,13


<h4>Analyze Each Neighborhood</h4>

In [27]:
# one hot encoding
etobicoke_onehot = pd.get_dummies(etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
etobicoke_onehot['Neighborhood'] = etobicoke_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [etobicoke_onehot.columns[-1]] + list(etobicoke_onehot.columns[:-1])
etobicoke_onehot = etobicoke_onehot[fixed_columns]

etobicoke_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,...,Supplement Shop,Sushi Restaurant,Tanning Salon,Tapas Restaurant,Thai Restaurant,Toy / Game Store,Trail,Video Store,Wings Joint,Yoga Studio
0,Islington Avenue,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Islington Avenue,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Islington Avenue,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,Islington Avenue,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Islington Avenue,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
etobicoke_onehot.shape

(255, 97)

In [29]:
#Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

etobicoke_grouped = etobicoke_onehot.groupby('Neighborhood').mean().reset_index()
etobicoke_grouped

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Auto Garage,Automotive Shop,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,...,Supplement Shop,Sushi Restaurant,Tanning Salon,Tapas Restaurant,Thai Restaurant,Toy / Game Store,Trail,Video Store,Wings Joint,Yoga Studio
0,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Islington Avenue,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.083333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Kingsview Village, St. Phillips, Martin Grove ...",0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,...,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Mimico NW, The Queensway West, South of Bloor,...",0.0,0.016129,0.0,0.016129,0.032258,0.0,0.032258,0.032258,0.0,...,0.016129,0.032258,0.016129,0.0,0.016129,0.0,0.0,0.0,0.016129,0.032258
5,"New Toronto, Mimico South, Humber Bay Shores",0.052632,0.052632,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Old Mill South, King's Mill Park, Sunnylea, Hu...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"South Steeles, Silverstone, Humbergate, Jamest...",0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0
8,"The Kingsway, Montgomery Road, Old Mill North",0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.043478,0.021739,...,0.0,0.043478,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0
9,"West Deane Park, Princess Gardens, Martin Grov...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
#Print each neighborhood along with the top 5 most common venues

num_top_venues = 5

for hood in etobicoke_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = etobicoke_grouped[etobicoke_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alderwood, Long Branch----
            venue  freq
0  Discount Store  0.11
1            Park  0.07
2        Pharmacy  0.07
3     Pizza Place  0.07
4             Pub  0.04


----Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood----
               venue  freq
0        Coffee Shop  0.12
1      Grocery Store  0.06
2        Gas Station  0.06
3          Pet Store  0.06
4  Electronics Store  0.06


----Islington Avenue----
               venue  freq
0           Pharmacy  0.17
1      Grocery Store  0.08
2               Café  0.08
3  Convenience Store  0.08
4               Park  0.08


----Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens----
                 venue  freq
0             Pharmacy  0.12
1  American Restaurant  0.06
2          Gas Station  0.06
3       Sandwich Place  0.06
4          Coffee Shop  0.06


----Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West----
                  venue  freq
0          

In [31]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [32]:
#Put this into a pandas dataframe

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = etobicoke_grouped['Neighborhood']

for ind in np.arange(etobicoke_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(etobicoke_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alderwood, Long Branch",Discount Store,Park,Pharmacy,Pizza Place,Sandwich Place,Shopping Mall,Coffee Shop,Liquor Store,Moroccan Restaurant,Dance Studio
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",Coffee Shop,Pharmacy,Shopping Plaza,Grocery Store,Fish & Chips Shop,Farmers Market,Electronics Store,Liquor Store,College Rec Center,Pet Store
2,Islington Avenue,Pharmacy,Bakery,Shopping Mall,Convenience Store,Skating Rink,Playground,Bank,Golf Course,Grocery Store,Park
3,"Kingsview Village, St. Phillips, Martin Grove ...",Pharmacy,Beer Store,Intersection,Dry Cleaner,Mobile Phone Shop,Coffee Shop,Pizza Place,Chinese Restaurant,Sandwich Place,Bus Line
4,"Mimico NW, The Queensway West, South of Bloor,...",Restaurant,Gym / Fitness Center,Burrito Place,Yoga Studio,Italian Restaurant,Sandwich Place,Burger Joint,Convenience Store,Bank,Bakery


<h3>Cluster Neighborhoods</h3>

In [33]:
# set number of clusters
kclusters = 5

etobicoke_grouped_clustering = etobicoke_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(etobicoke_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 1, 3, 1, 4, 4, 2, 0, 4, 4], dtype=int32)

In [34]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

etobicoke_merged = etobicoke_data

# merge etobicoke_grouped with etobicoke_data to add latitude/longitude for each neighborhood
etobicoke_merged = etobicoke_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

etobicoke_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,3,Pharmacy,Bakery,Shopping Mall,Convenience Store,Skating Rink,Playground,Bank,Golf Course,Grocery Store,Park
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724,4,Park,Pizza Place,Hotel,Restaurant,Convenience Store,Mexican Restaurant,Fish & Chips Shop,Clothing Store,Bank,Gym
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201,1,Coffee Shop,Pharmacy,Shopping Plaza,Grocery Store,Fish & Chips Shop,Farmers Market,Electronics Store,Liquor Store,College Rec Center,Pet Store
3,M9P,Etobicoke,Westmount,43.696319,-79.532242,1,Gas Station,Golf Course,Ice Cream Shop,Middle Eastern Restaurant,Park,Coffee Shop,Pizza Place,Chinese Restaurant,Discount Store,Sandwich Place
4,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,1,Pharmacy,Beer Store,Intersection,Dry Cleaner,Mobile Phone Shop,Coffee Shop,Pizza Place,Chinese Restaurant,Sandwich Place,Bus Line


In [35]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(etobicoke_merged['Latitude'], etobicoke_merged['Longitude'], etobicoke_merged['Neighborhood'], etobicoke_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h4> Examine Clusters</h4>

In [36]:
#Cluster 1

etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 0, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Etobicoke,0,Pizza Place,Grocery Store,Pharmacy,Beer Store,Park,Coffee Shop,Liquor Store,Caribbean Restaurant,Sandwich Place,Fast Food Restaurant


In [37]:
#Cluster 2

etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 1, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Etobicoke,1,Coffee Shop,Pharmacy,Shopping Plaza,Grocery Store,Fish & Chips Shop,Farmers Market,Electronics Store,Liquor Store,College Rec Center,Pet Store
3,Etobicoke,1,Gas Station,Golf Course,Ice Cream Shop,Middle Eastern Restaurant,Park,Coffee Shop,Pizza Place,Chinese Restaurant,Discount Store,Sandwich Place
4,Etobicoke,1,Pharmacy,Beer Store,Intersection,Dry Cleaner,Mobile Phone Shop,Coffee Shop,Pizza Place,Chinese Restaurant,Sandwich Place,Bus Line


In [38]:
#Cluster 3

etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 2, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Etobicoke,2,Park,Ice Cream Shop,Eastern European Restaurant,Bus Stop,Italian Restaurant,Shopping Mall,Electronics Store,Cupcake Shop,Dance Studio,Deli / Bodega


In [39]:
#Cluster 4

etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 3, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Etobicoke,3,Pharmacy,Bakery,Shopping Mall,Convenience Store,Skating Rink,Playground,Bank,Golf Course,Grocery Store,Park


In [40]:
#Cluster 5

etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 4, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Etobicoke,4,Park,Pizza Place,Hotel,Restaurant,Convenience Store,Mexican Restaurant,Fish & Chips Shop,Clothing Store,Bank,Gym
5,Etobicoke,4,Park,Café,Fast Food Restaurant,Grocery Store,Gym,Indian Restaurant,Italian Restaurant,Dessert Shop,Liquor Store,Mexican Restaurant
7,Etobicoke,4,Discount Store,Park,Pharmacy,Pizza Place,Sandwich Place,Shopping Mall,Coffee Shop,Liquor Store,Moroccan Restaurant,Dance Studio
8,Etobicoke,4,Coffee Shop,Italian Restaurant,Breakfast Spot,French Restaurant,Park,Pub,Dessert Shop,Sushi Restaurant,Burger Joint,Pizza Place
10,Etobicoke,4,Restaurant,Gym / Fitness Center,Burrito Place,Yoga Studio,Italian Restaurant,Sandwich Place,Burger Joint,Convenience Store,Bank,Bakery
