# Coursera Capstone

This notebook gonna be used for the Coursera Capstone project

In [1]:
import pandas as pd
import numpy as np

In [2]:
print('Hello Capstone Project Course!')

Hello Capstone Project Course!


## Getting the data

As I'm not able to install BeautifulSoup gonna scrap the webpage with Pandas

In [3]:
data = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', flavor = 'bs4', header = 0)
data

[    Postcode           Borough  \
 0        M1A      Not assigned   
 1        M2A      Not assigned   
 2        M3A        North York   
 3        M4A        North York   
 4        M5A  Downtown Toronto   
 5        M5A  Downtown Toronto   
 6        M6A        North York   
 7        M6A        North York   
 8        M7A      Queen's Park   
 9        M8A      Not assigned   
 10       M9A         Etobicoke   
 11       M1B       Scarborough   
 12       M1B       Scarborough   
 13       M2B      Not assigned   
 14       M3B        North York   
 15       M4B         East York   
 16       M4B         East York   
 17       M5B  Downtown Toronto   
 18       M5B  Downtown Toronto   
 19       M6B        North York   
 20       M7B      Not assigned   
 21       M8B      Not assigned   
 22       M9B         Etobicoke   
 23       M9B         Etobicoke   
 24       M9B         Etobicoke   
 25       M9B         Etobicoke   
 26       M9B         Etobicoke   
 27       M1C       

As pd.read_html gives us a list containing all the tables, we just need to get the first one.
Once done, we just select the rows with no 'Not Assigned value' in the 'Borough' column

In [4]:
toronto = data[0]
toronto = toronto[toronto['Borough'] != 'Not assigned']
toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


We group the data frame by the first two rows and aggregate de value of Neighbourhood separated by a coma.

In [5]:
toronto = toronto.groupby(['Postcode', 'Borough'], as_index = False)['Neighbourhood'].agg(', '.join)

We replace the value 'Not assigned0 in Neighbourhood by the value of the same row in 'Borough'

In [6]:
toronto.Neighbourhood.replace(to_replace = 'Not assigned', value = toronto.Borough, inplace = True)

In [7]:
toronto.shape

(103, 3)

## Longitudes and Latitudes

In [8]:
import geocoder

In [None]:
# initialize your variable to None
lat_lng_coords = None

# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('{}, Toronto, Ontario'.format(toronto['Postcode']))
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

In [9]:
geospatial = pd.read_csv('Geospatial_Coordinates.csv')
geospatial.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [14]:
geospatial.rename(index=str, columns={"Postal Code": "Postcode"}, inplace = True)
geospatial.head()

In [40]:
neighborhoods = pd.merge(toronto, geospatial, on = 'Postcode')
neighborhoods.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Clustering

In [41]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
len(neighborhoods['Borough'].unique()), neighborhoods.shape[0]))

The dataframe has 11 boroughs and 103 neighborhoods.


In [24]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [None]:
address = 'Toronto, CN'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6425637, -79.3870871832047.

In [27]:
import folium

In [44]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

In [52]:
# FourSquare credentials:
CLIENT_ID = "REMOVED"
CLIENT_SECRET = "REMOVED"
VERSION = "20180605"

In [46]:
neighborhoods.loc[0, 'Neighbourhood']

'Rouge, Malvern'

In [47]:
neighborhood_latitude = neighborhoods.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighborhoods.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = neighborhoods.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rouge, Malvern are 43.806686299999996, -79.19435340000001.


In [None]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 1000 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

url

In [54]:
import requests 
import json

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c18e2c56a607133fa898245'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Malvern',
  'headerFullLocation': 'Malvern, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 19,
  'suggestedBounds': {'ne': {'lat': 43.81568630900001,
    'lng': -79.18190576146081},
   'sw': {'lat': 43.797686290999984, 'lng': -79.20680103853921}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4d669cba83865481c948fa53',
       'name': 'Images Salon & Spa',
       'location': {'address': '8130 Sheppard Ave E',
        'crossStreet': 'Morningside Ave',
        'lat': 43.80228301948931,
        'lng': -79.19856472801668,
        'labeledLatLngs'

In [55]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [59]:
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]         

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Images Salon & Spa,Spa,43.802283,-79.198565
1,Caribbean Wave,Caribbean Restaurant,43.798558,-79.195777
2,Wendy's,Fast Food Restaurant,43.802008,-79.19808
3,Harvey's,Fast Food Restaurant,43.800106,-79.198258
4,Wendy's,Fast Food Restaurant,43.807448,-79.199056


## Exploring neighborhoods

In [61]:
def getNearbyVenues(names, latitudes, longitudes, radius=2000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [63]:
toronto_venues = getNearbyVenues(names=neighborhoods['Neighbourhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West, Steeles West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The D

In [64]:
# Check shape
print(toronto_venues.shape)
toronto_venues.head()

(8488, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,African Rainforest Pavilion,43.817725,-79.183433,Zoo Exhibit
1,"Rouge, Malvern",43.806686,-79.194353,Toronto Pan Am Sports Centre,43.790623,-79.193869,Athletics & Sports
2,"Rouge, Malvern",43.806686,-79.194353,Toronto Zoo,43.820582,-79.181551,Zoo
3,"Rouge, Malvern",43.806686,-79.194353,Canadiana exhibit,43.817962,-79.193374,Zoo Exhibit
4,"Rouge, Malvern",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa


In [65]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Agincourt,100,100,100,100,100,100
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",89,89,89,89,89,89
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",51,51,51,51,51,51
"Alderwood, Long Branch",100,100,100,100,100,100
"Bathurst Manor, Downsview North, Wilson Heights",55,55,55,55,55,55
Bayview Village,43,43,43,43,43,43
"Bedford Park, Lawrence Manor East",100,100,100,100,100,100
Berczy Park,100,100,100,100,100,100
"Birch Cliff, Cliffside West",45,45,45,45,45,45


In [66]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 322 uniques categories.


## Analyzing each neighborhood

In [68]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhoods'] = toronto_venues['Neighborhood']

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhoods,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,...,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [69]:
toronto_onehot.shape

(8488, 323)

In [70]:
toronto_grouped = toronto_onehot.groupby('Neighborhoods').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhoods,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,...,Vietnamese Restaurant,Volleyball Court,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"Adelaide, King, Richmond",0.00,0.00,0.000000,0.00,0.00,0.020000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.00,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
1,Agincourt,0.00,0.00,0.000000,0.00,0.00,0.010000,0.000000,0.00,0.000000,...,0.010000,0.000000,0.00,0.00,0.00,0.010000,0.010000,0.000000,0.000000,0.000000
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.00,0.00,0.000000,0.00,0.00,0.000000,0.000000,0.00,0.000000,...,0.033708,0.000000,0.00,0.00,0.00,0.011236,0.000000,0.000000,0.000000,0.000000
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.00,0.00,0.000000,0.00,0.00,0.000000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.00,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
4,"Alderwood, Long Branch",0.00,0.00,0.000000,0.00,0.00,0.010000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.00,0.00,0.00,0.020000,0.000000,0.000000,0.000000,0.000000
5,"Bathurst Manor, Downsview North, Wilson Heights",0.00,0.00,0.000000,0.00,0.00,0.018182,0.000000,0.00,0.000000,...,0.000000,0.000000,0.00,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
6,Bayview Village,0.00,0.00,0.000000,0.00,0.00,0.000000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.00,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
7,"Bedford Park, Lawrence Manor East",0.00,0.00,0.000000,0.00,0.00,0.010000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.00,0.00,0.00,0.010000,0.000000,0.000000,0.000000,0.000000
8,Berczy Park,0.00,0.00,0.000000,0.00,0.00,0.020000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.00,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000
9,"Birch Cliff, Cliffside West",0.00,0.00,0.000000,0.00,0.00,0.000000,0.000000,0.00,0.000000,...,0.000000,0.000000,0.00,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000


In [71]:
toronto_grouped.shape

(103, 323)

In [72]:
# Most common venues for each neighborhood

num_top_venues = 5

for hood in toronto_grouped['Neighborhoods']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhoods'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
         venue  freq
0  Coffee Shop  0.06
1        Hotel  0.04
2      Theater  0.04
3         Café  0.04
4   Restaurant  0.03


----Agincourt----
                 venue  freq
0   Chinese Restaurant  0.11
1          Coffee Shop  0.07
2               Bakery  0.04
3             Pharmacy  0.04
4  Japanese Restaurant  0.03


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                 venue  freq
0   Chinese Restaurant  0.18
1          Coffee Shop  0.08
2    Korean Restaurant  0.06
3          Pizza Place  0.04
4  Japanese Restaurant  0.03


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                  venue  freq
0           Coffee Shop  0.16
1  Fast Food Restaurant  0.10
2     Indian Restaurant  0.08
3           Pizza Place  0.08
4         Grocery Store  0.06


----Alderwood, Long Branch----
                  venue  freq
0           Coffee Shop  0.10
1           P

            venue  freq
0     Coffee Shop  0.09
1   Grocery Store  0.05
2  Discount Store  0.05
3  Sandwich Place  0.05
4      Beer Store  0.05


----Fairview, Henry Farm, Oriole----
                       venue  freq
0                Coffee Shop  0.13
1             Sandwich Place  0.05
2             Clothing Store  0.05
3               Intersection  0.04
4  Middle Eastern Restaurant  0.04


----First Canadian Place, Underground city----
                 venue  freq
0          Coffee Shop  0.04
1                Hotel  0.04
2                 Café  0.04
3              Theater  0.04
4  Japanese Restaurant  0.03


----Flemingdon Park, Don Mills South----
                 venue  freq
0          Coffee Shop  0.09
1                 Park  0.05
2  Japanese Restaurant  0.05
3           Restaurant  0.03
4       Sandwich Place  0.03


----Forest Hill North, Forest Hill West----
                 venue  freq
0          Coffee Shop  0.10
1   Italian Restaurant  0.09
2                 Café  0.07
3    

                venue  freq
0         Coffee Shop  0.07
1   French Restaurant  0.06
2                Park  0.04
3  Italian Restaurant  0.04
4                Café  0.04


----The Beaches----
            venue  freq
0     Coffee Shop  0.12
1             Pub  0.08
2           Beach  0.05
3  Breakfast Spot  0.04
4          Bakery  0.04


----The Beaches West, India Bazaar----
         venue  freq
0         Café  0.07
1         Park  0.06
2        Beach  0.05
3  Coffee Shop  0.05
4       Bakery  0.04


----The Danforth West, Riverdale----
                   venue  freq
0       Greek Restaurant  0.09
1                   Café  0.06
2                   Park  0.06
3                 Bakery  0.05
4  Vietnamese Restaurant  0.04


----The Junction North, Runnymede----
                venue  freq
0              Bakery  0.07
1  Italian Restaurant  0.05
2                Café  0.05
3        Burger Joint  0.04
4                 Bar  0.04


----The Kingsway, Montgomery Road, Old Mill North----
          

In [76]:
def return_most_common_venues(row, num_top_venues):
    
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)

    return row_categories_sorted.index.values[0:num_top_venues]

In [77]:
# Let’s create the new dataframe and display the top 10 venues for each neighborhood.

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhoods']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Hotel,Café,Theater,Italian Restaurant,Restaurant,Gastropub,Thai Restaurant,Concert Hall,Gym / Fitness Center
1,Agincourt,Chinese Restaurant,Coffee Shop,Bakery,Pharmacy,Japanese Restaurant,Sandwich Place,Restaurant,Indian Restaurant,Fast Food Restaurant,Cantonese Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Chinese Restaurant,Coffee Shop,Korean Restaurant,Pizza Place,Park,Hong Kong Restaurant,Bakery,Dessert Shop,Vietnamese Restaurant,Japanese Restaurant
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Coffee Shop,Fast Food Restaurant,Indian Restaurant,Pizza Place,Grocery Store,Park,Sandwich Place,Café,Fried Chicken Joint,Steakhouse
4,"Alderwood, Long Branch",Coffee Shop,Burger Joint,Fast Food Restaurant,Pizza Place,Pharmacy,Breakfast Spot,Department Store,Sandwich Place,Seafood Restaurant,Electronics Store
5,"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,Pizza Place,Convenience Store,Park,Sandwich Place,Pharmacy,Bank,Deli / Bodega,Fast Food Restaurant,American Restaurant
6,Bayview Village,Chinese Restaurant,Shopping Mall,Pharmacy,Park,Coffee Shop,Bank,Grocery Store,Café,Liquor Store,Supermarket
7,"Bedford Park, Lawrence Manor East",Coffee Shop,Bakery,Sushi Restaurant,Italian Restaurant,Sandwich Place,Bagel Shop,Pizza Place,Deli / Bodega,Pharmacy,Café
8,Berczy Park,Coffee Shop,Hotel,Italian Restaurant,Café,Gastropub,Restaurant,Seafood Restaurant,Japanese Restaurant,Farmers Market,Park
9,"Birch Cliff, Cliffside West",Coffee Shop,Park,Bank,Grocery Store,Gym,Pharmacy,Fast Food Restaurant,Beer Store,Pizza Place,Fish & Chips Shop


## Cluster neighborhoods

In [78]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhoods', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 2, 2, 0, 2, 0, 2, 2, 1, 0])

In [80]:
toronto_merged = neighborhoods

# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhoods'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,1,Zoo Exhibit,Fast Food Restaurant,Zoo,Athletics & Sports,Coffee Shop,Park,Paper / Office Supplies Store,Other Great Outdoors,Caribbean Restaurant,Curling Ice
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,2,Coffee Shop,Pizza Place,Sandwich Place,Breakfast Spot,Beer Store,Fast Food Restaurant,Supermarket,Neighborhood,Fried Chicken Joint,Liquor Store
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,2,Pizza Place,Coffee Shop,Fast Food Restaurant,Park,Greek Restaurant,Juice Bar,Supermarket,Beer Store,Sports Bar,Breakfast Spot
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Fast Food Restaurant,Park,Furniture / Home Store,Chinese Restaurant,Sandwich Place,Pharmacy,Discount Store,Beer Store,Indian Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,2,Coffee Shop,Fast Food Restaurant,Gym,Clothing Store,Wings Joint,Bank,Indian Restaurant,Pizza Place,Sandwich Place,Restaurant


In [82]:

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Examining the clusters

### Cluster 1

In [85]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Scarborough,0,Coffee Shop,Fast Food Restaurant,Park,Furniture / Home Store,Chinese Restaurant,Sandwich Place,Pharmacy,Discount Store,Beer Store,Indian Restaurant
5,Scarborough,0,Fast Food Restaurant,Coffee Shop,Pizza Place,Grocery Store,Sandwich Place,Pharmacy,Chinese Restaurant,Discount Store,Liquor Store,Shopping Mall
9,Scarborough,0,Coffee Shop,Park,Bank,Grocery Store,Gym,Pharmacy,Fast Food Restaurant,Beer Store,Pizza Place,Fish & Chips Shop
10,Scarborough,0,Coffee Shop,Fast Food Restaurant,Pharmacy,Grocery Store,Pizza Place,Chinese Restaurant,Sandwich Place,Pet Store,Breakfast Spot,Wings Joint
23,North York,0,Coffee Shop,Bank,Pizza Place,Japanese Restaurant,Sandwich Place,Fast Food Restaurant,Fried Chicken Joint,Gym,Grocery Store,Café
24,North York,0,Coffee Shop,Pizza Place,Park,Middle Eastern Restaurant,Pharmacy,Bagel Shop,Korean Restaurant,Grocery Store,Sandwich Place,Bakery
25,North York,0,Coffee Shop,Japanese Restaurant,Pharmacy,Pizza Place,Sandwich Place,Asian Restaurant,Bank,Chinese Restaurant,Supermarket,Caribbean Restaurant
31,North York,0,Vietnamese Restaurant,Coffee Shop,Grocery Store,Pizza Place,Spa,Sandwich Place,Bank,Tea Room,Discount Store,Food & Drink Shop
34,North York,0,Coffee Shop,Fast Food Restaurant,Gym,Grocery Store,Sandwich Place,Clothing Store,Japanese Restaurant,Middle Eastern Restaurant,Mediterranean Restaurant,Discount Store
36,East York,0,Park,Café,Coffee Shop,Pizza Place,Thai Restaurant,Gastropub,Bakery,Breakfast Spot,Ice Cream Shop,Bar


### Cluster 2

In [86]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,1,Zoo Exhibit,Fast Food Restaurant,Zoo,Athletics & Sports,Coffee Shop,Park,Paper / Office Supplies Store,Other Great Outdoors,Caribbean Restaurant,Curling Ice
8,Scarborough,1,Harbor / Marina,Park,Fast Food Restaurant,Pharmacy,Grocery Store,Pizza Place,Coffee Shop,Beach,Sandwich Place,Sushi Restaurant
11,Scarborough,1,Coffee Shop,Fast Food Restaurant,Pharmacy,Middle Eastern Restaurant,Burger Joint,Breakfast Spot,Sandwich Place,Restaurant,Indian Restaurant,Vietnamese Restaurant
12,Scarborough,1,Chinese Restaurant,Coffee Shop,Bakery,Pharmacy,Japanese Restaurant,Sandwich Place,Restaurant,Indian Restaurant,Fast Food Restaurant,Cantonese Restaurant
14,Scarborough,1,Chinese Restaurant,Coffee Shop,Korean Restaurant,Pizza Place,Park,Hong Kong Restaurant,Bakery,Dessert Shop,Vietnamese Restaurant,Japanese Restaurant
15,Scarborough,1,Chinese Restaurant,Coffee Shop,Bakery,Japanese Restaurant,Fast Food Restaurant,Sandwich Place,Pharmacy,Park,Pizza Place,Intersection
19,North York,1,Chinese Restaurant,Shopping Mall,Pharmacy,Park,Coffee Shop,Bank,Grocery Store,Café,Liquor Store,Supermarket
20,North York,1,Coffee Shop,Bank,Burger Joint,Pizza Place,Park,Café,Supermarket,Furniture / Home Store,Japanese Restaurant,Thai Restaurant
21,North York,1,Korean Restaurant,Coffee Shop,Bubble Tea Shop,Japanese Restaurant,Bank,Middle Eastern Restaurant,Dessert Shop,Fast Food Restaurant,Café,Ramen Restaurant
22,North York,1,Korean Restaurant,Ramen Restaurant,Burrito Place,Sushi Restaurant,Fried Chicken Joint,Café,Hotel,Tea Room,Bubble Tea Shop,Dessert Shop


### Cluster 3

In [87]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,2,Coffee Shop,Pizza Place,Sandwich Place,Breakfast Spot,Beer Store,Fast Food Restaurant,Supermarket,Neighborhood,Fried Chicken Joint,Liquor Store
2,Scarborough,2,Pizza Place,Coffee Shop,Fast Food Restaurant,Park,Greek Restaurant,Juice Bar,Supermarket,Beer Store,Sports Bar,Breakfast Spot
4,Scarborough,2,Coffee Shop,Fast Food Restaurant,Gym,Clothing Store,Wings Joint,Bank,Indian Restaurant,Pizza Place,Sandwich Place,Restaurant
6,Scarborough,2,Grocery Store,Fast Food Restaurant,Coffee Shop,Chinese Restaurant,Pharmacy,Pizza Place,Discount Store,Beer Store,Train Station,Bank
7,Scarborough,2,Fast Food Restaurant,Coffee Shop,Sandwich Place,Burger Joint,Shoe Store,Pizza Place,Pet Store,Burrito Place,Cosmetics Shop,Sporting Goods Shop
13,Scarborough,2,Fast Food Restaurant,Coffee Shop,Pizza Place,Park,Chinese Restaurant,Pharmacy,Bakery,Bank,Sandwich Place,Falafel Restaurant
16,Scarborough,2,Sculpture Garden,Grocery Store,Farm,Playground,Golf Course,Egyptian Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
17,North York,2,Coffee Shop,Bank,Pharmacy,Japanese Restaurant,Sandwich Place,Chinese Restaurant,Park,Bakery,Pizza Place,Asian Restaurant
18,North York,2,Coffee Shop,Sandwich Place,Clothing Store,Intersection,Fast Food Restaurant,Middle Eastern Restaurant,Pharmacy,Park,Bakery,Pizza Place
26,North York,2,Coffee Shop,Japanese Restaurant,Park,Restaurant,Pizza Place,Bank,Burger Joint,Supermarket,American Restaurant,Steakhouse


### Cluster 4

In [88]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
93,Etobicoke,3,Pharmacy,Coffee Shop,Park,Café,Golf Course,Liquor Store,Grocery Store,Shopping Mall,Bank,Bus Stop


### Cluster 5

In [90]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
78,West Toronto,4,Café,Coffee Shop,Restaurant,Bar,Arts & Crafts Store,Sandwich Place,Thrift / Vintage Store,Athletics & Sports,Caribbean Restaurant,Tea Room
