# Capstone Project - Analyzing location for an African Restaurant

import all required libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
#import folium # map rendering library

print('Libraries imported.')

Libraries imported.


read the wikipedia link webpage html tables into an array of pandas dataframes

In [2]:
rawdata = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

check the datatype of our variable

since the neighbourhood table is the first table on the webpage, we would reference it using position [0]

In [5]:
data = rawdata[0]

In [6]:
data.describe()

Unnamed: 0,Postal code,Borough,Neighborhood
count,180,180,103
unique,180,11,98
top,M7P,Not assigned,Downsview
freq,1,77,4


In [7]:
data

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
7,M8A,Not assigned,
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern / Rouge


rename the dataframe columns

In [8]:
data.rename(columns = {'Postal code':'PostalCode', 
                       'Borough':'Borough',
                       'Neighbourhood':'Neighborhood'}, inplace = True)

In [9]:
data = data[data.Borough != 'Not assigned']

In [10]:
data = data[data.Neighborhood != 'Not assigned']

In [14]:
# convert to list
data['Neighborhood'] = data['Neighborhood'].str.split('/')

In [15]:
# convert list of pd.Series then stack it
data = (data
 .set_index(['PostalCode','Borough'])['Neighborhood']
 .apply(pd.Series)
 .stack()
 .reset_index()
 .drop('level_2', axis=1)
 .rename(columns={0:'Neighborhood'}))

In [136]:
data

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park
3,M5A,Downtown Toronto,Harbourfront
4,M6A,North York,Lawrence Manor
5,M6A,North York,Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park
7,M7A,Downtown Toronto,Ontario Provincial Government
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern


check for duplicates

In [16]:
# check for duplicates
duplicateRowsDF = data[data.duplicated()]
 
print("Duplicate Rows except first occurrence based on all columns are :")
print(duplicateRowsDF)

Duplicate Rows except first occurrence based on all columns are :
Empty DataFrame
Columns: [PostalCode, Borough, Neighborhood]
Index: []


show the number of rows and columns of the dataframe

In [17]:
data.shape

(208, 3)

# --------------------- THIS IS THE START OF PART TWO(2) OF THE NOTEBOOK --------------------- #

read the csv file into a pandas dataframe

In [18]:
coordinate_data = pd.read_csv("http://cocl.us/Geospatial_data")

In [19]:
coordinate_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


rename the dataframe column 'Postal Code' to 'PostalCode' in order to make it possible to join/merge with the main dataframe

In [20]:
coordinate_data.rename(columns = {'Postal Code':'PostalCode'}, inplace = True) 

merge/join the two dataframes to create a single dataframe

In [21]:
neighbourhood_data = pd.merge(data, coordinate_data, on='PostalCode')

neighbourhood_data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
3,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
4,M6A,North York,Lawrence Manor,43.718518,-79.464763
5,M6A,North York,Lawrence Heights,43.718518,-79.464763
6,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
7,M7A,Downtown Toronto,Ontario Provincial Government,43.662301,-79.389494
8,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
9,M1B,Scarborough,Malvern,43.806686,-79.194353


In [22]:
print('The dataframe has {} neighborhoods.'.format(
        neighbourhood_data.shape[0]
    )
)

The dataframe has 208 neighborhoods.


#### Use geopy library to get the latitude and longitude values of the City of Toronto ####

In order to define an instance of the geocoder, we need to define a user_agent. We will name our agent toronto_explorer, as shown below.

In [23]:
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### Create a map of Toronto with neighborhoods superimposed on top ####

In [25]:
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.4.0               |             py_0          26 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    openssl-1.1.1f             |       h516909a_0         2.1 MB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    ------------------------------------------------------------
                       

In [26]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighbourhood_data['Latitude'], neighbourhood_data['Longitude'], neighbourhood_data['Borough'], neighbourhood_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [27]:
neighbourhood_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
3,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
4,M6A,North York,Lawrence Manor,43.718518,-79.464763


Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.

#### Define Foursquare Credentials and Version ####

In [28]:
CLIENT_ID = 'RTHVMUAR5T0NTZL1ZLRH4SUU53FLSNKN0AGRLKBEU5MOB14H' # your Foursquare ID
CLIENT_SECRET = '4K5MWMBRHIWTVXMWZHV3FHJJF4YTRBOZOVV14A1QCIZHF4AJ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RTHVMUAR5T0NTZL1ZLRH4SUU53FLSNKN0AGRLKBEU5MOB14H
CLIENT_SECRET:4K5MWMBRHIWTVXMWZHV3FHJJF4YTRBOZOVV14A1QCIZHF4AJ


#### Let's explore the first neighborhood in our dataframe. ####

Get the neighborhood's name.

In [29]:
neighbourhood_data.loc[0, 'Neighborhood']

'Parkwoods'

Get the neighborhood's latitude and longitude values.

In [30]:
neighborhood_latitude = neighbourhood_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighbourhood_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = neighbourhood_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


#### Now, let's get the top 100 venues that are in Marble Hill within a radius of 500 meters. ####

First, let's create the GET request URL. Name your URL url.

In [31]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=RTHVMUAR5T0NTZL1ZLRH4SUU53FLSNKN0AGRLKBEU5MOB14H&client_secret=4K5MWMBRHIWTVXMWZHV3FHJJF4YTRBOZOVV14A1QCIZHF4AJ&v=20180605&ll=43.7532586,-79.3296565&radius=500&limit=100'

Send the GET request and examine the resutls

In [32]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e8fa7f71d67cb001bdcb8c2'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 2,
  'suggestedBounds': {'ne': {'lat': 43.757758604500005,
    'lng': -79.32343823984928},
   'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 245,
        'cc': 'CA',
        'c

function that extracts the category of the venue

In [33]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Now we are ready to clean the json and structure it into a pandas dataframe.

In [34]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,Variety Store,Food & Drink Shop,43.751974,-79.333114


And how many venues were returned by Foursquare?

In [35]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

2 venues were returned by Foursquare.


## 2. Explore Neighborhoods in Downtown Toronto ##

#### Let's create a function to repeat the same process to all the neighborhoods in Downtown Toronto ####

In [36]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, query='Food', categoryID = '4d4b7105d754a06374d81259'):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&q={}&radius={}&limit={}&categoryId={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        lat, 
        lng,
        query,
        radius, 
        LIMIT,
        categoryID)
            
        # make the GET request
        results = requests.get(url).json()["response"]['venues']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['name'], 
            v['location']['lat'], 
            v['location']['lng'],  
            v['categories'][0]['pluralName']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [37]:
toronto_venues = getNearbyVenues(names=neighbourhood_data['Neighborhood'],
                                   latitudes=neighbourhood_data['Latitude'],
                                   longitudes=neighbourhood_data['Longitude']
                                  )

#### Let's check the size of the resulting dataframe ####

In [38]:
print(toronto_venues.shape)
toronto_venues.head()

(4781, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurants
1,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shops
2,Victoria Village,43.725882,-79.315572,The Frig,43.727051,-79.317418,French Restaurants
3,Victoria Village,43.725882,-79.315572,Pizza Nova,43.725824,-79.31286,Pizza Places
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurants


Let's check how many venues were returned for each neighborhood

In [39]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,50,50,50,50,50,50
Agincourt North,19,19,19,19,19,19
Albion Gardens,38,38,38,38,38,38
Bathurst Quay,4,4,4,4,4,4
Beaumond Heights,38,38,38,38,38,38
Bloordale Gardens,5,5,5,5,5,5
Cabbagetown,48,48,48,48,48,48
Chinatown,50,50,50,50,50,50
Clairlea,9,9,9,9,9,9
Cliffcrest,1,1,1,1,1,1


#### Let's find out how many unique categories can be curated from all the returned venues ####

In [40]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 139 uniques categories.


## 3. Analyze Each Neighborhood ##

In [51]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurants,African Restaurants,American Restaurants,Argentinian Restaurants,Asian Restaurants,Australian Restaurants,BBQ Joints,Bagel Shops,Bakeries,Bars,Beer Bars,Bistros,Bowling Alleys,Brazilian Restaurants,Breakfast Spots,Breweries,Bubble Tea Shops,Buffets,Burger Joints,Burmese Restaurants,Burrito Places,Butchers,Cafeterias,Cafés,Cajun / Creole Restaurants,Cambodian Restaurants,Candy Stores,Caribbean Restaurants,Chinese Breakfast Places,Chinese Restaurants,Chocolate Shops,Cocktail Bars,Coffee Shops,Comfort Food Restaurants,Comic Shops,Convenience Stores,Corporate Cafeterias,Coworking Spaces,Creperies,Cuban Restaurants,Cupcake Shops,Delis / Bodegas,Dessert Shops,Dim Sum Restaurants,Diners,Doner Restaurants,Donut Shops,Dumpling Restaurants,Eastern European Restaurants,Empanada Restaurants,English Restaurants,Ethiopian Restaurants,Event Spaces,Falafel Restaurants,Fast Food Restaurants,Filipino Restaurants,Fish & Chips Shops,Food,Food & Drink Shops,Food Courts,Food Services,Food Stands,Food Trucks,French Restaurants,Fried Chicken Joints,Frozen Yogurt Shops,Gastropubs,Gluten-free Restaurants,Gourmet Shops,Greek Restaurants,Grocery Stores,Hakka Restaurants,Halal Restaurants,Hawaiian Restaurants,Hot Dog Joints,Hotel Bars,Hungarian Restaurants,Ice Cream Shops,Indian Restaurants,Irish Pubs,Italian Restaurants,Japanese Restaurants,Jewish Restaurants,Juice Bars,Kebab Restaurants,Korean Restaurants,Latin American Restaurants,Lebanese Restaurants,Lounges,Mac & Cheese Joints,Mediterranean Restaurants,Mexican Restaurants,Middle Eastern Restaurants,Modern European Restaurants,New American Restaurants,Noodle Houses,Offices,Pakistani Restaurants,Pastry Shops,Peking Duck Restaurants,Persian Restaurants,Pizza Places,Poke Places,Polish Restaurants,Pool Halls,Portuguese Restaurants,Poutine Places,Pubs,Ramen Restaurants,Restaurants,Salad Places,Sandwich Places,Seafood Restaurants,Shawarma Places,Shopping Malls,Smoothie Shops,Snack Places,South Indian Restaurants,Southern / Soul Food Restaurants,Spanish Restaurants,Speakeasies,Sports Bars,Sri Lankan Restaurants,Steakhouses,Sushi Restaurants,Szechuan Restaurants,Taco Places,Taiwanese Restaurants,Tapas Restaurants,Tea Rooms,Thai Restaurants,Theme Restaurants,Tibetan Restaurants,Turkish Restaurants,Ukrainian Restaurants,Vegetarian / Vegan Restaurants,Vietnamese Restaurants,Wine Bars,Wings Joints
0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size.

In [52]:
toronto_onehot.shape

(4781, 140)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category ####

In [53]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Afghan Restaurants,African Restaurants,American Restaurants,Argentinian Restaurants,Asian Restaurants,Australian Restaurants,BBQ Joints,Bagel Shops,Bakeries,Bars,Beer Bars,Bistros,Bowling Alleys,Brazilian Restaurants,Breakfast Spots,Breweries,Bubble Tea Shops,Buffets,Burger Joints,Burmese Restaurants,Burrito Places,Butchers,Cafeterias,Cafés,Cajun / Creole Restaurants,Cambodian Restaurants,Candy Stores,Caribbean Restaurants,Chinese Breakfast Places,Chinese Restaurants,Chocolate Shops,Cocktail Bars,Coffee Shops,Comfort Food Restaurants,Comic Shops,Convenience Stores,Corporate Cafeterias,Coworking Spaces,Creperies,Cuban Restaurants,Cupcake Shops,Delis / Bodegas,Dessert Shops,Dim Sum Restaurants,Diners,Doner Restaurants,Donut Shops,Dumpling Restaurants,Eastern European Restaurants,Empanada Restaurants,English Restaurants,Ethiopian Restaurants,Event Spaces,Falafel Restaurants,Fast Food Restaurants,Filipino Restaurants,Fish & Chips Shops,Food,Food & Drink Shops,Food Courts,Food Services,Food Stands,Food Trucks,French Restaurants,Fried Chicken Joints,Frozen Yogurt Shops,Gastropubs,Gluten-free Restaurants,Gourmet Shops,Greek Restaurants,Grocery Stores,Hakka Restaurants,Halal Restaurants,Hawaiian Restaurants,Hot Dog Joints,Hotel Bars,Hungarian Restaurants,Ice Cream Shops,Indian Restaurants,Irish Pubs,Italian Restaurants,Japanese Restaurants,Jewish Restaurants,Juice Bars,Kebab Restaurants,Korean Restaurants,Latin American Restaurants,Lebanese Restaurants,Lounges,Mac & Cheese Joints,Mediterranean Restaurants,Mexican Restaurants,Middle Eastern Restaurants,Modern European Restaurants,New American Restaurants,Noodle Houses,Offices,Pakistani Restaurants,Pastry Shops,Peking Duck Restaurants,Persian Restaurants,Pizza Places,Poke Places,Polish Restaurants,Pool Halls,Portuguese Restaurants,Poutine Places,Pubs,Ramen Restaurants,Restaurants,Salad Places,Sandwich Places,Seafood Restaurants,Shawarma Places,Shopping Malls,Smoothie Shops,Snack Places,South Indian Restaurants,Southern / Soul Food Restaurants,Spanish Restaurants,Speakeasies,Sports Bars,Sri Lankan Restaurants,Steakhouses,Sushi Restaurants,Szechuan Restaurants,Taco Places,Taiwanese Restaurants,Tapas Restaurants,Tea Rooms,Thai Restaurants,Theme Restaurants,Tibetan Restaurants,Turkish Restaurants,Ukrainian Restaurants,Vegetarian / Vegan Restaurants,Vietnamese Restaurants,Wine Bars,Wings Joints
0,Adelaide,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.08,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.26,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.1,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt North,0.0,0.0,0.0,0.0,0.105263,0.0,0.157895,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.210526,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Albion Gardens,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.131579,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.026316,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.078947,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.263158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bathurst Quay,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Beaumond Heights,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.131579,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.026316,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.026316,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.078947,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.263158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Bloordale Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Cabbagetown,0.0,0.0,0.020833,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0625,0.0,0.0,0.020833,0.0,0.0,0.0,0.020833,0.0,0.020833,0.0,0.0,0.0,0.020833,0.0,0.020833,0.0,0.0,0.104167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.020833,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.020833,0.0,0.041667,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.041667,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.020833,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.104167,0.0,0.0,0.0,0.0,0.020833,0.0,0.0,0.0
7,Chinatown,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.14,0.0,0.02,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.04,0.1,0.02,0.0
8,Clairlea,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Cliffcrest,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Let's confirm the new size ####

In [54]:
toronto_grouped.shape

(195, 140)

#### Let's print each neighborhood along with the top 5 most common venues ####

In [55]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Adelaide ----
                  venue  freq
0          Coffee Shops  0.26
1           Restaurants  0.10
2           Food Courts  0.10
3                 Cafés  0.08
4  Japanese Restaurants  0.06


---- Agincourt North ----
                   venue  freq
0    Chinese Restaurants  0.21
1             BBQ Joints  0.16
2  Fast Food Restaurants  0.11
3      Asian Restaurants  0.11
4               Bakeries  0.11


---- Albion Gardens----
                   venue  freq
0           Pizza Places  0.26
1               Bakeries  0.13
2  Caribbean Restaurants  0.11
3     Indian Restaurants  0.08
4        Sandwich Places  0.05


---- Bathurst  Quay ----
                  venue  freq
0  American Restaurants  0.25
1          Coffee Shops  0.25
2                  Bars  0.25
3     Tapas Restaurants  0.25
4    Afghan Restaurants  0.00


---- Beaumond Heights ----
                   venue  freq
0           Pizza Places  0.26
1               Bakeries  0.13
2  Caribbean Restaurants  0.11
3     Indian Re

#### Let's put that into a pandas dataframe ####

First, let's write a function to sort the venues in descending order.

In [56]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [57]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shops,Food Courts,Restaurants,Cafés,Japanese Restaurants,Bars,Middle Eastern Restaurants,Delis / Bodegas,Cafeterias,Pubs
1,Agincourt North,Chinese Restaurants,BBQ Joints,Asian Restaurants,Bakeries,Fast Food Restaurants,Caribbean Restaurants,Pizza Places,Dumpling Restaurants,Coffee Shops,Food Courts
2,Albion Gardens,Pizza Places,Bakeries,Caribbean Restaurants,Indian Restaurants,Sandwich Places,Fast Food Restaurants,Chinese Restaurants,Coffee Shops,Bubble Tea Shops,Food Trucks
3,Bathurst Quay,Tapas Restaurants,Bars,American Restaurants,Coffee Shops,Empanada Restaurants,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Doner Restaurants,Diners
4,Beaumond Heights,Pizza Places,Bakeries,Caribbean Restaurants,Indian Restaurants,Sandwich Places,Fast Food Restaurants,Chinese Restaurants,Coffee Shops,Bubble Tea Shops,Food Trucks


## 4. Cluster Neighborhoods ##

Run k-means to cluster the neighborhood into 5 clusters.

In [59]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 3, 0, 3, 0, 0, 0, 3, 1], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [60]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = neighbourhood_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Fast Food Restaurants,Wings Joints,Dim Sum Restaurants,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Doner Restaurants,Diners,Dessert Shops,English Restaurants
1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,Pizza Places,Coffee Shops,Portuguese Restaurants,French Restaurants,Burger Joints,Dim Sum Restaurants,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Doner Restaurants
2,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636,0.0,Coffee Shops,Italian Restaurants,Cafés,Bakeries,Mediterranean Restaurants,Restaurants,Chinese Restaurants,Breakfast Spots,Pizza Places,Sandwich Places
3,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,0.0,Coffee Shops,Italian Restaurants,Cafés,Bakeries,Mediterranean Restaurants,Restaurants,Chinese Restaurants,Breakfast Spots,Pizza Places,Sandwich Places
4,M6A,North York,Lawrence Manor,43.718518,-79.464763,0.0,Hot Dog Joints,Dessert Shops,BBQ Joints,Cafés,Fast Food Restaurants,Bowling Alleys,Diners,Bakeries,Korean Restaurants,Food Trucks


In [61]:
toronto_merged.dropna(axis = 0, how ='any', inplace=True)

In [62]:
toronto_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Fast Food Restaurants,Wings Joints,Dim Sum Restaurants,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Doner Restaurants,Diners,Dessert Shops,English Restaurants
1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,Pizza Places,Coffee Shops,Portuguese Restaurants,French Restaurants,Burger Joints,Dim Sum Restaurants,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Doner Restaurants
2,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636,0.0,Coffee Shops,Italian Restaurants,Cafés,Bakeries,Mediterranean Restaurants,Restaurants,Chinese Restaurants,Breakfast Spots,Pizza Places,Sandwich Places
3,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,0.0,Coffee Shops,Italian Restaurants,Cafés,Bakeries,Mediterranean Restaurants,Restaurants,Chinese Restaurants,Breakfast Spots,Pizza Places,Sandwich Places
4,M6A,North York,Lawrence Manor,43.718518,-79.464763,0.0,Hot Dog Joints,Dessert Shops,BBQ Joints,Cafés,Fast Food Restaurants,Bowling Alleys,Diners,Bakeries,Korean Restaurants,Food Trucks
5,M6A,North York,Lawrence Heights,43.718518,-79.464763,0.0,Hot Dog Joints,Dessert Shops,BBQ Joints,Cafés,Fast Food Restaurants,Bowling Alleys,Diners,Bakeries,Korean Restaurants,Food Trucks
6,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,0.0,Coffee Shops,Bubble Tea Shops,Wings Joints,Burger Joints,Sandwich Places,Restaurants,Fast Food Restaurants,Indian Restaurants,Ramen Restaurants,Poke Places
7,M7A,Downtown Toronto,Ontario Provincial Government,43.662301,-79.389494,0.0,Coffee Shops,Bubble Tea Shops,Wings Joints,Burger Joints,Sandwich Places,Restaurants,Fast Food Restaurants,Indian Restaurants,Ramen Restaurants,Poke Places
8,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,3.0,Pizza Places,Fast Food Restaurants,Wings Joints,Dim Sum Restaurants,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Doner Restaurants,Diners,Dessert Shops
9,M1B,Scarborough,Malvern,43.806686,-79.194353,0.0,Indian Restaurants,Coffee Shops,Fast Food Restaurants,Wings Joints,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Doner Restaurants,Diners,Dim Sum Restaurants


Finally, let's visualize the resulting clusters

In [63]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 5. Examine Clusters ##

Now, you can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, you can then assign a name to each cluster. I will leave this exercise to you.

#### Cluster 1 ####

In [64]:
cluster1 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1,2] + list(range(5, toronto_merged.shape[1]))]]

cluster1.reset_index(drop=True)

Unnamed: 0,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,Parkwoods,0.0,Fast Food Restaurants,Wings Joints,Dim Sum Restaurants,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Doner Restaurants,Diners,Dessert Shops,English Restaurants
1,North York,Victoria Village,0.0,Pizza Places,Coffee Shops,Portuguese Restaurants,French Restaurants,Burger Joints,Dim Sum Restaurants,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Doner Restaurants
2,Downtown Toronto,Regent Park,0.0,Coffee Shops,Italian Restaurants,Cafés,Bakeries,Mediterranean Restaurants,Restaurants,Chinese Restaurants,Breakfast Spots,Pizza Places,Sandwich Places
3,Downtown Toronto,Harbourfront,0.0,Coffee Shops,Italian Restaurants,Cafés,Bakeries,Mediterranean Restaurants,Restaurants,Chinese Restaurants,Breakfast Spots,Pizza Places,Sandwich Places
4,North York,Lawrence Manor,0.0,Hot Dog Joints,Dessert Shops,BBQ Joints,Cafés,Fast Food Restaurants,Bowling Alleys,Diners,Bakeries,Korean Restaurants,Food Trucks
5,North York,Lawrence Heights,0.0,Hot Dog Joints,Dessert Shops,BBQ Joints,Cafés,Fast Food Restaurants,Bowling Alleys,Diners,Bakeries,Korean Restaurants,Food Trucks
6,Downtown Toronto,Queen's Park,0.0,Coffee Shops,Bubble Tea Shops,Wings Joints,Burger Joints,Sandwich Places,Restaurants,Fast Food Restaurants,Indian Restaurants,Ramen Restaurants,Poke Places
7,Downtown Toronto,Ontario Provincial Government,0.0,Coffee Shops,Bubble Tea Shops,Wings Joints,Burger Joints,Sandwich Places,Restaurants,Fast Food Restaurants,Indian Restaurants,Ramen Restaurants,Poke Places
8,Scarborough,Malvern,0.0,Indian Restaurants,Coffee Shops,Fast Food Restaurants,Wings Joints,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Doner Restaurants,Diners,Dim Sum Restaurants
9,Scarborough,Rouge,0.0,Indian Restaurants,Coffee Shops,Fast Food Restaurants,Wings Joints,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Doner Restaurants,Diners,Dim Sum Restaurants


In [65]:
African_resto_count_0=0
for i in range(3,len(cluster1.columns)):
    African_resto_count_0= African_resto_count_0 + cluster1[cluster1.columns[i]].str.count('African Restaurants').sum()
print('African Restaurants in Cluster 1 are',African_resto_count_0)

African Restaurants in Cluster 1 are 3


In [66]:
print('Percentage of African Restaurants in Cluster 1 is {0:.2f}%'.format((African_resto_count_0/cluster1.shape[0])*100))

Percentage of African Restaurants in Cluster 1 is 1.96%


#### This is a very low number, lets see if people in this cluster are inclined towards food that uses similar ingredients or spice levels. African Dishes are in a lot of ways very identical to Carribean Dishes, lets see if they are in high numbers too ####

In [67]:
African_resto_count_0=0
for i in range(3,len(cluster1.columns)):
    African_resto_count_0= African_resto_count_0 + cluster1[cluster1.columns[i]].str.count('African Restaurants').sum()+cluster1[cluster1.columns[i]].str.count('Caribbean Restaurants').sum()
print('African and Caribbean Restaurants in Cluster 1 are ',African_resto_count_0)

African and Caribbean Restaurants in Cluster 1 are  22


In [68]:
print('Percentage of African and Caribbean Restaurants in Cluster 1 is {0:.2f}%'.format((African_resto_count_0/cluster1.shape[0])*100))

Percentage of African and Caribbean Restaurants in Cluster 1 is 14.38%


#### As expected, just a few people in cluster 1 are inclined towards African and Carribean Dishes, so far these neighborhoods look like an ideal option. ####

#### Now lets find out the neighborhoods where there are no African Restaurants in this cluster, that would be the list of neighborhoods an entrepreneur can invest and start an African Restaurant. ####

In [385]:
SuitableNeighbourhoods=[]
for j in range(len(cluster1)):
    if cluster1.iloc[j,:].str.contains('African Restaurants').any():
        continue
    else:
        SuitableNeighbourhoods.append(cluster1.iloc[j,0])
print(SuitableNeighbourhoods)

['Parkwoods', 'Victoria Village', 'Regent Park ', ' Harbourfront', 'Lawrence Manor ', ' Lawrence Heights', "Queen's Park ", ' Ontario Provincial Government', 'Malvern ', ' Rouge', 'Don Mills', 'Parkview Hill ', ' Woodbine Gardens', 'Garden District, Ryerson', 'Glencairn', 'Don Mills', 'Woodbine Heights', 'St. James Town', 'Humewood-Cedarvale', 'Eringate ', ' Bloordale Gardens ', ' Old Burnhamthorpe ', ' Markland Wood', 'Guildwood ', ' Morningside ', ' West Hill', 'The Beaches', 'Berczy Park', 'Woburn', 'Leaside', 'Central Bay Street', 'Christie', 'Cedarbrae', 'Hillcrest Village', 'Bathurst Manor ', ' Wilson Heights ', ' Downsview North', 'Thorncliffe Park', 'Richmond ', ' Adelaide ', ' King', 'Dufferin ', ' Dovercourt Village', 'Fairview ', ' Henry Farm ', ' Oriole', 'Northwood Park ', ' York University', 'East Toronto', 'Harbourfront East ', ' Union Station ', ' Toronto Islands', 'Little Portugal ', ' Trinity', 'Kennedy Park ', ' Ionview ', ' East Birchmount Park', 'Bayview Village', 

#### Cluster 2 ####

In [387]:
cluster2 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

cluster2.reset_index(drop=True)

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Cliffside,1.0,American Restaurants,Wings Joints,Donut Shops,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants,Diners
1,Cliffcrest,1.0,American Restaurants,Wings Joints,Donut Shops,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants,Diners
2,Scarborough Village West,1.0,American Restaurants,Wings Joints,Donut Shops,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants,Diners


In [255]:
African_resto_count_1=0
for i in range(3,len(cluster2.columns)):
    African_resto_count_1= African_resto_count_1 + cluster2[cluster2.columns[i]].str.count('African Restaurants').sum()
print('African Restaurants in Cluster 2 are ',African_resto_count_1)

African Restaurants in Cluster 2 are  0


In [256]:
African_resto_count_1=0
for i in range(3,len(cluster2.columns)):
    African_resto_count_1= African_resto_count_1 + cluster2[cluster2.columns[i]].str.count('African Restaurants').sum()+cluster2[cluster2.columns[i]].str.count('Caribbean Restaurants').sum()
print('African and Caribbean Restaurants in Cluster 2 are ',African_resto_count_1)

African and Caribbean Restaurants in Cluster 2 are  0


#### In Cluster 2, there are no African or Caribbean Restaurants, this is not an ideal option. ####

#### Cluster 3 #### 

In [257]:
cluster3 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

cluster3.reset_index(drop=True)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Etobicoke,2.0,Breakfast Spots,Delis / Bodegas,Wings Joints,Dumpling Restaurants,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Donut Shops
1,Etobicoke,2.0,Breakfast Spots,Delis / Bodegas,Wings Joints,Dumpling Restaurants,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Donut Shops
2,Etobicoke,2.0,Breakfast Spots,Delis / Bodegas,Wings Joints,Dumpling Restaurants,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Donut Shops
3,Etobicoke,2.0,Breakfast Spots,Delis / Bodegas,Wings Joints,Dumpling Restaurants,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Donut Shops
4,Etobicoke,2.0,Breakfast Spots,Delis / Bodegas,Wings Joints,Dumpling Restaurants,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Donut Shops
5,Etobicoke,2.0,Breakfast Spots,Delis / Bodegas,Wings Joints,Dumpling Restaurants,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Donut Shops
6,Etobicoke,2.0,Breakfast Spots,Delis / Bodegas,Wings Joints,Dumpling Restaurants,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Donut Shops
7,Etobicoke,2.0,Breakfast Spots,Delis / Bodegas,Wings Joints,Dumpling Restaurants,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Donut Shops


In [258]:
African_resto_count_2=0
for i in range(3,len(cluster3.columns)):
    African_resto_count_2= African_resto_count_2 + cluster3[cluster3.columns[i]].str.count('African Restaurants').sum()
print('African Restaurants in Cluster 3 are ',African_resto_count_2)

African Restaurants in Cluster 3 are  0


#### Cluster 3 is same as cluster 2, and therefore is not an ideal option either ####

#### Cluster 4 #### 

In [259]:
cluster4 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

cluster4.reset_index(drop=True)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Etobicoke,3.0,Pizza Places,Fast Food Restaurants,Diners,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants,Donut Shops,Dim Sum Restaurants,Ethiopian Restaurants
1,Etobicoke,3.0,Pizza Places,Breakfast Spots,Filipino Restaurants,Chinese Restaurants,Tea Rooms,Sandwich Places,Diners,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants
2,Etobicoke,3.0,Pizza Places,Breakfast Spots,Filipino Restaurants,Chinese Restaurants,Tea Rooms,Sandwich Places,Diners,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants
3,Etobicoke,3.0,Pizza Places,Breakfast Spots,Filipino Restaurants,Chinese Restaurants,Tea Rooms,Sandwich Places,Diners,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants
4,Etobicoke,3.0,Pizza Places,Breakfast Spots,Filipino Restaurants,Chinese Restaurants,Tea Rooms,Sandwich Places,Diners,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants
5,Etobicoke,3.0,Pizza Places,Breakfast Spots,Filipino Restaurants,Chinese Restaurants,Tea Rooms,Sandwich Places,Diners,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants
6,York,3.0,Bakeries,Portuguese Restaurants,Cafés,Mexican Restaurants,Wings Joints,Donut Shops,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants
7,Scarborough,3.0,Pizza Places,Middle Eastern Restaurants,Caribbean Restaurants,Cuban Restaurants,Cupcake Shops,Delis / Bodegas,Dessert Shops,Dim Sum Restaurants,Diners,Ethiopian Restaurants
8,North York,3.0,Delis / Bodegas,Bakeries,Wings Joints,Donut Shops,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants
9,North York,3.0,Delis / Bodegas,Bakeries,Wings Joints,Donut Shops,Ethiopian Restaurants,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants


In [262]:
African_resto_count_3=0
for i in range(3,len(cluster4.columns)):
    African_resto_count_3= African_resto_count_3 + cluster4[cluster4.columns[i]].str.count('African Restaurants').sum()
print('African Restaurants in Cluster 4 are',African_resto_count_3)

African Restaurants in Cluster 4 are 0


In [263]:
African_resto_count_3=0
for i in range(3,len(cluster4.columns)):
    African_resto_count_3= African_resto_count_3 + cluster4[cluster4.columns[i]].str.count('African Restaurants').sum()+cluster4[cluster4.columns[i]].str.count('Caribbean Restaurants').sum()
print('African and Caribbean Restaurants in Cluster 4 are ',African_resto_count_3)

African and Caribbean Restaurants in Cluster 4 are  14


In [264]:
print('Percentage of African and Caribbean Restaurants in Cluster 4 is {0:.2f}%'.format((African_resto_count_3/cluster4.shape[0])*100))

Percentage of African and Caribbean Restaurants in Cluster 4 is 45.16%


#### Cluster 4 does not look like the ideal option. Though this cluster has a high percentage of Caribbean Restaurants, there is no African Restaurant found. ####

#### Cluster 5 #### 

In [260]:
cluster5 = toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

cluster5.reset_index(drop=True)

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,4.0,Cafeterias,Wings Joints,Donut Shops,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants,Diners,Event Spaces
1,North York,4.0,Cafeterias,Wings Joints,Donut Shops,English Restaurants,Empanada Restaurants,Egyptian Restaurants,Eastern European Restaurants,Dumpling Restaurants,Diners,Event Spaces


#### Cluster 5 is same as cluster 2 and 3, therefore is not an ideal option either. ####