# Importing the Dataset

In [153]:
import pandas as pd
import numpy as np

In [154]:
toronto_data = pd.read_excel('toronto neighborhoods.xlsx')

In [155]:
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [156]:
#drop the rows in which Borough is not mentioned
toronto_data.dropna(axis=0, inplace=True)
toronto_data.head()



Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


This is the required dataframe. Now we will see the shape of the dataframe

In [157]:
print('The shape of the dataframe is', toronto_data.shape)

The shape of the dataframe is (103, 3)


# Getting Location Coordinates

Now we will import the dataset containing the latitude and longitudes for the different Neighborhoods.

In [158]:
tor_lat_long = pd.read_csv('Geospatial_Coordinates.csv')

In [159]:
tor_lat_long.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


We try to filter this dataframe to get only those postal codes which are present in our location dataframe, i.e, toronto_data.

In [160]:
toronto_locations = pd.merge(toronto_data, tor_lat_long, how='left', on='Postal Code')
toronto_locations.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


This is the desired dataframe having the Postal Codes, Boroughs,Neighbourhoods and their corresponding Latitude & Longitudes.

In [161]:
print('The shape of this datadrame is', toronto_locations.shape)

The shape of this datadrame is (103, 5)


# Let's plot the different Neighborhoods on a map

In [162]:
import folium
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [163]:
address = 'Toronto'
geolocator = Nominatim(user_agent = "drake's city")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The location coordinates of Toronto are ({},{})'.format(latitude, longitude))

The location coordinates of Toronto are (43.6534817,-79.3839347)


In [164]:
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, long, neighborhood, borough in zip(toronto_locations['Latitude'] , toronto_locations['Longitude'] , toronto_locations['Neighborhood'], toronto_locations['Borough']):
    
    label = '{},{}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.features.CircleMarker(location=[lat, long],
                                 popup=label,
                                 color='blue',
                                 fill=True,
                                 fill_color='#3186cc',
                                 fill_opacity=0.5,
                                 radius=5,
                                 parse_html=False).add_to(toronto_map)


toronto_map

#### Now let us analyze the neighborhoods which exist in boroughs containing the word 'Toronto' in them.

For this we will have to filter our dataframe 'toronto_loctions'

In [165]:
toronto_locations.drop('Postal Code', axis=1, inplace=True)
toronto_locations.reset_index(drop=True).head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,North York,Parkwoods,43.753259,-79.329656
1,North York,Victoria Village,43.725882,-79.315572
2,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [166]:
print('The shape of the filtered dataset is {}'.format(toronto_locations.shape))

The shape of the filtered dataset is (103, 4)


In [167]:
toronto_locations['Borough'].unique()

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

# Getting the venues in all the neighborhoods in all the boroughs

We first import the required libraries to handle requests and json files

In [168]:
from pandas.io.json import json_normalize
import requests       # to handle requests

We will enter our credentials to use Foursquare API

In [169]:
 CLIENT_ID = 'WEZXAWILCFJJGUQME54IOZAU4HTTOW2UJCM4IBQQW0BAB2PL' # your Foursquare ID
CLIENT_SECRET = 'CHCA3UMMDMMYJM4SMKPSSIC0ORVW5L4S0N1QHYM2HUZHHOGF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

Let's retrieve the json file for the required neighborhoods and their venues, from Foursquare

In [170]:
RADIUS = 500
LIMIT = 100
venues_list = []
for name, lat, lng in zip(toronto_locations['Neighborhood'],toronto_locations['Latitude'], toronto_locations['Longitude']):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            RADIUS, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        for v in results:
            
            venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name'])])
        
        
venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
venues
    

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview
The Danforth West, Ri

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.332140,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.753259,-79.329656,TTC stop - 44 Valley Woods,43.755402,-79.333741,Bus Stop
3,Parkwoods,43.753259,-79.329656,GreenWin pool,43.756232,-79.333842,Pool
4,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
...,...,...,...,...,...,...,...
2107,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Jim & Maria's No Frills,43.631152,-79.518617,Grocery Store
2108,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Once Upon A Child,43.631075,-79.518290,Kids Store
2109,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Value Village,43.631269,-79.518238,Thrift / Vintage Store
2110,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Kingsway Boxing Club,43.627254,-79.526684,Gym


In [171]:
venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",10,10,10,10,10,10
"Bathurst Manor, Wilson Heights, Downsview North",19,19,19,19,19,19
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",23,23,23,23,23,23
...,...,...,...,...,...,...
"Wexford, Maryvale",5,5,5,5,5,5
Willowdale,39,39,39,39,39,39
Woburn,4,4,4,4,4,4
Woodbine Heights,8,8,8,8,8,8


In [172]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 272 uniques categories.


In [173]:
# one hot encoding
venues_onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")


In [174]:
# add neighborhood column back to dataframe
venues_onehot['Neighborhood'] = venues['Neighborhood'] 


In [175]:
# move neighborhood column to the first column
fixed_columns = [venues_onehot.columns[-1]] + list(venues_onehot.columns[:-1])

venues_onehot = venues_onehot[fixed_columns]

In [176]:
venues_grouped = venues_onehot.groupby('Neighborhood').mean().reset_index()
venues_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.052632,0.000000,0.0,0.0,0.0,0.000000
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.043478
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,"Wexford, Maryvale",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000
90,Willowdale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.025641,0.0,0.0,0.0,0.000000
91,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000
92,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.125000,0.000000,0.0,0.0,0.0,0.000000


### Let's print each neighborhood along with the top 5 most common venues

In [177]:
for hood in venues_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = venues_grouped[venues_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 1})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head())
    print('\n')
    
   

----Agincourt----
                       venue  freq
0                     Lounge   0.2
1             Breakfast Spot   0.2
2  Latin American Restaurant   0.2
3               Skating Rink   0.2
4                Men's Store   0.0


----Alderwood, Long Branch----
          venue  freq
0   Pizza Place   0.2
1      Pharmacy   0.1
2           Pub   0.1
3           Gym   0.1
4  Skating Rink   0.1


----Bathurst Manor, Wilson Heights, Downsview North----
              venue  freq
0          Pharmacy   0.1
1       Coffee Shop   0.1
2        Restaurant   0.1
3              Bank   0.1
4  Sushi Restaurant   0.1


----Bayview Village----
                             venue  freq
0                             Café   0.2
1              Japanese Restaurant   0.2
2                             Bank   0.2
3               Chinese Restaurant   0.2
4  Molecular Gastronomy Restaurant   0.0


----Bedford Park, Lawrence Manor East----
                venue  freq
0    Sushi Restaurant   0.1
1  Italian Restaurant

### Now we will form a dataframe which will show us the 10 most common venues 

Let's define a function which takes a row as a parameter and gives the 10 most common venues

In [178]:
def get_common_venues(row, n_venues):
    
    row_categories = row.iloc[1:]
    row_sorted = row_categories.sort_values(ascending=False)
    return row_sorted.index.values[0:n_venues]
    

Let's create a dataframe

In [179]:
suffix = ['st', 'nd', 'rd']
columns = ['Neighborhood']


for i in range(1,11):
    
    try:
        columns.append('{}{} most common venue'.format(i,suffix[i-1]))
        
    except:
        columns.append('{}th most common venue'.format(i))
        

        
category_sorted = pd.DataFrame(columns=columns)        
        
    
    
category_sorted

Unnamed: 0,Neighborhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue


Now let's fill this dataframe

In [180]:
category_sorted['Neighborhood'] = venues_grouped['Neighborhood']

for i in np.arange(venues_grouped.shape[0]):
    
    category_sorted.iloc[i, 1:] = get_common_venues(venues_grouped.iloc[i,:], 10)
    

category_sorted.head()  

Unnamed: 0,Neighborhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,Agincourt,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Women's Store,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run
1,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Pharmacy,Athletics & Sports,Pub,Sandwich Place,Dance Studio,Skating Rink,Gym,Dog Run
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Pizza Place,Pharmacy,Supermarket,Deli / Bodega,Sushi Restaurant,Shopping Mall,Middle Eastern Restaurant,Restaurant
3,Bayview Village,Japanese Restaurant,Café,Bank,Chinese Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Women's Store
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Sushi Restaurant,Italian Restaurant,Sandwich Place,Greek Restaurant,Grocery Store,Indian Restaurant,Juice Bar,Liquor Store,Comfort Food Restaurant


In [181]:
category_sorted.shape

(94, 11)

# Let's Cluster Neighborhoods Based On The 10 Most Common Venues

We will import the libraries required for this

In [182]:
from sklearn.cluster import KMeans

In [183]:
k_means = KMeans(n_clusters=5, random_state=0)

In [184]:
cluster_df = venues_grouped.iloc[:, 1:]
cluster_df.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478


In [185]:
cluster_df.shape

(94, 271)

In [186]:
clustering = k_means.fit(cluster_df)
cluster_label = clustering.labels_ 
cluster_label

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 3, 2, 0, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0,
       1, 0, 0, 0, 0, 1], dtype=int32)

In [187]:
clustered_neigborhoods = category_sorted
clustered_neigborhoods['Cluster'] = cluster_label
clustered_neigborhoods

Unnamed: 0,Neighborhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue,Cluster
0,Agincourt,Latin American Restaurant,Lounge,Skating Rink,Breakfast Spot,Women's Store,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run,0
1,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Pharmacy,Athletics & Sports,Pub,Sandwich Place,Dance Studio,Skating Rink,Gym,Dog Run,0
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Pizza Place,Pharmacy,Supermarket,Deli / Bodega,Sushi Restaurant,Shopping Mall,Middle Eastern Restaurant,Restaurant,0
3,Bayview Village,Japanese Restaurant,Café,Bank,Chinese Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Women's Store,0
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Sushi Restaurant,Italian Restaurant,Sandwich Place,Greek Restaurant,Grocery Store,Indian Restaurant,Juice Bar,Liquor Store,Comfort Food Restaurant,0
...,...,...,...,...,...,...,...,...,...,...,...,...
89,"Wexford, Maryvale",Auto Garage,Sandwich Place,Breakfast Spot,Shopping Mall,Bakery,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,0
90,Willowdale,Pizza Place,Coffee Shop,Ramen Restaurant,Sandwich Place,Sushi Restaurant,Grocery Store,Café,Movie Theater,Ice Cream Shop,Steakhouse,0
91,Woburn,Coffee Shop,Indian Restaurant,Korean Restaurant,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,0
92,Woodbine Heights,Skating Rink,Park,Pharmacy,Beer Store,Cosmetics Shop,Curling Ice,Video Store,Comfort Food Restaurant,Electronics Store,College Rec Center,0


In [188]:
# To see the no. of neighborhoods in each cluster
clustered_neigborhoods.groupby('Cluster')['Neighborhood'].count()

Cluster
0    76
1    14
2     2
3     1
4     1
Name: Neighborhood, dtype: int64

Let's create a dataframe to view all the neighborhood, with their location, 10 most common venues and cluster

In [189]:
final_df = pd.merge(toronto_locations, clustered_neigborhoods, how='right', on='Neighborhood')
final_df

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue,Cluster
0,North York,Parkwoods,43.753259,-79.329656,Park,Food & Drink Shop,Bus Stop,Pool,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Dance Studio,1
1,North York,Victoria Village,43.725882,-79.315572,Intersection,Coffee Shop,Financial or Legal Service,French Restaurant,Portuguese Restaurant,Hockey Arena,Women's Store,Distribution Center,Dessert Shop,Dim Sum Restaurant,0
2,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,Coffee Shop,Pub,Bakery,Park,Café,Breakfast Spot,Theater,Health Food Store,Historic Site,Hotel,0
3,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,Women's Store,Coffee Shop,Miscellaneous Shop,Event Space,Carpet Store,Arts & Crafts Store,Clothing Store,Furniture / Home Store,Accessories Store,Vietnamese Restaurant,0
4,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Coffee Shop,Yoga Studio,Distribution Center,Bank,Bar,Beer Bar,Smoothie Shop,Italian Restaurant,Sandwich Place,Burrito Place,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944,Park,Pool,River,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,1
95,Downtown Toronto,Church and Wellesley,43.665860,-79.383160,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Restaurant,Gay Bar,Bubble Tea Shop,Pub,Café,Yoga Studio,Gastropub,0
96,East Toronto,Business reply mail Processing Centre,43.662744,-79.321558,Gym / Fitness Center,Spa,Auto Workshop,Brewery,Burrito Place,Comic Shop,Farmers Market,Fast Food Restaurant,Garden,Garden Center,0
97,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509,Baseball Field,Locksmith,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Women's Store,2


In [190]:
cluster_2 = final_df['Cluster']
cluster_2.shape

(99,)

In [191]:
final_df.drop('Cluster', axis=1, inplace=True)
final_df.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,North York,Parkwoods,43.753259,-79.329656,Park,Food & Drink Shop,Bus Stop,Pool,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Dance Studio
1,North York,Victoria Village,43.725882,-79.315572,Intersection,Coffee Shop,Financial or Legal Service,French Restaurant,Portuguese Restaurant,Hockey Arena,Women's Store,Distribution Center,Dessert Shop,Dim Sum Restaurant
2,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Coffee Shop,Pub,Bakery,Park,Café,Breakfast Spot,Theater,Health Food Store,Historic Site,Hotel
3,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,Women's Store,Coffee Shop,Miscellaneous Shop,Event Space,Carpet Store,Arts & Crafts Store,Clothing Store,Furniture / Home Store,Accessories Store,Vietnamese Restaurant
4,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Coffee Shop,Yoga Studio,Distribution Center,Bank,Bar,Beer Bar,Smoothie Shop,Italian Restaurant,Sandwich Place,Burrito Place


In [196]:
final_df.insert(4, 'Cluster', cluster_2)

ValueError: cannot insert Cluster, already exists

In [195]:
final_df

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,North York,Parkwoods,43.753259,-79.329656,1,Park,Food & Drink Shop,Bus Stop,Pool,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Dance Studio
1,North York,Victoria Village,43.725882,-79.315572,0,Intersection,Coffee Shop,Financial or Legal Service,French Restaurant,Portuguese Restaurant,Hockey Arena,Women's Store,Distribution Center,Dessert Shop,Dim Sum Restaurant
2,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,0,Coffee Shop,Pub,Bakery,Park,Café,Breakfast Spot,Theater,Health Food Store,Historic Site,Hotel
3,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,Women's Store,Coffee Shop,Miscellaneous Shop,Event Space,Carpet Store,Arts & Crafts Store,Clothing Store,Furniture / Home Store,Accessories Store,Vietnamese Restaurant
4,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Yoga Studio,Distribution Center,Bank,Bar,Beer Bar,Smoothie Shop,Italian Restaurant,Sandwich Place,Burrito Place
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944,1,Park,Pool,River,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center
95,Downtown Toronto,Church and Wellesley,43.665860,-79.383160,0,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Restaurant,Gay Bar,Bubble Tea Shop,Pub,Café,Yoga Studio,Gastropub
96,East Toronto,Business reply mail Processing Centre,43.662744,-79.321558,0,Gym / Fitness Center,Spa,Auto Workshop,Brewery,Burrito Place,Comic Shop,Farmers Market,Fast Food Restaurant,Garden,Garden Center
97,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509,2,Baseball Field,Locksmith,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Women's Store


## Let's display these clusters on the map

In [201]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(5)
ys = [i + x + (i*x)**2 for i in range(5)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(final_df['Latitude'], final_df['Longitude'], final_df['Neighborhood'], final_df['Cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examining all the clusters

### Cluster 1

In [203]:
final_df.loc[final_df['Cluster']==0, final_df.columns[[1] + list(range(5, final_df.shape[1]))]]

Unnamed: 0,Neighborhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
1,Victoria Village,Intersection,Coffee Shop,Financial or Legal Service,French Restaurant,Portuguese Restaurant,Hockey Arena,Women's Store,Distribution Center,Dessert Shop,Dim Sum Restaurant
2,"Regent Park, Harbourfront",Coffee Shop,Pub,Bakery,Park,Café,Breakfast Spot,Theater,Health Food Store,Historic Site,Hotel
3,"Lawrence Manor, Lawrence Heights",Women's Store,Coffee Shop,Miscellaneous Shop,Event Space,Carpet Store,Arts & Crafts Store,Clothing Store,Furniture / Home Store,Accessories Store,Vietnamese Restaurant
4,"Queen's Park, Ontario Provincial Government",Coffee Shop,Yoga Studio,Distribution Center,Bank,Bar,Beer Bar,Smoothie Shop,Italian Restaurant,Sandwich Place,Burrito Place
5,"Malvern, Rouge",Fast Food Restaurant,Deli / Bodega,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Doner Restaurant,Dog Run
...,...,...,...,...,...,...,...,...,...,...,...
92,"St. James Town, Cabbagetown",Coffee Shop,Café,Bakery,Pizza Place,Italian Restaurant,Restaurant,Pub,Playground,Plaza,Pharmacy
93,"First Canadian Place, Underground city",Coffee Shop,Café,Restaurant,Gym,Hotel,Japanese Restaurant,Seafood Restaurant,American Restaurant,Steakhouse,Salad Place
95,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Restaurant,Gay Bar,Bubble Tea Shop,Pub,Café,Yoga Studio,Gastropub
96,Business reply mail Processing Centre,Gym / Fitness Center,Spa,Auto Workshop,Brewery,Burrito Place,Comic Shop,Farmers Market,Fast Food Restaurant,Garden,Garden Center


### Cluster 2

In [204]:
final_df.loc[final_df['Cluster']==1, final_df.columns[[1] + list(range(5, final_df.shape[1]))]]

Unnamed: 0,Neighborhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
0,Parkwoods,Park,Food & Drink Shop,Bus Stop,Pool,Drugstore,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Dance Studio
10,Glencairn,Park,Asian Restaurant,Pub,Japanese Restaurant,Distribution Center,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
20,Caledonia-Fairbanks,Women's Store,Pool,Park,Afghan Restaurant,Deli / Bodega,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
31,Scarborough Village,Playground,Women's Store,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
34,East Toronto,Park,Convenience Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Deli / Bodega
49,"North Park, Maple Leaf Park, Upwood Park",Park,Massage Studio,Bakery,Construction & Landscaping,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
59,Lawrence Park,Park,Swim School,Bus Line,Dim Sum Restaurant,Distribution Center,Dessert Shop,Diner,Discount Store,Dog Run,Deli / Bodega
62,Weston,Park,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant,Gym
64,York Mills West,Park,Bank,Convenience Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Department Store,Doner Restaurant
66,Forest Hill North & West,Park,Jewelry Store,Sushi Restaurant,Bus Line,Trail,Donut Shop,Drugstore,Doner Restaurant,Dog Run,Deli / Bodega


### Cluster 3

In [205]:
final_df.loc[final_df['Cluster']==2, final_df.columns[[1] + list(range(5, final_df.shape[1]))]]

Unnamed: 0,Neighborhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
55,"Humberlea, Emery",Baseball Field,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Women's Store,Farmers Market
97,"Old Mill South, King's Mill Park, Sunnylea, Hu...",Baseball Field,Locksmith,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Women's Store


### Cluster 4

In [206]:
final_df.loc[final_df['Cluster']==3, final_df.columns[[1] + list(range(5, final_df.shape[1]))]]

Unnamed: 0,Neighborhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
50,Humber Summit,Shopping Mall,Women's Store,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop


### Cluster 5

In [207]:
final_df.loc[final_df['Cluster']==4, final_df.columns[[1] + list(range(5, final_df.shape[1]))]]

Unnamed: 0,Neighborhood,1st most common venue,2nd most common venue,3rd most common venue,4th most common venue,5th most common venue,6th most common venue,7th most common venue,8th most common venue,9th most common venue,10th most common venue
11,"West Deane Park, Princess Gardens, Martin Grov...",Golf Course,Women's Store,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
