# Battle of Neighbourhoods

## 

## Part 1 - Web Scraping and Prepare DataFrame

In [87]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [88]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wikipage = requests.get(url).text
soup = BeautifulSoup(wikipage,'lxml')
table = soup.find('table')

In [89]:
PCodes,boros,neighs = [],[],[]
for row in table.find_all('tr'):
    cols = row.find_all('td')
    for col in cols:
        # Assuming the text data of in the cell will have necessary information as required
        info = col.get_text().strip()
        if 'not assigned' not in info.lower(): # Assuming if the Postal Code has not been assigned, ignore the data
            PCodes.append(info[:3])
            i = info.find('(')
            boros.append(info[3:i])
            neighs.append(info[i+1:-1].replace('/',','))
        
datadict = {'PostalCode':PCodes,'Borough':boros,'Neighbourhood':neighs}
toronto_df = pd.DataFrame()
toronto_df = toronto_df.from_dict(datadict)
toronto_df.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern , Rouge"
7,M3B,North York,Don Mills)Nort
8,M4B,East York,"Parkview Hill , Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [90]:
toronto_df.shape

(103, 3)

# 

# Part 2 - Getting the Latitude and Logitude Data

# 

In [91]:
# Using CSV file as below code does not take a finite time.
'''
import geocoder # import geocoder

def get_coordinates(postal_code):
    
    # initialize your variable to None
    lat_lng_coords = None

    # loop until you get the coordinates
    while(lat_lng_coords is None):
      g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
      lat_lng_coords = g.latlng

    return lat_lng_coords
    
latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

toronto_df['Lats'] = toronto_df.apply(lambda x: get_coordinates(x['PostalCode']),axis=1)
toronto_df
'''

"\nimport geocoder # import geocoder\n\ndef get_coordinates(postal_code):\n    \n    # initialize your variable to None\n    lat_lng_coords = None\n\n    # loop until you get the coordinates\n    while(lat_lng_coords is None):\n      g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))\n      lat_lng_coords = g.latlng\n\n    return lat_lng_coords\n    \nlatitude = lat_lng_coords[0]\nlongitude = lat_lng_coords[1]\n\ntoronto_df['Lats'] = toronto_df.apply(lambda x: get_coordinates(x['PostalCode']),axis=1)\ntoronto_df\n"

In [92]:
latlong = pd.read_csv('Geospatial_Coordinates.csv')
toronto_df = pd.merge(toronto_df,latlong,left_on='PostalCode',right_on='Postal Code')
toronto_df.drop('Postal Code',axis=1,inplace=True)

In [93]:
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


In [94]:
toronto_df.isnull().values.any()

False

In [95]:
toronto_df.shape

(103, 5)

In [96]:
toronto_df['Borough'].unique()

array(['North York', 'Downtown Toronto', "Queen's Park", 'Etobicoke',
       'Scarborough', 'East York', 'York', 'East Toronto', 'West Toronto',
       'East YorkEast Toronto', 'Central Toronto',
       'MississaugaCanada Post Gateway Processing Centre',
       'Downtown TorontoStn A PO Boxes25 The Esplanade',
       'EtobicokeNorthwest',
       'East TorontoBusiness reply mail Processing Centre969 Eastern'],
      dtype=object)

# 

# Part 3 Explore and Cluster Neighbourhoods in Toronto

# 

In [147]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

from pandas.io.json import json_normalize

import numpy as np

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library


In [98]:
print('This dataframe has {} boroughs and {} neighborhoods.'.format(
        len(toronto_df['Borough'].unique()),
        toronto_df.shape[0]
    )
)

This dataframe has 15 boroughs and 103 neighborhoods.


### For Convinience reason im taking the top 11 Boroughs of Toronto and will continue processing with the data

In [99]:
filt_boro = list(toronto_df['Borough'].unique())[:11]
filt_boro

['North York',
 'Downtown Toronto',
 "Queen's Park",
 'Etobicoke',
 'Scarborough',
 'East York',
 'York',
 'East Toronto',
 'West Toronto',
 'East YorkEast Toronto',
 'Central Toronto']

In [100]:
new_toronto_df = toronto_df[toronto_df['Borough'].isin(filt_boro)]
new_toronto_df

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
...,...,...,...,...,...
97,M5X,Downtown Toronto,"First Canadian Place , Underground city",43.648429,-79.382280
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",43.636258,-79.498509


In [101]:
new_toronto_df.shape

(99, 5)

## Visualizing Downtown Toronto

In [102]:
downtown_df = new_toronto_df[new_toronto_df['Borough']=='Downtown Toronto'].reset_index(drop=True)
downtown_df

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
5,M6G,Downtown Toronto,Christie,43.669542,-79.422564
6,M5H,Downtown Toronto,"Richmond , Adelaide , King",43.650571,-79.384568
7,M5J,Downtown Toronto,"Harbourfront East , Union Station , Toronto Is...",43.640816,-79.381752
8,M5K,Downtown Toronto,"Toronto Dominion Centre , Design Exchange",43.647177,-79.381576
9,M5L,Downtown Toronto,"Commerce Court , Victoria Hotel",43.648198,-79.379817


In [103]:
address = "Downtown Toronto, Toronto"

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print("The geograpical coordinate of {} are {}, {}".format(address,latitude, longitude))

The geograpical coordinate of Downtown Toronto, Toronto are 43.6541737, -79.38081164513409


In [104]:
# create map of downtown using latitude and longitude values
map_downtown = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(downtown_df['Latitude'], downtown_df['Longitude'], downtown_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_downtown)  
map_downtown

### Foursquare Credentials

In [105]:
CLIENT_ID = 'QH1IBX41PG0PBDOCVGOQNLQXJA52KXGEXJLK10YET1YMH5GM' # your Foursquare ID
CLIENT_SECRET = 'NM5A2OJVLPFYV02UIBKUCH2F5FLYQBJESG2NCRBJ3QIOXTEQ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

### Will explore first Neighbourhood of Toronto : Downtown

In [106]:
neighbourhood_name = downtown_df.loc[0,'Neighbourhood']
neighbourhood_lat = downtown_df.loc[0,'Latitude']
neighbourhood_long = downtown_df.loc[0,'Longitude']

# type your answer here
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_lat, 
    neighbourhood_long, 
    radius, 
    LIMIT)
url # display URL

results = requests.get(url).json()

In [107]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    
    
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Tandem Coffee,Coffee Shop,43.653559,-79.361809
1,Roselle Desserts,Bakery,43.653447,-79.362017
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Impact Kitchen,Restaurant,43.656369,-79.35698
4,Body Blitz Spa East,Spa,43.654735,-79.359874


In [108]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

45 venues were returned by Foursquare.


### Explore all filtered neighbourhoods of Toronto

In [109]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

toronto_venues = getNearbyVenues(names=downtown_df['Neighbourhood'],latitudes=downtown_df['Latitude'],
                                      longitudes = downtown_df['Longitude'])

Regent Park , Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond , Adelaide , King
Harbourfront East , Union Station , Toronto Islands
Toronto Dominion Centre , Design Exchange
Commerce Court , Victoria Hotel
University of Toronto , Harbord
Kensington Market , Chinatown , Grange Park
CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport
Rosedale
St. James Town , Cabbagetown
First Canadian Place , Underground city
Church and Wellesley


In [119]:
print(toronto_venues.shape)
toronto_venues.head()

(1090, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park , Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
1,"Regent Park , Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
2,"Regent Park , Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park , Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
4,"Regent Park , Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [120]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,60,60,60,60,60,60
"CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport",16,16,16,16,16,16
Central Bay Street,63,63,63,63,63,63
Christie,16,16,16,16,16,16
Church and Wellesley,72,72,72,72,72,72
"Commerce Court , Victoria Hotel",100,100,100,100,100,100
"First Canadian Place , Underground city",100,100,100,100,100,100
"Garden District, Ryerson",100,100,100,100,100,100
"Harbourfront East , Union Station , Toronto Islands",100,100,100,100,100,100
"Kensington Market , Chinatown , Grange Park",63,63,63,63,63,63


In [112]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 202 uniques categories.


### Analyzing each neighbourhood of DownTown Toronto

In [130]:
# one hot encoding
downtown_onehot = pd.get_dummies(toronto_venues[['Venue Category']])
# add neighborhood column back to dataframe
downtown_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 
# move neighborhood column to the first column
fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot = downtown_onehot[fixed_columns]

downtown_onehot.tail()

Unnamed: 0,Neighborhood,Venue Category_Adult Boutique,Venue Category_Airport,Venue Category_Airport Food Court,Venue Category_Airport Gate,Venue Category_Airport Lounge,Venue Category_Airport Service,Venue Category_Airport Terminal,Venue Category_American Restaurant,Venue Category_Antique Shop,...,Venue Category_Thai Restaurant,Venue Category_Theater,Venue Category_Theme Restaurant,Venue Category_Trail,Venue Category_Train Station,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Video Game Store,Venue Category_Vietnamese Restaurant,Venue Category_Wine Bar,Venue Category_Yoga Studio
1085,Church and Wellesley,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1086,Church and Wellesley,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1087,Church and Wellesley,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1088,Church and Wellesley,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1089,Church and Wellesley,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [127]:
downtown_onehot.shape

(1090, 203)

### Lets find the mean of hot encoded categorical variables in a neighbourhood

In [129]:
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()
downtown_grouped.shape

(17, 203)

In [140]:
downtown_grouped

Unnamed: 0,Neighborhood,Venue Category_Adult Boutique,Venue Category_Airport,Venue Category_Airport Food Court,Venue Category_Airport Gate,Venue Category_Airport Lounge,Venue Category_Airport Service,Venue Category_Airport Terminal,Venue Category_American Restaurant,Venue Category_Antique Shop,...,Venue Category_Thai Restaurant,Venue Category_Theater,Venue Category_Theme Restaurant,Venue Category_Trail,Venue Category_Train Station,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Video Game Store,Venue Category_Vietnamese Restaurant,Venue Category_Wine Bar,Venue Category_Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.016667,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.0
1,"CN Tower , King and Spadina , Railway Lands , ...",0.0,0.0625,0.0625,0.0625,0.125,0.0625,0.125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.031746,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.015873,0.015873
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,...,0.013889,0.013889,0.013889,0.0,0.0,0.0,0.0,0.0,0.0,0.027778
5,"Commerce Court , Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0
6,"First Canadian Place , Underground city",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.02,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0
7,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0
8,"Harbourfront East , Union Station , Toronto Is...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0
9,"Kensington Market , Chinatown , Grange Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.047619,0.015873,0.0


In [144]:
num_top_venues = 5

for hood in downtown_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = downtown_grouped[downtown_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                           venue  freq
0     Venue Category_Coffee Shop  0.08
1    Venue Category_Cocktail Bar  0.05
2          Venue Category_Bakery  0.05
3        Venue Category_Pharmacy  0.03
4  Venue Category_Farmers Market  0.03


----CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport----
                                venue  freq
0       Venue Category_Airport Lounge  0.12
1     Venue Category_Airport Terminal  0.12
2        Venue Category_Boat or Ferry  0.06
3              Venue Category_Airport  0.06
4  Venue Category_Rental Car Location  0.06


----Central Bay Street----
                                venue  freq
0          Venue Category_Coffee Shop  0.17
1   Venue Category_Italian Restaurant  0.05
2       Venue Category_Sandwich Place  0.05
3                 Venue Category_Café  0.05
4  Venue Category_Japanese Restaurant  0.03


----Christie----
                          venue  freq
0  Venue 

In [145]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [150]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(20)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Venue Category_Coffee Shop,Venue Category_Cocktail Bar,Venue Category_Bakery,Venue Category_Cheese Shop,Venue Category_Farmers Market,Venue Category_Pharmacy,Venue Category_Restaurant,Venue Category_Seafood Restaurant,Venue Category_Beer Bar,Venue Category_Thai Restaurant
1,"CN Tower , King and Spadina , Railway Lands , ...",Venue Category_Airport Lounge,Venue Category_Airport Terminal,Venue Category_Plane,Venue Category_Harbor / Marina,Venue Category_Sculpture Garden,Venue Category_Boat or Ferry,Venue Category_Rental Car Location,Venue Category_Bar,Venue Category_Coffee Shop,Venue Category_Boutique
2,Central Bay Street,Venue Category_Coffee Shop,Venue Category_Italian Restaurant,Venue Category_Sandwich Place,Venue Category_Café,Venue Category_Thai Restaurant,Venue Category_Salad Place,Venue Category_Bubble Tea Shop,Venue Category_Burger Joint,Venue Category_Japanese Restaurant,Venue Category_Modern European Restaurant
3,Christie,Venue Category_Grocery Store,Venue Category_Café,Venue Category_Park,Venue Category_Candy Store,Venue Category_Italian Restaurant,Venue Category_Athletics & Sports,Venue Category_Restaurant,Venue Category_Baby Store,Venue Category_Nightclub,Venue Category_Coffee Shop
4,Church and Wellesley,Venue Category_Coffee Shop,Venue Category_Japanese Restaurant,Venue Category_Sushi Restaurant,Venue Category_Restaurant,Venue Category_Yoga Studio,Venue Category_Pub,Venue Category_Men's Store,Venue Category_Mediterranean Restaurant,Venue Category_Hotel,Venue Category_Gay Bar
5,"Commerce Court , Victoria Hotel",Venue Category_Coffee Shop,Venue Category_Restaurant,Venue Category_Café,Venue Category_Hotel,Venue Category_Gym,Venue Category_Italian Restaurant,Venue Category_American Restaurant,Venue Category_Seafood Restaurant,Venue Category_Cocktail Bar,Venue Category_Japanese Restaurant
6,"First Canadian Place , Underground city",Venue Category_Coffee Shop,Venue Category_Café,Venue Category_Hotel,Venue Category_Restaurant,Venue Category_Japanese Restaurant,Venue Category_Gym,Venue Category_Asian Restaurant,Venue Category_Seafood Restaurant,Venue Category_Salad Place,Venue Category_Deli / Bodega
7,"Garden District, Ryerson",Venue Category_Clothing Store,Venue Category_Coffee Shop,Venue Category_Cosmetics Shop,Venue Category_Bubble Tea Shop,Venue Category_Café,Venue Category_Middle Eastern Restaurant,Venue Category_Japanese Restaurant,Venue Category_Fast Food Restaurant,Venue Category_Bookstore,Venue Category_Ramen Restaurant
8,"Harbourfront East , Union Station , Toronto Is...",Venue Category_Coffee Shop,Venue Category_Aquarium,Venue Category_Hotel,Venue Category_Café,Venue Category_Sporting Goods Shop,Venue Category_Italian Restaurant,Venue Category_Scenic Lookout,Venue Category_Brewery,Venue Category_Restaurant,Venue Category_Fried Chicken Joint
9,"Kensington Market , Chinatown , Grange Park",Venue Category_Café,Venue Category_Coffee Shop,Venue Category_Vietnamese Restaurant,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Mexican Restaurant,Venue Category_Grocery Store,Venue Category_Dessert Shop,Venue Category_Farmers Market,Venue Category_Park,Venue Category_Bar


## Cluster Neighborhoods

In [152]:
# set number of clusters
kclusters = 5

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 3, 0, 2, 0, 0, 0, 0, 0, 0], dtype=int32)

In [153]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
neighborhoods_venues_sorted

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,Berczy Park,Venue Category_Coffee Shop,Venue Category_Cocktail Bar,Venue Category_Bakery,Venue Category_Cheese Shop,Venue Category_Farmers Market,Venue Category_Pharmacy,Venue Category_Restaurant,Venue Category_Seafood Restaurant,Venue Category_Beer Bar,Venue Category_Thai Restaurant
1,3,"CN Tower , King and Spadina , Railway Lands , ...",Venue Category_Airport Lounge,Venue Category_Airport Terminal,Venue Category_Plane,Venue Category_Harbor / Marina,Venue Category_Sculpture Garden,Venue Category_Boat or Ferry,Venue Category_Rental Car Location,Venue Category_Bar,Venue Category_Coffee Shop,Venue Category_Boutique
2,0,Central Bay Street,Venue Category_Coffee Shop,Venue Category_Italian Restaurant,Venue Category_Sandwich Place,Venue Category_Café,Venue Category_Thai Restaurant,Venue Category_Salad Place,Venue Category_Bubble Tea Shop,Venue Category_Burger Joint,Venue Category_Japanese Restaurant,Venue Category_Modern European Restaurant
3,2,Christie,Venue Category_Grocery Store,Venue Category_Café,Venue Category_Park,Venue Category_Candy Store,Venue Category_Italian Restaurant,Venue Category_Athletics & Sports,Venue Category_Restaurant,Venue Category_Baby Store,Venue Category_Nightclub,Venue Category_Coffee Shop
4,0,Church and Wellesley,Venue Category_Coffee Shop,Venue Category_Japanese Restaurant,Venue Category_Sushi Restaurant,Venue Category_Restaurant,Venue Category_Yoga Studio,Venue Category_Pub,Venue Category_Men's Store,Venue Category_Mediterranean Restaurant,Venue Category_Hotel,Venue Category_Gay Bar
5,0,"Commerce Court , Victoria Hotel",Venue Category_Coffee Shop,Venue Category_Restaurant,Venue Category_Café,Venue Category_Hotel,Venue Category_Gym,Venue Category_Italian Restaurant,Venue Category_American Restaurant,Venue Category_Seafood Restaurant,Venue Category_Cocktail Bar,Venue Category_Japanese Restaurant
6,0,"First Canadian Place , Underground city",Venue Category_Coffee Shop,Venue Category_Café,Venue Category_Hotel,Venue Category_Restaurant,Venue Category_Japanese Restaurant,Venue Category_Gym,Venue Category_Asian Restaurant,Venue Category_Seafood Restaurant,Venue Category_Salad Place,Venue Category_Deli / Bodega
7,0,"Garden District, Ryerson",Venue Category_Clothing Store,Venue Category_Coffee Shop,Venue Category_Cosmetics Shop,Venue Category_Bubble Tea Shop,Venue Category_Café,Venue Category_Middle Eastern Restaurant,Venue Category_Japanese Restaurant,Venue Category_Fast Food Restaurant,Venue Category_Bookstore,Venue Category_Ramen Restaurant
8,0,"Harbourfront East , Union Station , Toronto Is...",Venue Category_Coffee Shop,Venue Category_Aquarium,Venue Category_Hotel,Venue Category_Café,Venue Category_Sporting Goods Shop,Venue Category_Italian Restaurant,Venue Category_Scenic Lookout,Venue Category_Brewery,Venue Category_Restaurant,Venue Category_Fried Chicken Joint
9,0,"Kensington Market , Chinatown , Grange Park",Venue Category_Café,Venue Category_Coffee Shop,Venue Category_Vietnamese Restaurant,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Mexican Restaurant,Venue Category_Grocery Store,Venue Category_Dessert Shop,Venue Category_Farmers Market,Venue Category_Park,Venue Category_Bar


In [156]:
downtown_df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


In [157]:

downtown_merged = downtown_df

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

downtown_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,0,Venue Category_Coffee Shop,Venue Category_Park,Venue Category_Bakery,Venue Category_Pub,Venue Category_Breakfast Spot,Venue Category_Café,Venue Category_Theater,Venue Category_Mexican Restaurant,Venue Category_Restaurant,Venue Category_Chocolate Shop
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Venue Category_Clothing Store,Venue Category_Coffee Shop,Venue Category_Cosmetics Shop,Venue Category_Bubble Tea Shop,Venue Category_Café,Venue Category_Middle Eastern Restaurant,Venue Category_Japanese Restaurant,Venue Category_Fast Food Restaurant,Venue Category_Bookstore,Venue Category_Ramen Restaurant
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Venue Category_Coffee Shop,Venue Category_Café,Venue Category_Gastropub,Venue Category_Cocktail Bar,Venue Category_Restaurant,Venue Category_Cosmetics Shop,Venue Category_Gym,Venue Category_Farmers Market,Venue Category_Moroccan Restaurant,Venue Category_Department Store
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Venue Category_Coffee Shop,Venue Category_Cocktail Bar,Venue Category_Bakery,Venue Category_Cheese Shop,Venue Category_Farmers Market,Venue Category_Pharmacy,Venue Category_Restaurant,Venue Category_Seafood Restaurant,Venue Category_Beer Bar,Venue Category_Thai Restaurant
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Venue Category_Coffee Shop,Venue Category_Italian Restaurant,Venue Category_Sandwich Place,Venue Category_Café,Venue Category_Thai Restaurant,Venue Category_Salad Place,Venue Category_Bubble Tea Shop,Venue Category_Burger Joint,Venue Category_Japanese Restaurant,Venue Category_Modern European Restaurant


In [159]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighbourhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [160]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 0, 
                    downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0,Venue Category_Coffee Shop,Venue Category_Park,Venue Category_Bakery,Venue Category_Pub,Venue Category_Breakfast Spot,Venue Category_Café,Venue Category_Theater,Venue Category_Mexican Restaurant,Venue Category_Restaurant,Venue Category_Chocolate Shop
1,Downtown Toronto,0,Venue Category_Clothing Store,Venue Category_Coffee Shop,Venue Category_Cosmetics Shop,Venue Category_Bubble Tea Shop,Venue Category_Café,Venue Category_Middle Eastern Restaurant,Venue Category_Japanese Restaurant,Venue Category_Fast Food Restaurant,Venue Category_Bookstore,Venue Category_Ramen Restaurant
2,Downtown Toronto,0,Venue Category_Coffee Shop,Venue Category_Café,Venue Category_Gastropub,Venue Category_Cocktail Bar,Venue Category_Restaurant,Venue Category_Cosmetics Shop,Venue Category_Gym,Venue Category_Farmers Market,Venue Category_Moroccan Restaurant,Venue Category_Department Store
3,Downtown Toronto,0,Venue Category_Coffee Shop,Venue Category_Cocktail Bar,Venue Category_Bakery,Venue Category_Cheese Shop,Venue Category_Farmers Market,Venue Category_Pharmacy,Venue Category_Restaurant,Venue Category_Seafood Restaurant,Venue Category_Beer Bar,Venue Category_Thai Restaurant
4,Downtown Toronto,0,Venue Category_Coffee Shop,Venue Category_Italian Restaurant,Venue Category_Sandwich Place,Venue Category_Café,Venue Category_Thai Restaurant,Venue Category_Salad Place,Venue Category_Bubble Tea Shop,Venue Category_Burger Joint,Venue Category_Japanese Restaurant,Venue Category_Modern European Restaurant
6,Downtown Toronto,0,Venue Category_Coffee Shop,Venue Category_Café,Venue Category_Restaurant,Venue Category_Thai Restaurant,Venue Category_Hotel,Venue Category_Deli / Bodega,Venue Category_Clothing Store,Venue Category_Gym,Venue Category_Pizza Place,Venue Category_Concert Hall
7,Downtown Toronto,0,Venue Category_Coffee Shop,Venue Category_Aquarium,Venue Category_Hotel,Venue Category_Café,Venue Category_Sporting Goods Shop,Venue Category_Italian Restaurant,Venue Category_Scenic Lookout,Venue Category_Brewery,Venue Category_Restaurant,Venue Category_Fried Chicken Joint
8,Downtown Toronto,0,Venue Category_Coffee Shop,Venue Category_Hotel,Venue Category_Café,Venue Category_Restaurant,Venue Category_Salad Place,Venue Category_Seafood Restaurant,Venue Category_Japanese Restaurant,Venue Category_Italian Restaurant,Venue Category_Sporting Goods Shop,Venue Category_Sushi Restaurant
9,Downtown Toronto,0,Venue Category_Coffee Shop,Venue Category_Restaurant,Venue Category_Café,Venue Category_Hotel,Venue Category_Gym,Venue Category_Italian Restaurant,Venue Category_American Restaurant,Venue Category_Seafood Restaurant,Venue Category_Cocktail Bar,Venue Category_Japanese Restaurant
11,Downtown Toronto,0,Venue Category_Café,Venue Category_Coffee Shop,Venue Category_Vietnamese Restaurant,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Mexican Restaurant,Venue Category_Grocery Store,Venue Category_Dessert Shop,Venue Category_Farmers Market,Venue Category_Park,Venue Category_Bar


In [161]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 1, 
                    downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Downtown Toronto,1,Venue Category_Park,Venue Category_Playground,Venue Category_Trail,Venue Category_Dance Studio,Venue Category_Dumpling Restaurant,Venue Category_Donut Shop,Venue Category_Doner Restaurant,Venue Category_Dog Run,Venue Category_Distribution Center,Venue Category_Discount Store


In [162]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 2, 
                    downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Downtown Toronto,2,Venue Category_Grocery Store,Venue Category_Café,Venue Category_Park,Venue Category_Candy Store,Venue Category_Italian Restaurant,Venue Category_Athletics & Sports,Venue Category_Restaurant,Venue Category_Baby Store,Venue Category_Nightclub,Venue Category_Coffee Shop


In [163]:
downtown_merged.loc[downtown_merged['Cluster Labels'] ==3, 
                    downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Downtown Toronto,3,Venue Category_Airport Lounge,Venue Category_Airport Terminal,Venue Category_Plane,Venue Category_Harbor / Marina,Venue Category_Sculpture Garden,Venue Category_Boat or Ferry,Venue Category_Rental Car Location,Venue Category_Bar,Venue Category_Coffee Shop,Venue Category_Boutique


In [164]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 4, 
                    downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Downtown Toronto,4,Venue Category_Café,Venue Category_Bookstore,Venue Category_Bar,Venue Category_Italian Restaurant,Venue Category_Japanese Restaurant,Venue Category_Bakery,Venue Category_Yoga Studio,Venue Category_Beer Bar,Venue Category_Comfort Food Restaurant,Venue Category_Sandwich Place
