# Toronto Neighbourhood Analysis

## 10 neighbourhoods will be selected based on crime rates and average rents. Then the neighbourhoods will be clustered and selected based on the venues in or around them.

#### Import the libraries and the data

In [1]:
import pandas as pd
import seaborn as sns
import folium
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
from geopy.geocoders import Nominatim 
import requests 
import json 
from pandas.io.json import json_normalize 

In [2]:
df = pd.read_csv('C:/Users/abmuhame/Desktop/projects/wellbeing_toronto.csv')

In [3]:
df.head()

Unnamed: 0,Neighbourhood,Neighbourhood Id,Drug Arrests,Assaults,Sexual Assaults,Break & Enters,Robberies,Vehicle Thefts,Thefts,Murders,Tenant Average Rent
0,West Humber-Clairville,1,29,259,31,131,82,316,38,3,945
1,Mount Olive-Silverstone-Jamestown,2,24,213,16,34,81,42,3,1,921
2,Thistletown-Beaumond Heights,3,7,35,3,23,12,13,1,0,887
3,Rexdale-Kipling,4,11,57,5,16,15,22,0,0,857
4,Elms-Old Rexdale,5,9,53,2,9,14,16,0,0,966


In [4]:
df.columns.to_list()

['Neighbourhood',
 'Neighbourhood Id',
 'Drug Arrests',
 'Assaults',
 'Sexual Assaults',
 'Break & Enters',
 'Robberies',
 'Vehicle Thefts',
 'Thefts',
 'Murders',
 'Tenant Average Rent']

In [5]:
toronto_avg_rent = df[['Neighbourhood', 'Neighbourhood Id', 'Tenant Average Rent']]

In [6]:
toronto_avg_rent.head()

Unnamed: 0,Neighbourhood,Neighbourhood Id,Tenant Average Rent
0,West Humber-Clairville,1,945
1,Mount Olive-Silverstone-Jamestown,2,921
2,Thistletown-Beaumond Heights,3,887
3,Rexdale-Kipling,4,857
4,Elms-Old Rexdale,5,966


In [7]:
toronto_crime = df[['Neighbourhood', 'Drug Arrests', 'Assaults', 'Sexual Assaults', 'Break & Enters', 'Robberies', 
                    'Vehicle Thefts', 'Thefts', 'Murders']]

In [8]:
toronto_crime.head()

Unnamed: 0,Neighbourhood,Drug Arrests,Assaults,Sexual Assaults,Break & Enters,Robberies,Vehicle Thefts,Thefts,Murders
0,West Humber-Clairville,29,259,31,131,82,316,38,3
1,Mount Olive-Silverstone-Jamestown,24,213,16,34,81,42,3,1
2,Thistletown-Beaumond Heights,7,35,3,23,12,13,1,0
3,Rexdale-Kipling,11,57,5,16,15,22,0,0
4,Elms-Old Rexdale,9,53,2,9,14,16,0,0


In [9]:
toronto_crime['Total'] = toronto_crime.sum(axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [10]:
toronto_crime.head()

Unnamed: 0,Neighbourhood,Drug Arrests,Assaults,Sexual Assaults,Break & Enters,Robberies,Vehicle Thefts,Thefts,Murders,Total
0,West Humber-Clairville,29,259,31,131,82,316,38,3,889
1,Mount Olive-Silverstone-Jamestown,24,213,16,34,81,42,3,1,414
2,Thistletown-Beaumond Heights,7,35,3,23,12,13,1,0,94
3,Rexdale-Kipling,11,57,5,16,15,22,0,0,126
4,Elms-Old Rexdale,9,53,2,9,14,16,0,0,103


In [11]:
# load data into panda dataframe
data = pd.read_csv('https://ckan0.cf.opendata.inter.prod-toronto.ca/download_resource/a083c865-6d60-4d1d-b6c6-b0c8a85f9c15?format=csv&projection=4326')

In [12]:
neighborhoods_coords = data[['AREA_SHORT_CODE', 'AREA_NAME', 'LATITUDE', 'LONGITUDE']]

In [13]:
neighborhoods_coords.head()

Unnamed: 0,AREA_SHORT_CODE,AREA_NAME,LATITUDE,LONGITUDE
0,94,Wychwood (94),43.676919,-79.425515
1,100,Yonge-Eglinton (100),43.704689,-79.40359
2,97,Yonge-St.Clair (97),43.687859,-79.397871
3,27,York University Heights (27),43.765736,-79.488883
4,31,Yorkdale-Glen Park (31),43.714672,-79.457108


In [14]:
toronto_geo = r'C:/Users/abmuhame/Desktop/projects/toronto.geojson' # geojson file

#### Descriptive Statistics

In [15]:
toronto_crime.describe()

Unnamed: 0,Drug Arrests,Assaults,Sexual Assaults,Break & Enters,Robberies,Vehicle Thefts,Thefts,Murders,Total
count,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0
mean,20.757143,108.421429,11.035714,45.221429,20.935714,23.135714,6.957143,0.521429,236.985714
std,26.473516,102.18787,12.221817,31.438891,20.128083,31.197724,8.673843,1.035123,200.854541
min,0.0,9.0,0.0,8.0,0.0,1.0,0.0,0.0,40.0
25%,7.0,47.75,4.75,24.0,8.0,9.0,2.0,0.0,116.75
50%,12.5,78.0,8.0,37.5,15.0,16.0,4.5,0.0,176.5
75%,24.0,133.75,12.0,57.0,25.0,26.25,8.0,1.0,277.75
max,174.0,712.0,88.0,213.0,112.0,316.0,56.0,6.0,1292.0


In [16]:
toronto_avg_rent['Tenant Average Rent'].describe()

count     140.000000
mean     1019.792857
std       219.621994
min       631.000000
25%       878.500000
50%       972.500000
75%      1124.750000
max      2388.000000
Name: Tenant Average Rent, dtype: float64

### Toronto Neighbourhoods Crime Rates Map

In [17]:
toronto_map = folium.Map(location=[43.6534817, -79.3839347], zoom_start=11, tiles='Mapbox Bright')

In [18]:
# create a numpy array of length 6 and has linear spacing from the minium crime to the maximum crime
threshold_scale = np.linspace(toronto_crime['Total'].min(),
                              toronto_crime['Total'].max(),
                              6, dtype=int)
threshold_scale = threshold_scale.tolist() # change the numpy array to a list
threshold_scale[-1] = threshold_scale[-1] + 1 # make sure that the last value of the list is greater than the maximum immigration

# generate choropleth map using the total crime of each neighbourhood in Toronto 
toronto_map.choropleth(
    geo_data=toronto_geo,
    data=toronto_crime,
    columns=['Neighbourhood', 'Total'],
    key_on='feature.properties.HOOD',
    threshold_scale=threshold_scale,
    fill_color='YlOrRd', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Crime rates in Toronto',
    reset=True
)

# display map
toronto_map

### Toronto Neighbourhoods Average Rents Map

In [19]:
map_toronto = folium.Map(location=[43.653908, -79.384293], zoom_start=11, tiles='Mapbox Bright')

In [20]:
# create a numpy array of length 6 and has linear spacing from the minium crime to the maximum crime
threshold_scale1 = np.linspace(toronto_avg_rent['Tenant Average Rent'].min(),
                              toronto_avg_rent['Tenant Average Rent'].max(),
                              6, dtype=int)
threshold_scale1 = threshold_scale1.tolist() # change the numpy array to a list
threshold_scale1[-1] = threshold_scale1[-1] + 1 # make sure that the last value of the list is greater than the maximum immigration

# generate choropleth map using the total crime of each neighbourhood in Toronto 
map_toronto.choropleth(
    geo_data=toronto_geo,
    data=toronto_avg_rent,
    columns=['Neighbourhood', 'Tenant Average Rent'],
    key_on='feature.properties.HOOD',
    threshold_scale=threshold_scale1,
    fill_color='YlOrRd', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Average rent in Toronto',
    reset=True
)

# display map
map_toronto

### Selecting the safest and affordable neighbourhoods for clustering

In [21]:
combined_data = toronto_crime.merge(toronto_avg_rent)

In [22]:
combined_data = combined_data[['Neighbourhood Id', 'Neighbourhood', 'Total', 'Tenant Average Rent']]

In [23]:
combined_data.sort_values('Total', inplace=True)

In [24]:
safe_neighbourhoods = combined_data[:45]

In [25]:
safe_neighbourhoods 

Unnamed: 0,Neighbourhood Id,Neighbourhood,Total,Tenant Average Rent
113,114,Lambton Baby Point,40,814
101,102,Forest Hill North,57,1132
98,99,Mount Pleasant East,60,1097
100,101,Forest Hill South,66,1206
11,12,Markland Wood,67,1112
28,29,Maple Leaf,70,936
59,60,Woodbine-Lumsden,70,823
57,58,Old East York,72,959
55,56,Leaside-Bennington,73,1158
104,105,Lawrence Park North,73,1290


In [26]:
safe_neighbourhoods.sort_values('Tenant Average Rent', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [27]:
safe_neighbourhoods

Unnamed: 0,Neighbourhood Id,Neighbourhood,Total,Tenant Average Rent
47,48,Hillcrest Village,112,678
27,28,Rustic,93,740
111,112,Beechborough-Greenbrook,117,749
68,69,Blake-Jones,125,778
109,110,Keelesdale-Eglinton West,117,796
113,114,Lambton Baby Point,40,814
59,60,Woodbine-Lumsden,70,823
3,4,Rexdale-Kipling,126,857
2,3,Thistletown-Beaumond Heights,94,887
133,134,Highland Creek,118,909


In [28]:
affordable_areas = safe_neighbourhoods[:10]

In [29]:
affordable_areas

Unnamed: 0,Neighbourhood Id,Neighbourhood,Total,Tenant Average Rent
47,48,Hillcrest Village,112,678
27,28,Rustic,93,740
111,112,Beechborough-Greenbrook,117,749
68,69,Blake-Jones,125,778
109,110,Keelesdale-Eglinton West,117,796
113,114,Lambton Baby Point,40,814
59,60,Woodbine-Lumsden,70,823
3,4,Rexdale-Kipling,126,857
2,3,Thistletown-Beaumond Heights,94,887
133,134,Highland Creek,118,909


##### Merge affordable_areas dataframe with neighborhoods_coords to create selected_neighbourhoods dataframe

In [30]:
neighborhoods_coords

Unnamed: 0,AREA_SHORT_CODE,AREA_NAME,LATITUDE,LONGITUDE
0,94,Wychwood (94),43.676919,-79.425515
1,100,Yonge-Eglinton (100),43.704689,-79.403590
2,97,Yonge-St.Clair (97),43.687859,-79.397871
3,27,York University Heights (27),43.765736,-79.488883
4,31,Yorkdale-Glen Park (31),43.714672,-79.457108
...,...,...,...,...
135,124,Kennedy Park (124),43.725556,-79.260382
136,78,Kensington-Chinatown (78),43.653554,-79.397240
137,6,Kingsview Village-The Westway (6),43.698993,-79.547863
138,15,Kingsway South (15),43.653520,-79.510577


In [31]:
selected_neighourhoods = affordable_areas.merge(neighborhoods_coords, left_on='Neighbourhood Id', right_on='AREA_SHORT_CODE')

In [32]:
selected_neighourhoods

Unnamed: 0,Neighbourhood Id,Neighbourhood,Total,Tenant Average Rent,AREA_SHORT_CODE,AREA_NAME,LATITUDE,LONGITUDE
0,48,Hillcrest Village,112,678,48,Hillcrest Village (48),43.802988,-79.354804
1,28,Rustic,93,740,28,Rustic (28),43.711609,-79.498091
2,112,Beechborough-Greenbrook,117,749,112,Beechborough-Greenbrook (112),43.693216,-79.479473
3,69,Blake-Jones,125,778,69,Blake-Jones (69),43.676173,-79.337394
4,110,Keelesdale-Eglinton West,117,796,110,Keelesdale-Eglinton West (110),43.685727,-79.471437
5,114,Lambton Baby Point,40,814,114,Lambton Baby Point (114),43.65742,-79.496045
6,60,Woodbine-Lumsden,70,823,60,Woodbine-Lumsden (60),43.694107,-79.311164
7,4,Rexdale-Kipling,126,857,4,Rexdale-Kipling (4),43.723725,-79.566228
8,3,Thistletown-Beaumond Heights,94,887,3,Thistletown-Beaumond Heights (3),43.737988,-79.563491
9,134,Highland Creek,118,909,134,Highland Creek (134),43.790775,-79.177472


#### Define Foursquare Credentials and Version

In [33]:
CLIENT_ID = 'RKDJQBZNNPUKYMPGSKMECRP2HQJQBJNH1BVS3NJDKSUD04M4' # your Foursquare ID
CLIENT_SECRET = 'IIS1BX3SYPWJ5HBPGJKJZ2DGPJ2033FWCKCDBSZHSFTY3UUA' # your Foursquare Secret
VERSION = '20200705' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RKDJQBZNNPUKYMPGSKMECRP2HQJQBJNH1BVS3NJDKSUD04M4
CLIENT_SECRET:IIS1BX3SYPWJ5HBPGJKJZ2DGPJ2033FWCKCDBSZHSFTY3UUA


In [34]:
# Get the neighborhood names
selected_neighourhoods.loc[0, 'Neighbourhood']

'Hillcrest Village'

In [35]:
#Get the neighborhood's latitude and longitude values.

neighborhood_latitude = selected_neighourhoods.loc[0, 'LATITUDE'] # neighborhood latitude value
neighborhood_longitude = selected_neighourhoods.loc[0, 'LONGITUDE'] # neighborhood longitude value

neighborhood_name = selected_neighourhoods.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Hillcrest Village are 43.8029878281, -79.3548039999.


#### Now, let's get the top 100 venues that are near selected Toronto neighbourhoods within a radius of 1000 meters.

In [36]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 1000 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=RKDJQBZNNPUKYMPGSKMECRP2HQJQBJNH1BVS3NJDKSUD04M4&client_secret=IIS1BX3SYPWJ5HBPGJKJZ2DGPJ2033FWCKCDBSZHSFTY3UUA&v=20200705&ll=43.8029878281,-79.3548039999&radius=1000&limit=100'

In [37]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f0ca0288dec2f5f3372c0cc'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 22,
  'suggestedBounds': {'ne': {'lat': 43.81198783710001,
    'lng': -79.34235713199735},
   'sw': {'lat': 43.79398781909999, 'lng': -79.36725086780265}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd9842be914a593adbd56fa',
       'name': 'Tastee',
       'location': {'address': '3913 Don Mills Rd.',
        'crossStreet': 'at Cliffwood Rd.',
        'lat': 43.80772211146167,
        'lng': -79.35679781099806,
        'labeledLatLngs': [{'label': 'display',
      

In [38]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [39]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Tastee,Bakery,43.807722,-79.356798
1,Ikea Warehouse,Furniture / Home Store,43.808544,-79.345188
2,GoodLife Fitness North York Gordon Baker and M...,Gym,43.801362,-79.343862
3,TD Canada Trust,Bank,43.794247,-79.353239
4,Subway,Sandwich Place,43.794492,-79.353595
5,Starbucks,Coffee Shop,43.795804,-79.349547
6,Listo's No Frills,Grocery Store,43.794373,-79.353754
7,Woodbrooke Estate,Residential Building (Apartment / Condo),43.802067,-79.354347
8,Starbucks,Coffee Shop,43.795804,-79.349547
9,McNicoll Park,Park,43.798994,-79.35284


In [40]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

22 venues were returned by Foursquare.


## Explore selected safe and affordable neighborhoods in Toronto

In [41]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [42]:
selected_neighborhood_venues = getNearbyVenues(names=selected_neighourhoods['Neighbourhood'],
                                   latitudes=selected_neighourhoods['LATITUDE'],
                                   longitudes=selected_neighourhoods['LONGITUDE']
                                  )

Hillcrest Village
Rustic
Beechborough-Greenbrook
Blake-Jones
Keelesdale-Eglinton West
Lambton Baby Point
Woodbine-Lumsden
Rexdale-Kipling
Thistletown-Beaumond Heights
Highland Creek


In [43]:
print(selected_neighborhood_venues.shape)
selected_neighborhood_venues.head()

(62, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hillcrest Village,43.802988,-79.354804,Woodbrooke Estate,43.802067,-79.354347,Residential Building (Apartment / Condo)
1,Hillcrest Village,43.802988,-79.354804,McNicoll Park,43.798994,-79.35284,Park
2,Beechborough-Greenbrook,43.693216,-79.479473,McDonald's,43.692926,-79.479982,Fast Food Restaurant
3,Beechborough-Greenbrook,43.693216,-79.479473,York Museum,43.689737,-79.476943,Museum
4,Beechborough-Greenbrook,43.693216,-79.479473,2 Brothers shawarma,43.690118,-79.47515,Turkish Restaurant


In [44]:
selected_neighborhood_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Beechborough-Greenbrook,3,3,3,3,3,3
Blake-Jones,16,16,16,16,16,16
Highland Creek,6,6,6,6,6,6
Hillcrest Village,2,2,2,2,2,2
Keelesdale-Eglinton West,8,8,8,8,8,8
Lambton Baby Point,3,3,3,3,3,3
Rexdale-Kipling,1,1,1,1,1,1
Thistletown-Beaumond Heights,14,14,14,14,14,14
Woodbine-Lumsden,9,9,9,9,9,9


In [45]:
print('There are {} uniques categories.'.format(len(selected_neighborhood_venues['Venue Category'].unique())))

There are 44 uniques categories.


#### Analyze each neighborhoods

In [46]:
# one hot encoding
selected_neighourhoods_onehot = pd.get_dummies(selected_neighborhood_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
selected_neighourhoods_onehot['Neighborhood'] = selected_neighborhood_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [selected_neighourhoods_onehot.columns[-1]] + list(selected_neighourhoods_onehot.columns[:-1])
selected_neighourhoods_onehot = selected_neighourhoods_onehot[fixed_columns]

selected_neighourhoods_onehot.head()

Unnamed: 0,Wine Shop,Asian Restaurant,Bakery,Bank,Beer Bar,Beer Store,Bookstore,Burger Joint,Bus Station,Café,...,River,Sandwich Place,Skating Rink,Spa,Supermarket,Thai Restaurant,Thrift / Vintage Store,Toy / Game Store,Turkish Restaurant,Video Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [47]:
selected_neighourhoods_onehot.shape

(62, 44)

In [48]:
selected_neighourhoods_grouped = selected_neighourhoods_onehot.groupby('Neighborhood').mean().reset_index()
selected_neighourhoods_grouped

Unnamed: 0,Neighborhood,Wine Shop,Asian Restaurant,Bakery,Bank,Beer Bar,Beer Store,Bookstore,Burger Joint,Bus Station,...,River,Sandwich Place,Skating Rink,Spa,Supermarket,Thai Restaurant,Thrift / Vintage Store,Toy / Game Store,Turkish Restaurant,Video Store
0,Beechborough-Greenbrook,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0
1,Blake-Jones,0.0,0.0625,0.0,0.0,0.0625,0.0,0.0625,0.0625,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0
2,Highland Creek,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Keelesdale-Eglinton West,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0
5,Lambton Baby Point,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Rexdale-Kipling,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Thistletown-Beaumond Heights,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0
8,Woodbine-Lumsden,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,...,0.0,0.0,0.222222,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111


In [49]:
selected_neighourhoods_grouped.shape

(9, 44)

In [50]:
num_top_venues = 5

for hood in selected_neighourhoods_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = selected_neighourhoods_grouped[selected_neighourhoods_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Beechborough-Greenbrook----
                  venue  freq
0    Turkish Restaurant  0.33
1  Fast Food Restaurant  0.33
2                Museum  0.33
3             Wine Shop  0.00
4           Pizza Place  0.00


----Blake-Jones----
                venue  freq
0                Café  0.19
1         Coffee Shop  0.12
2              Hostel  0.06
3  Dim Sum Restaurant  0.06
4           Gastropub  0.06


----Highland Creek----
                        venue  freq
0                 IT Services  0.17
1  Construction & Landscaping  0.17
2                Home Service  0.17
3                        Park  0.17
4                 Bus Station  0.17


----Hillcrest Village----
                                      venue  freq
0  Residential Building (Apartment / Condo)   0.5
1                                      Park   0.5
2                                 Wine Shop   0.0
3                         Indian Restaurant   0.0
4                             Jewelry Store   0.0


----Keelesdale-Eglinton Wes

In [51]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [52]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = selected_neighourhoods_grouped['Neighborhood']

for ind in np.arange(selected_neighourhoods_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(selected_neighourhoods_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Beechborough-Greenbrook,Turkish Restaurant,Fast Food Restaurant,Museum,Gastropub,Garden,Diner,Dim Sum Restaurant,Curling Ice,Convenience Store,Construction & Landscaping
1,Blake-Jones,Café,Coffee Shop,Hostel,Burger Joint,Diner,Dim Sum Restaurant,Nail Salon,Gastropub,Pizza Place,Bookstore
2,Highland Creek,Home Service,IT Services,Bus Station,Construction & Landscaping,Park,Coffee Shop,Garden,Fast Food Restaurant,Diner,Dim Sum Restaurant
3,Hillcrest Village,Park,Residential Building (Apartment / Condo),Video Store,Caribbean Restaurant,Fast Food Restaurant,Diner,Dim Sum Restaurant,Curling Ice,Convenience Store,Construction & Landscaping
4,Keelesdale-Eglinton West,Pizza Place,Bakery,Latin American Restaurant,Sandwich Place,Wine Shop,Fast Food Restaurant,Thrift / Vintage Store,Coffee Shop,Diner,Dim Sum Restaurant


### Cluster neighborhoods

In [53]:
# set number of clusters
kclusters = 6

selected_neighourhoods_grouped_clustering = selected_neighourhoods_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(selected_neighourhoods_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 1, 3, 4, 1, 5, 2, 1, 3])

In [54]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster_labels', kmeans.labels_)

In [55]:
neighborhoods_venues_sorted['Cluster_labels'] = neighborhoods_venues_sorted['Cluster_labels'].astype('int64')

In [56]:
type(neighborhoods_venues_sorted['Cluster_labels'][0])

numpy.int64

In [57]:
selected_neighourhoods_merged = neighborhoods_venues_sorted

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
selected_neighourhoods_merged = selected_neighourhoods_merged.join(selected_neighourhoods.set_index('Neighbourhood'), on='Neighborhood')

selected_neighourhoods_merged.head() # check the last columns!

Unnamed: 0,Cluster_labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Neighbourhood Id,Total,Tenant Average Rent,AREA_SHORT_CODE,AREA_NAME,LATITUDE,LONGITUDE
0,0,Beechborough-Greenbrook,Turkish Restaurant,Fast Food Restaurant,Museum,Gastropub,Garden,Diner,Dim Sum Restaurant,Curling Ice,Convenience Store,Construction & Landscaping,112,117,749,112,Beechborough-Greenbrook (112),43.693216,-79.479473
1,1,Blake-Jones,Café,Coffee Shop,Hostel,Burger Joint,Diner,Dim Sum Restaurant,Nail Salon,Gastropub,Pizza Place,Bookstore,69,125,778,69,Blake-Jones (69),43.676173,-79.337394
2,3,Highland Creek,Home Service,IT Services,Bus Station,Construction & Landscaping,Park,Coffee Shop,Garden,Fast Food Restaurant,Diner,Dim Sum Restaurant,134,118,909,134,Highland Creek (134),43.790775,-79.177472
3,4,Hillcrest Village,Park,Residential Building (Apartment / Condo),Video Store,Caribbean Restaurant,Fast Food Restaurant,Diner,Dim Sum Restaurant,Curling Ice,Convenience Store,Construction & Landscaping,48,112,678,48,Hillcrest Village (48),43.802988,-79.354804
4,1,Keelesdale-Eglinton West,Pizza Place,Bakery,Latin American Restaurant,Sandwich Place,Wine Shop,Fast Food Restaurant,Thrift / Vintage Store,Coffee Shop,Diner,Dim Sum Restaurant,110,117,796,110,Keelesdale-Eglinton West (110),43.685727,-79.471437


In [58]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="tc_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto city are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto city are 43.6534817, -79.3839347.


In [59]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(selected_neighourhoods_merged['LATITUDE'], selected_neighourhoods_merged['LONGITUDE'], selected_neighourhoods_merged['Neighborhood'], selected_neighourhoods_merged['Cluster_labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

#### Cluster 1

In [60]:
selected_neighourhoods_merged.loc[selected_neighourhoods_merged['Cluster_labels'] == 0, selected_neighourhoods_merged.columns[[1] + list(range(2, selected_neighourhoods_merged.shape[1] - 7))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Beechborough-Greenbrook,Turkish Restaurant,Fast Food Restaurant,Museum,Gastropub,Garden,Diner,Dim Sum Restaurant,Curling Ice,Convenience Store,Construction & Landscaping


#### Cluster 2

In [61]:
selected_neighourhoods_merged.loc[selected_neighourhoods_merged['Cluster_labels'] == 1, selected_neighourhoods_merged.columns[[1] + list(range(2, selected_neighourhoods_merged.shape[1] - 7))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Blake-Jones,Café,Coffee Shop,Hostel,Burger Joint,Diner,Dim Sum Restaurant,Nail Salon,Gastropub,Pizza Place,Bookstore
4,Keelesdale-Eglinton West,Pizza Place,Bakery,Latin American Restaurant,Sandwich Place,Wine Shop,Fast Food Restaurant,Thrift / Vintage Store,Coffee Shop,Diner,Dim Sum Restaurant
7,Thistletown-Beaumond Heights,Indian Restaurant,Caribbean Restaurant,Pharmacy,Asian Restaurant,Thai Restaurant,Supermarket,Bank,Ice Cream Shop,Pizza Place,Coffee Shop


#### Cluster 3

In [62]:
selected_neighourhoods_merged.loc[selected_neighourhoods_merged['Cluster_labels'] == 2, selected_neighourhoods_merged.columns[[1] + list(range(2, selected_neighourhoods_merged.shape[1] - 7))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Rexdale-Kipling,Jewelry Store,Video Store,Home Service,Garden,Fast Food Restaurant,Diner,Dim Sum Restaurant,Curling Ice,Convenience Store,Construction & Landscaping


#### Cluster 4

In [63]:
selected_neighourhoods_merged.loc[selected_neighourhoods_merged['Cluster_labels'] == 3, selected_neighourhoods_merged.columns[[1] + list(range(2, selected_neighourhoods_merged.shape[1] - 7))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Highland Creek,Home Service,IT Services,Bus Station,Construction & Landscaping,Park,Coffee Shop,Garden,Fast Food Restaurant,Diner,Dim Sum Restaurant
8,Woodbine-Lumsden,Park,Skating Rink,Video Store,Curling Ice,Spa,Beer Store,Convenience Store,Fast Food Restaurant,Diner,Dim Sum Restaurant


#### Cluster 5

In [64]:
selected_neighourhoods_merged.loc[selected_neighourhoods_merged['Cluster_labels'] == 4, selected_neighourhoods_merged.columns[[1] + list(range(2, selected_neighourhoods_merged.shape[1] - 7))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Hillcrest Village,Park,Residential Building (Apartment / Condo),Video Store,Caribbean Restaurant,Fast Food Restaurant,Diner,Dim Sum Restaurant,Curling Ice,Convenience Store,Construction & Landscaping


#### Cluster 6

In [65]:
selected_neighourhoods_merged.loc[selected_neighourhoods_merged['Cluster_labels'] == 5, selected_neighourhoods_merged.columns[[1] + list(range(2, selected_neighourhoods_merged.shape[1] - 7))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Lambton Baby Point,Garden,River,Park,Caribbean Restaurant,Fast Food Restaurant,Diner,Dim Sum Restaurant,Curling Ice,Convenience Store,Construction & Landscaping
