Segmenting and Clustering Neighborhoods in Toronto
============================================

### Begin Part 1

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library


Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.



In [2]:
# The code was removed by Watson Studio for sharing.

#####  Import pre-processed list of Foursquare categories with top-level category identified

In [3]:
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

# Module-level function (no enclosing class is visible here); it ignores its argument and always returns 0.
# NOTE(review): presumably bound onto the object-storage stream in the hidden cell that follows so that
# pandas can read from it -- confirm against the removed Watson Studio code.
def __iter__(self): return 0

In [4]:
# The code was removed by Watson Studio for sharing.

In [5]:
cat_list = pd.read_csv(body)


In [6]:
small_cat_list = cat_list.drop(["Countries", "Top-Level"], axis=1)


In [7]:
small_cat_list.head()

Unnamed: 0,CategoryName,ID,Top-Level Category
0,Arts & Entertainment,4d4b7104d754a06370d81259,Arts & Entertainment
1,Amphitheater,56aa371be4b08b9a8d5734db,Arts & Entertainment
2,Aquarium,4fceea171983d5d06c3e9823,Arts & Entertainment
3,Arcade,4bf58dd8d48988d1e1931735,Arts & Entertainment
4,Art Gallery,4bf58dd8d48988d1e2931735,Arts & Entertainment


##### Form list of Toronto neighborhoods with latitude and longitude using postal codes

In [8]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [9]:
# Retrieve the first HTML table found at `url` (first table row used as header)
# and relabel its columns with the three names used in the rest of the notebook.

column_names = ["PostalCode", "Borough", "Neighborhood"]

tn = pd.read_html(url, header=0)[0]
tn.columns = column_names

In [10]:
# drop rows with Borough = "Not assigned"

tn = tn[tn.Borough != 'Not assigned']

In [11]:
# Replace Neighborhood cells that are 'Not assigned' with the Borough of the same row.
# Series.where keeps the value where the condition is True and takes the aligned
# Borough value otherwise. assign() returns a new frame, so nothing is written
# through the filtered slice produced by the previous cell (no SettingWithCopyWarning).

tn = tn.assign(
    Neighborhood=tn['Neighborhood'].where(tn['Neighborhood'] != 'Not assigned', tn['Borough'])
)

In [12]:
# Group data by Postal Code and Borough to obtain data frame shown in assignment.

tn = tn.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(', '.join).reset_index()
tn.shape

(103, 3)

In [13]:
tn

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [14]:
toronto_data_url = 'https://cocl.us/Geospatial_data'

In [15]:
toronto_data = pd.read_csv(toronto_data_url)

In [16]:
# Rename postal code column name to match previous data set
toronto_data.rename(columns={'Postal Code':'PostalCode'},inplace=True)

In [17]:
# Merge Toronto neighborhood dataframe with Toronto Latitude / Longitude data
tn_merge = pd.merge(tn, toronto_data, on="PostalCode")

In [18]:
tn_merge

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848




## Form base map of Toronto

In [19]:
address = 'Toronto, ON'

# Look up the city centre; these coordinates centre both folium maps below.
geolocator = Nominatim(user_agent="t_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on `location.latitude`.
if location is None:
    raise ValueError('Geocoder returned no result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [20]:
# Base map centred on the geocoded Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One blue circle per row of tn_merge; the popup reads "<neighborhoods> // <borough>"
marker_rows = zip(tn_merge['Latitude'], tn_merge['Longitude'], tn_merge['Borough'], tn_merge['Neighborhood'])
for lat, lng, borough, neighborhood in marker_rows:
    label = folium.Popup('{} // {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

### Include all Toronto neighborhoods in the analysis

In [21]:
t_only_data = tn_merge

In [22]:
t_only_data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [23]:
# Identify latitude and longitude for neighborhood to explore

n_lat = t_only_data.loc[10,'Latitude']  # neighborhood latitude
n_long = t_only_data.loc[10,'Longitude']  # neighborhood longitude

n_name = t_only_data.loc[10,'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(n_name, 
                                                               n_lat, 
                                                               n_long))

Latitude and longitude values of Dorset Park, Scarborough Town Centre, Wexford Heights are 43.7574096, -79.27330400000001.


### Build the URL to query the Foursquare API for the identified latitude and longitude

In [24]:
radius = 500
LIMIT = 100

url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, n_lat, n_long, VERSION, radius, LIMIT)


In [25]:
results = requests.get(url).json()


In [26]:
results

{'meta': {'code': 200, 'requestId': '5d4117d799295100252c7508'},
 'response': {'headerLocation': 'Dorset Park',
  'headerFullLocation': 'Dorset Park, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 7,
  'suggestedBounds': {'ne': {'lat': 43.761909604500005,
    'lng': -79.26708530848985},
   'sw': {'lat': 43.7529095955, 'lng': -79.27952269151017}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5226562611d2cd49d83ef03b',
       'name': 'Kairali',
       'location': {'address': '1210 Kennedy Road',
        'crossStreet': 'Lawrence',
        'lat': 43.754914739291834,
        'lng': -79.27694504646365,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.754914739291834,
          'lng': -79.27694504646365}],
        'distance': 40

In [27]:
# function that extracts the ID of the first category of a venue
def get_category_type(row):
    """Return the ``'id'`` of the first category attached to a venue row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the category list under ``'categories'`` or, failing
        that, under ``'venue.categories'``.

    Returns
    -------
    The ``'id'`` value of the first entry of the category list, or ``None``
    when the list is empty or the stored value is not a list/tuple at all
    (for example ``None`` or NaN).

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # only a missing key triggers the fallback; any other error propagates
        categories_list = row['venue.categories']

    # a missing value (None / NaN) is treated like an empty list
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['id']

In [28]:
venues = results['response']['groups'][0]['items']

# flatten the nested venue JSON and keep only the four fields of interest
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# reduce each row's category list to the ID of its first entry
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# keep only the last dotted component of each column name ('venue.location.lat' -> 'lat')
nearby_venues.columns = [name.rsplit(".", 1)[-1] for name in nearby_venues.columns]

# look up each category ID in small_cat_list, then discard both ID columns
nearby_venues = (
    pd.merge(nearby_venues, small_cat_list, left_on='categories', right_on='ID')
    .drop(["categories", "ID"], axis=1)
)

nearby_venues.head()

Unnamed: 0,name,lat,lng,CategoryName,Top-Level Category
0,Kairali,43.754915,-79.276945,Indian Restaurant,Food
1,Karaikudi Chettinad South Indian Restaurant,43.756042,-79.276276,Indian Restaurant,Food
2,Kim Kim restaurant,43.753833,-79.276611,Chinese Restaurant,Food
3,Pho Vietnam,43.75777,-79.278572,Vietnamese Restaurant,Food
4,Big Al's Pet Supercentre,43.759279,-79.278325,Pet Store,Shop & Service


In [29]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

7 venues were returned by Foursquare.


In [32]:
# Borrow function from Neighborhoods lab and modify to explore Toronto neighborhoods

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the Foursquare ``venues/explore`` results around each location.

    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT`` values, issues one GET request per (name, latitude, longitude)
    triple and prints each name as it is processed.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Walked in parallel with ``zip``; iteration stops at the shortest one.
    radius : int, default 500
        Sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns ``Neighborhood``,
        ``Neighborhood Latitude``, ``Neighborhood Longitude``, ``Venue``,
        ``Venue Latitude``, ``Venue Longitude`` and ``Venue Category``.
        ``Venue Category`` is ``None`` for a venue whose category list is
        empty. If no venue is returned at all, the frame is empty but still
        carries these seven columns.

    Raises
    ------
    KeyError, IndexError
        If a response lacks the ``response -> groups[0] -> items`` structure.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue can come back without any category; do not index into an empty list
                categories[0]['name'] if categories else None))

    # passing `columns` to the constructor keeps the schema even when no venue was found
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return nearby_venues

#### Run the getNearbyVenues function on our selected Toronto neighborhoods

In [33]:
toronto_venues = getNearbyVenues(names=t_only_data['Neighborhood'],
                                   latitudes=t_only_data['Latitude'],
                                   longitudes=t_only_data['Longitude']
                                  )

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

In [34]:
print(toronto_venues.shape)
toronto_venues

(2252, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant
5,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Enterprise Rent-A-Car,43.764076,-79.193406,Rental Car Location
6,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Woburn Medical Centre,43.766631,-79.192286,Medical Center
7,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Lawrence Ave E & Kingston Rd,43.767704,-79.18949,Intersection
8,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Eggsmart,43.7678,-79.190466,Breakfast Spot
9,Woburn,43.770992,-79.216917,Starbucks,43.770037,-79.221156,Coffee Shop


In [35]:
# Look up every venue category name in small_cat_list to attach its top-level
# category, then discard the lookup columns that are no longer needed.
toronto_venues_merged = (
    toronto_venues
    .merge(small_cat_list, left_on='Venue Category', right_on='CategoryName')
    .drop(["CategoryName", "ID"], axis=1)
)

toronto_venues_merged.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Top-Level Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant,Food
1,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,Dairy Queen,43.710378,-79.290701,Fast Food Restaurant,Food
2,"Clarks Corners, Sullivan, Tam O'Shanter",43.781638,-79.304302,KFC,43.77944,-79.303371,Fast Food Restaurant,Food
3,L'Amoreaux West,43.799525,-79.318389,KFC,43.798938,-79.318854,Fast Food Restaurant,Food
4,L'Amoreaux West,43.799525,-79.318389,McDonald's,43.79888,-79.318724,Fast Food Restaurant,Food


In [36]:
toronto_venues_merged.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Top-Level Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100,100
Agincourt,4,4,4,4,4,4,4
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",2,2,2,2,2,2,2
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",9,9,9,9,9,9,9
"Alderwood, Long Branch",9,9,9,9,9,9,9
"Bathurst Manor, Downsview North, Wilson Heights",20,20,20,20,20,20,20
Bayview Village,4,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",24,24,24,24,24,24,24
Berczy Park,57,57,57,57,57,57,57
"Birch Cliff, Cliffside West",4,4,4,4,4,4,4


In [37]:
# Venues per neighborhood: number of non-null 'Top-Level Category' entries in each group
toronto_venues_merged_count = toronto_venues_merged.groupby('Neighborhood')['Top-Level Category'].count()

toronto_venues_merged_count.head()

Neighborhood
Adelaide, King, Richmond                                                                                         100
Agincourt                                                                                                          4
Agincourt North, L'Amoreaux East, Milliken, Steeles East                                                           2
Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown      9
Alderwood, Long Branch                                                                                             9
Name: Top-Level Category, dtype: int64

In [38]:
print('There are {} unique categories.'.format(len(toronto_venues_merged['Venue Category'].unique())))

There are 276 unique categories.


In [39]:
print('There are {} unique top-level categories.'.format(len(toronto_venues_merged['Top-Level Category'].unique())))

There are 8 unique top-level categories.


### Analyze Toronto Neighborhoods by Top Level Category

In [40]:
# one indicator column per top-level category (no prefix, so columns carry the bare category names)
toronto_onehot = pd.get_dummies(toronto_venues_merged[['Top-Level Category']], prefix="", prefix_sep="")

# put the neighborhood name of each venue in front of the indicator columns
neighborhood_col = toronto_venues_merged['Neighborhood']
toronto_onehot.insert(0, 'Neighborhood', neighborhood_col)

toronto_onehot.head()

Unnamed: 0,Neighborhood,Arts & Entertainment,College & University,Food,Nightlife Spot,Outdoors & Recreation,Professional & Other Places,Shop & Service,Travel & Transport
0,"Rouge, Malvern",0,0,1,0,0,0,0,0
1,"Clairlea, Golden Mile, Oakridge",0,0,1,0,0,0,0,0
2,"Clarks Corners, Sullivan, Tam O'Shanter",0,0,1,0,0,0,0,0
3,L'Amoreaux West,0,0,1,0,0,0,0,0
4,L'Amoreaux West,0,0,1,0,0,0,0,0


In [41]:
toronto_onehot

Unnamed: 0,Neighborhood,Arts & Entertainment,College & University,Food,Nightlife Spot,Outdoors & Recreation,Professional & Other Places,Shop & Service,Travel & Transport
0,"Rouge, Malvern",0,0,1,0,0,0,0,0
1,"Clairlea, Golden Mile, Oakridge",0,0,1,0,0,0,0,0
2,"Clarks Corners, Sullivan, Tam O'Shanter",0,0,1,0,0,0,0,0
3,L'Amoreaux West,0,0,1,0,0,0,0,0
4,L'Amoreaux West,0,0,1,0,0,0,0,0
5,Hillcrest Village,0,0,1,0,0,0,0,0
6,"Fairview, Henry Farm, Oriole",0,0,1,0,0,0,0,0
7,"Fairview, Henry Farm, Oriole",0,0,1,0,0,0,0,0
8,"Fairview, Henry Farm, Oriole",0,0,1,0,0,0,0,0
9,"Fairview, Henry Farm, Oriole",0,0,1,0,0,0,0,0


In [42]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()


In [43]:
toronto_grouped

Unnamed: 0,Neighborhood,Arts & Entertainment,College & University,Food,Nightlife Spot,Outdoors & Recreation,Professional & Other Places,Shop & Service,Travel & Transport
0,"Adelaide, King, Richmond",0.06,0.0,0.64,0.07,0.06,0.03,0.1,0.04
1,Agincourt,0.0,0.0,0.25,0.25,0.25,0.0,0.25,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.555556,0.0,0.0,0.0,0.444444,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.444444,0.111111,0.333333,0.0,0.111111,0.0
5,"Bathurst Manor, Downsview North, Wilson Heights",0.0,0.0,0.6,0.0,0.05,0.0,0.35,0.0
6,Bayview Village,0.0,0.0,0.75,0.0,0.0,0.0,0.25,0.0
7,"Bedford Park, Lawrence Manor East",0.0,0.0,0.75,0.041667,0.0,0.0,0.208333,0.0
8,Berczy Park,0.087719,0.0,0.508772,0.105263,0.052632,0.0,0.22807,0.017544
9,"Birch Cliff, Cliffside West",0.25,0.25,0.25,0.0,0.25,0.0,0.0,0.0


In [44]:
toronto_grouped.shape

(100, 9)

In [45]:
toronto_grouped_with_count = toronto_grouped

toronto_grouped_with_count

Unnamed: 0,Neighborhood,Arts & Entertainment,College & University,Food,Nightlife Spot,Outdoors & Recreation,Professional & Other Places,Shop & Service,Travel & Transport
0,"Adelaide, King, Richmond",0.06,0.0,0.64,0.07,0.06,0.03,0.1,0.04
1,Agincourt,0.0,0.0,0.25,0.25,0.25,0.0,0.25,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.555556,0.0,0.0,0.0,0.444444,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.444444,0.111111,0.333333,0.0,0.111111,0.0
5,"Bathurst Manor, Downsview North, Wilson Heights",0.0,0.0,0.6,0.0,0.05,0.0,0.35,0.0
6,Bayview Village,0.0,0.0,0.75,0.0,0.0,0.0,0.25,0.0
7,"Bedford Park, Lawrence Manor East",0.0,0.0,0.75,0.041667,0.0,0.0,0.208333,0.0
8,Berczy Park,0.087719,0.0,0.508772,0.105263,0.052632,0.0,0.22807,0.017544
9,"Birch Cliff, Cliffside West",0.25,0.25,0.25,0.0,0.25,0.0,0.0,0.0


In [46]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in ``row``, skipping its first entry.

    The first element of ``row`` is ignored; the remaining entries are sorted
    in descending order and the index labels of the first ``num_top_venues``
    of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [47]:

num_top_venues = 8

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # ranks 1-3 take 'st' / 'nd' / 'rd'; every later rank takes 'th'.
    # An explicit bounds check replaces the former bare `except:`, which would
    # also have hidden unrelated errors (including KeyboardInterrupt).
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe
neighborhoods_venues_sorted2 = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted2['Neighborhood'] = toronto_grouped_with_count['Neighborhood']

# fill each row with that neighborhood's category names, ordered from highest to lowest mean frequency
for ind in np.arange(toronto_grouped_with_count.shape[0]):
    neighborhoods_venues_sorted2.iloc[ind, 1:] = return_most_common_venues(toronto_grouped_with_count.iloc[ind, :], num_top_venues)

# append the per-neighborhood venue count (a Series named 'Top-Level Category') and give it a descriptive name
neighborhoods_venues_sorted2 = pd.merge(neighborhoods_venues_sorted2, toronto_venues_merged_count, on='Neighborhood')

neighborhoods_venues_sorted2 = neighborhoods_venues_sorted2.rename(columns={'Top-Level Category':'Number of Venues'})

neighborhoods_venues_sorted2.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Number of Venues
0,"Adelaide, King, Richmond",Food,Shop & Service,Nightlife Spot,Outdoors & Recreation,Arts & Entertainment,Travel & Transport,Professional & Other Places,College & University,100
1,Agincourt,Shop & Service,Outdoors & Recreation,Nightlife Spot,Food,Travel & Transport,Professional & Other Places,College & University,Arts & Entertainment,4
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Outdoors & Recreation,Travel & Transport,Shop & Service,Professional & Other Places,Nightlife Spot,Food,College & University,Arts & Entertainment,2
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Food,Shop & Service,Travel & Transport,Professional & Other Places,Outdoors & Recreation,Nightlife Spot,College & University,Arts & Entertainment,9
4,"Alderwood, Long Branch",Food,Outdoors & Recreation,Shop & Service,Nightlife Spot,Travel & Transport,Professional & Other Places,College & University,Arts & Entertainment,9


### Cluster the Neighborhoods

After experimentation and evaluation of results, 3 was identified as the optimal cluster number to obtain a meaningful result.

In [48]:
# set number of clusters
kclusters2 = 3

# keep only the numeric category-frequency columns for clustering.
# `axis` is passed by keyword: the positional form drop(label, 1) is rejected by pandas 2.x.
toronto_grouped_clustering2 = toronto_grouped_with_count.drop('Neighborhood', axis=1)

# run k-means clustering (fixed random_state so the labels are reproducible)
kmeans2 = KMeans(n_clusters=kclusters2, random_state=0).fit(toronto_grouped_clustering2)

# check cluster labels generated for the first rows in the dataframe
kmeans2.labels_[0:8]

array([0, 2, 1, 0, 0, 0, 0, 0], dtype=int32)

In [50]:
# Prepend the k-means label of each neighborhood as the first column.
# DataFrame.insert raises ValueError when the column already exists, so remove
# any copy left by an earlier run of this cell first; the cell is then safe to re-run.
# NOTE(review): assumes the rows are still in the same order as toronto_grouped_with_count,
# the frame kmeans2 was fitted on -- confirm if the merge in the previous cell changes.
if 'Cluster Labels' in neighborhoods_venues_sorted2.columns:
    neighborhoods_venues_sorted2 = neighborhoods_venues_sorted2.drop('Cluster Labels', axis=1)
neighborhoods_venues_sorted2.insert(0, 'Cluster Labels', kmeans2.labels_)

### Merge venue data with neighborhood geography data

In [51]:
toronto_merged2 = t_only_data

# neighborhoods with no venue data will be dropped via the inner join
toronto_merged2 = toronto_merged2.join(neighborhoods_venues_sorted2.set_index('Neighborhood'), on='Neighborhood', how='inner')

toronto_merged2.head() # check the last columns!


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Number of Venues
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0,Food,Travel & Transport,Shop & Service,Professional & Other Places,Outdoors & Recreation,Nightlife Spot,College & University,Arts & Entertainment,1
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,2,Nightlife Spot,Travel & Transport,Shop & Service,Professional & Other Places,Outdoors & Recreation,Food,College & University,Arts & Entertainment,1
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0,Food,Travel & Transport,Shop & Service,Professional & Other Places,Outdoors & Recreation,Nightlife Spot,College & University,Arts & Entertainment,7
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Food,Outdoors & Recreation,Travel & Transport,Shop & Service,Professional & Other Places,Nightlife Spot,College & University,Arts & Entertainment,4
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Food,Shop & Service,Outdoors & Recreation,Travel & Transport,Professional & Other Places,Nightlife Spot,College & University,Arts & Entertainment,7


### Review the data in each cluster to identify Personas

In [52]:
toronto_merged2.loc[toronto_merged2['Cluster Labels'] == 0, toronto_merged2.columns[[1] + [2] + list(range(5, toronto_merged2.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Number of Venues
0,Scarborough,"Rouge, Malvern",0,Food,Travel & Transport,Shop & Service,Professional & Other Places,Outdoors & Recreation,Nightlife Spot,College & University,Arts & Entertainment,1
2,Scarborough,"Guildwood, Morningside, West Hill",0,Food,Travel & Transport,Shop & Service,Professional & Other Places,Outdoors & Recreation,Nightlife Spot,College & University,Arts & Entertainment,7
3,Scarborough,Woburn,0,Food,Outdoors & Recreation,Travel & Transport,Shop & Service,Professional & Other Places,Nightlife Spot,College & University,Arts & Entertainment,4
4,Scarborough,Cedarbrae,0,Food,Shop & Service,Outdoors & Recreation,Travel & Transport,Professional & Other Places,Nightlife Spot,College & University,Arts & Entertainment,7
7,Scarborough,"Clairlea, Golden Mile, Oakridge",0,Travel & Transport,Food,Outdoors & Recreation,Shop & Service,Professional & Other Places,Nightlife Spot,College & University,Arts & Entertainment,10
8,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",0,Travel & Transport,Food,Shop & Service,Professional & Other Places,Outdoors & Recreation,Nightlife Spot,College & University,Arts & Entertainment,2
9,Scarborough,"Birch Cliff, Cliffside West",0,Outdoors & Recreation,Food,College & University,Arts & Entertainment,Travel & Transport,Shop & Service,Professional & Other Places,Nightlife Spot,4
10,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",0,Food,Shop & Service,Travel & Transport,Professional & Other Places,Outdoors & Recreation,Nightlife Spot,College & University,Arts & Entertainment,7
11,Scarborough,"Maryvale, Wexford",0,Food,Shop & Service,Travel & Transport,Professional & Other Places,Outdoors & Recreation,Nightlife Spot,College & University,Arts & Entertainment,7
13,Scarborough,"Clarks Corners, Sullivan, Tam O'Shanter",0,Food,Shop & Service,Travel & Transport,Professional & Other Places,Outdoors & Recreation,Nightlife Spot,College & University,Arts & Entertainment,11


In [53]:
toronto_merged2.loc[toronto_merged2['Cluster Labels'] == 1, toronto_merged2.columns[[1] + [2] + list(range(5, toronto_merged2.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,Number of Venues
14,Scarborough,"Agincourt North, L'Amoreaux East, Milliken, St...",1,Outdoors & Recreation,Travel & Transport,Shop & Service,Professional & Other Places,Nightlife Spot,Food,College & University,Arts & Entertainment,2
17,North York,Hillcrest Village,1,Outdoors & Recreation,Food,Travel & Transport,Shop & Service,Professional & Other Places,Nightlife Spot,College & University,Arts & Entertainment,6
30,North York,"CFB Toronto, Downsview East",1,Travel & Transport,Outdoors & Recreation,Shop & Service,Professional & Other Places,Nightlife Spot,Food,College & University,Arts & Entertainment,2
37,East Toronto,The Beaches,1,Outdoors & Recreation,Shop & Service,Nightlife Spot,Travel & Transport,Professional & Other Places,Food,College & University,Arts & Entertainment,5
40,East York,East Toronto,1,Outdoors & Recreation,Travel & Transport,Shop & Service,Professional & Other Places,Nightlife Spot,Food,College & University,Arts & Entertainment,4
44,Central Toronto,Lawrence Park,1,Travel & Transport,Professional & Other Places,Outdoors & Recreation,Shop & Service,Nightlife Spot,Food,College & University,Arts & Entertainment,3
48,Central Toronto,"Moore Park, Summerhill East",1,Outdoors & Recreation,Food,Travel & Transport,Shop & Service,Professional & Other Places,Nightlife Spot,College & University,Arts & Entertainment,4
50,Downtown Toronto,Rosedale,1,Outdoors & Recreation,Professional & Other Places,Travel & Transport,Shop & Service,Nightlife Spot,Food,College & University,Arts & Entertainment,5
64,Central Toronto,"Forest Hill North, Forest Hill West",1,Outdoors & Recreation,Shop & Service,Food,Travel & Transport,Professional & Other Places,Nightlife Spot,College & University,Arts & Entertainment,4
73,York,Humewood-Cedarvale,1,Outdoors & Recreation,Arts & Entertainment,Travel & Transport,Shop & Service,Professional & Other Places,Nightlife Spot,Food,College & University,4


In [None]:
toronto_merged2.loc[toronto_merged2['Cluster Labels'] == 2, toronto_merged2.columns[[1] + [2] + list(range(5, toronto_merged2.shape[1]))]]

### Assign Persona Labels

In [None]:
# Assign Persona function

def persona(c):
    """Translate the ``'Cluster Labels'`` entry of ``c`` into a persona name.

    Labels 0, 1 and 2 map to "Urban Foodie", "Outdoor Enthusiast" and
    "Suburban Shopper" respectively; any other value yields "Undefined".
    """
    cluster = c['Cluster Labels']
    labelled_personas = (
        (0, "Urban Foodie"),
        (1, "Outdoor Enthusiast"),
        (2, "Suburban Shopper"),
    )
    # compare with == in order, exactly like a chain of if statements
    for label, name in labelled_personas:
        if cluster == label:
            return name
    return "Undefined"

In [None]:
toronto_merged2['Persona Label'] = toronto_merged2.apply(persona,axis=1)
toronto_merged2.head()

## Map of Results

In [None]:

# create map
map_clusters2 = folium.Map(location=[latitude, longitude], zoom_start=13)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters2))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
# The loop variable is named `persona_label`, not `persona`: reusing the name of
# the persona() function would overwrite it and break a re-run of the cell that
# builds the 'Persona Label' column.
for lat, lon, poi, cluster, persona_label, venue_counts in zip(toronto_merged2['Latitude'], toronto_merged2['Longitude'], toronto_merged2['Neighborhood'], toronto_merged2['Cluster Labels'], toronto_merged2['Persona Label'], toronto_merged2['Number of Venues']):
    label = folium.Popup(str(poi)  + ' - ' + str(persona_label) + ' - ' + str(venue_counts) + ' Venues', parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # cluster 0 gives index -1, i.e. the last colour in the list (negative indices wrap around)
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters2)

map_clusters2