# Segmenting and Clustering Neighborhoods in Toronto - All Notebook

## Part 1 : Loading Toronto data in dataframe

In [34]:
import numpy as np
import pandas as pd

#### Loading data from Wikipedia

In [35]:
df=pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")[0]

In [36]:
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
...,...,...,...
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West
285,M8Z,Etobicoke,South of Bloor


#### Rename column Neighbourhood to Neighborhood

In [37]:
df.rename(columns={'Neighbourhood': 'Neighborhood'}, inplace=True)

#### Removing rows with value of Borough "Not assigned"

In [38]:
df=df[df['Borough']!='Not assigned']

In [39]:
df=df.groupby(['Postcode']).agg({'Borough': 'first','Neighborhood':lambda x: ", ".join(x)}).reset_index()

#### Assigning the borough name to the neighborhood with value "Not assigned"

In [40]:
df[df['Neighborhood']=='Not assigned']

Unnamed: 0,Postcode,Borough,Neighborhood
93,M9A,Queen's Park,Not assigned


In [41]:
for i in range(len(df)):
    if df['Neighborhood'].iloc[i]=='Not assigned':
        df['Neighborhood'].iloc[i]=df['Borough'].iloc[i]

In [42]:
df.loc[93]

Postcode                 M9A
Borough         Queen's Park
Neighborhood    Queen's Park
Name: 93, dtype: object

#### The size of dataframe

In [43]:
df.shape

(103, 3)

## Part 2 : Latitude and the longitude coordinates of each neighborhood

#### Loading latitude and longitude from Google Maps

In [44]:
dfc=pd.read_csv('http://cocl.us/Geospatial_data')

In [45]:
dfc

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


#### Creating a new dataframe by meging df and dfc (Adding latitude and longitue to post code)

In [46]:
dfcn=df.merge(dfc,left_on='Postcode',right_on='Postal Code')

In [47]:
dfcn

Unnamed: 0,Postcode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
...,...,...,...,...,...,...
98,M9N,York,Weston,M9N,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,M9P,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",M9R,43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",M9V,43.739416,-79.588437


In [48]:
dfcn.drop('Postal Code',axis=1, inplace=True)

In [49]:
dfcn

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437


## Part 3 : Exploring and Clustering the Neighborhoods in Toronto

#### Unique values of Neighborhoods

#### Unique values of Borough

In [50]:
print(dfcn["Neighborhood"].unique())

['Rouge, Malvern' 'Highland Creek, Rouge Hill, Port Union'
 'Guildwood, Morningside, West Hill' 'Woburn' 'Cedarbrae'
 'Scarborough Village' 'East Birchmount Park, Ionview, Kennedy Park'
 'Clairlea, Golden Mile, Oakridge'
 'Cliffcrest, Cliffside, Scarborough Village West'
 'Birch Cliff, Cliffside West'
 'Dorset Park, Scarborough Town Centre, Wexford Heights'
 'Maryvale, Wexford' 'Agincourt' "Clarks Corners, Sullivan, Tam O'Shanter"
 "Agincourt North, L'Amoreaux East, Milliken, Steeles East"
 "L'Amoreaux West" 'Upper Rouge' 'Hillcrest Village'
 'Fairview, Henry Farm, Oriole' 'Bayview Village'
 'Silver Hills, York Mills' 'Newtonbrook, Willowdale' 'Willowdale South'
 'York Mills West' 'Willowdale West' 'Parkwoods' 'Don Mills North'
 'Flemingdon Park, Don Mills South'
 'Bathurst Manor, Downsview North, Wilson Heights'
 'Northwood Park, York University' 'CFB Toronto, Downsview East'
 'Downsview West' 'Downsview Central' 'Downsview Northwest'
 'Victoria Village' 'Woodbine Gardens, Parkview Hi

In [51]:
print('Number of unique values of Neighborhood : ' + str(len(dfcn["Neighborhood"].unique())))

Number of unique values of Neighborhood : 102


In [52]:
print(dfcn["Borough"].unique())

['Scarborough' 'North York' 'East York' 'East Toronto' 'Central Toronto'
 'Downtown Toronto' 'York' 'West Toronto' 'Mississauga' 'Etobicoke'
 "Queen's Park"]


In [53]:
print('Number of unique values of Borough : ' + str(len(dfcn["Borough"].unique())))

Number of unique values of Borough : 11


#### Loading libraries

In [54]:
import requests 
import matplotlib.cm as cm
import matplotlib.colors as colors

# MAP Library
import folium 

# Converting address into Latitude and Longitude
from geopy.geocoders import Nominatim


#### Initializing parameters and getting ltitude and longitude of Toronto

In [55]:
#Toronto coordinates
address = 'Toronto, ON, Canada'
geo = Nominatim(user_agent="ca_explorer")
location = geo.geocode(address, timeout=3)
latitude = location.latitude
longitude = location.longitude
print("Latitude of Toronto is : " + str(latitude))
print("Longitude of Toronto is : " + str(longitude))

Latitude of Toronto is : 43.653963
Longitude of Toronto is : -79.387207


#### Generating Toronto Map

In [56]:
# Initializing Toronto Map
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Adding markers
for latitude, longitude, borough, neighborhood in zip(dfcn['Latitude'], dfcn['Longitude'], dfcn['Borough'], dfcn['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([latitude, longitude],radius=6,popup=label,fill=True,
        color='blue',fill_color='blue',fill_opacity=0.5,
        parse_html=False).add_to(map_toronto)  
 
display(map_toronto)

#### Exploring a simple of Neighborhoods

In [57]:
# I will study Scarborough Neighborhood and generalize for all Toronto Neighborhoods
dfcn[dfcn['Neighborhood']=='Westmount']

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
99,M9P,Etobicoke,Westmount,43.696319,-79.532242


In [58]:
nh=dfcn.loc[99, "Neighborhood"]
nh_latitude, nh_longitude = dfcn.loc[9, "Latitude"], dfcn.loc[99, "Longitude"]
print('Latitude of ' + nh + ' Neighborhood : ' + str(nh_latitude))
print('Longitude of ' + nh + ' Neighborhood : ' + str(nh_longitude))

Latitude of Westmount Neighborhood : 43.692657000000004
Longitude of Westmount Neighborhood : -79.53224240000002


#### Foursquare Parameters 

In [59]:
client_id = 'GQKZ2MGJFNYU0E05E03PQS4JDTICAV3M5Z344BISJCGWUIXG'
client_secret = '5TZKCEP3AUSYBQL2IMAALB5HUYZAENJIWX4MJXFOHTGQN4KE'
version = '20200101'

#### Exploring venues with Foursquare API

In [60]:
number = 100 # Number of venues returned by Foursquare API is limited to 100
radius = 500 
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    client_id, client_secret, version, nh_latitude, nh_longitude, radius, number)

#### Getting venues

In [61]:
results = requests.get(url).json()

#### Function for getting venue catagory

In [62]:
def get_categories(row):
    try:
        cat_list = row['categories']
    except:
        cat_list = row['venue.categories']
        
    if len(cat_list) == 0:
        return None
    else:
        return cat_list[0]['name']

#### Converting json data to dataframe

In [63]:
from pandas.io.json import json_normalize
venues = results['response']['groups'][0]['items']
nv = json_normalize(venues)

# filtering columns
f_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nv =nv.loc[:, f_columns]

# filter the category for each row
nv['venue.categories'] = nv.apply(get_categories, axis=1)

# clean columns
nv.columns = [col.split(".")[-1] for col in nv.columns]


#### Display dataframe

In [64]:
nv

Unnamed: 0,name,categories,lat,lng
0,Mayflower Chinese Food,Chinese Restaurant,43.692753,-79.531566
1,Subway,Sandwich Place,43.692927,-79.531471
2,Starbucks,Coffee Shop,43.696405,-79.533479
3,Petro-Canada,Gas Station,43.690165,-79.530166
4,Pizza Hut,Pizza Place,43.696431,-79.533233
5,Metro,Supermarket,43.691414,-79.531148
6,Baskin-Robbins,Ice Cream Shop,43.691547,-79.531702
7,Royal York Fruit Market,Flea Market,43.691494,-79.531471
8,Dollarama,Discount Store,43.691945,-79.531593


#### Making the same data processing for all Neighborhoods

#### Applying getNearby_Venues to create venues of Toronto

In [65]:
def getNearby_Venues(names, latitudes, longitudes, radius):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # creating the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            client_id, client_secret, version, lat, lng, radius, number)
            
        # getting results
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return information for each nearby venue
        venues_list.append([(name,lat,lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name']) for v in results])

    nv = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nv.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nv)

##### Creating a new dataframe for Toronto Venues

In [66]:
r=500
venues_toronto = getNearby_Venues(names=dfcn['Neighborhood'],latitudes=dfcn['Latitude'],longitudes=dfcn['Longitude'],radius=r)

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

#### The size to venues_toronto dataframe

In [67]:
print(venues_toronto.shape)

(2222, 7)


In [68]:
venues_toronto.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course
2,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [69]:
venues_toronto.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Agincourt,5,5,5,5,5,5
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",3,3,3,3,3,3
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",9,9,9,9,9,9
"Alderwood, Long Branch",10,10,10,10,10,10
...,...,...,...,...,...,...
Willowdale West,6,6,6,6,6,6
Woburn,3,3,3,3,3,3
"Woodbine Gardens, Parkview Hill",14,14,14,14,14,14
Woodbine Heights,9,9,9,9,9,9


#### Transforming venues_toronto dataframe by using One Hot Encoding

In [70]:
# using one hot encoding
one_hot_toronto = pd.get_dummies(venues_toronto[['Venue Category']], prefix="", prefix_sep="")

# adding neighborhood column back to dataframe
one_hot_toronto['Neighborhood'] = venues_toronto['Neighborhood'] 

# moving neighborhood column to the first column
fixed_columns = [one_hot_toronto.columns[-1]] + list(one_hot_toronto.columns[:-1])
one_hot_toronto = one_hot_toronto[fixed_columns]

one_hot_toronto.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Grouping rows by Neighborhood

In [71]:
grouped_toronto = one_hot_toronto.groupby('Neighborhood').mean().reset_index()
grouped_toronto.head()

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Printing each Neighborhood 

In [72]:
top_venues = 10

for gt in grouped_toronto['Neighborhood']:
    print("«  "+gt+"  »")
    t = grouped_toronto[grouped_toronto['Neighborhood'] == gt].T.reset_index()
    t.columns = ['venue','freq']
    t = t.iloc[1:]
    t['freq'] = t['freq'].astype(float)
    t = t.round({'freq': 2})
    print(t.sort_values('freq', ascending=False).reset_index(drop=True).head(top_venues))
    print('\n')

«  Adelaide, King, Richmond  »
              venue  freq
0       Coffee Shop  0.07
1              Café  0.04
2        Steakhouse  0.04
3               Bar  0.04
4        Restaurant  0.03
5   Thai Restaurant  0.03
6      Burger Joint  0.03
7             Hotel  0.03
8  Asian Restaurant  0.03
9      Concert Hall  0.02


«  Agincourt  »
                       venue  freq
0                     Lounge   0.2
1  Latin American Restaurant   0.2
2               Skating Rink   0.2
3             Clothing Store   0.2
4             Breakfast Spot   0.2
5   Mediterranean Restaurant   0.0
6                Men's Store   0.0
7             Medical Center   0.0
8                      Motel   0.0
9              Metro Station   0.0


«  Agincourt North, L'Amoreaux East, Milliken, Steeles East  »
                        venue  freq
0                  Playground  0.33
1                        Park  0.33
2            Sculpture Garden  0.33
3                 Yoga Studio  0.00
4          Mexican Restaurant  0.00

                      venue  freq
0                  Bus Line  0.22
1                    Bakery  0.22
2             Metro Station  0.11
3              Intersection  0.11
4      Fast Food Restaurant  0.11
5              Soccer Field  0.11
6                      Park  0.11
7            Massage Studio  0.00
8            Medical Center  0.00
9  Mediterranean Restaurant  0.00


«  Clarks Corners, Sullivan, Tam O'Shanter  »
                 venue  freq
0             Pharmacy  0.14
1          Pizza Place  0.14
2         Noodle House  0.07
3  Fried Chicken Joint  0.07
4   Italian Restaurant  0.07
5                 Bank  0.07
6      Thai Restaurant  0.07
7          Gas Station  0.07
8   Chinese Restaurant  0.07
9  Rental Car Location  0.07


«  Cliffcrest, Cliffside, Scarborough Village West  »
                        venue  freq
0                       Motel  0.33
1         American Restaurant  0.33
2                Skating Rink  0.33
3  Modern European Restaurant  0.00
4           Mobile Phon

                 venue  freq
0                  Spa  0.12
1         Intersection  0.12
2  Rental Car Location  0.12
3   Mexican Restaurant  0.12
4    Electronics Store  0.12
5       Medical Center  0.12
6       Breakfast Spot  0.12
7          Pizza Place  0.12
8         Hockey Arena  0.00
9         Home Service  0.00


«  Harbord, University of Toronto  »
                 venue  freq
0                 Café  0.14
1            Bookstore  0.06
2                  Bar  0.06
3       Sandwich Place  0.06
4           Restaurant  0.06
5               Bakery  0.06
6  Japanese Restaurant  0.06
7          Yoga Studio  0.03
8         Dessert Shop  0.03
9   Chinese Restaurant  0.03


«  Harbourfront  »
                venue  freq
0         Coffee Shop  0.17
1                Park  0.06
2                 Pub  0.06
3              Bakery  0.06
4                Café  0.04
5          Restaurant  0.04
6      Breakfast Spot  0.04
7  Mexican Restaurant  0.04
8             Brewery  0.02
9          Beer Store 

                      venue  freq
0      Fast Food Restaurant   1.0
1       Monument / Landmark   0.0
2                 Locksmith   0.0
3                    Lounge   0.0
4                    Market   0.0
5            Massage Studio   0.0
6            Medical Center   0.0
7  Mediterranean Restaurant   0.0
8               Men's Store   0.0
9             Metro Station   0.0


«  Runnymede, Swansea  »
                       venue  freq
0                Pizza Place  0.09
1                Coffee Shop  0.09
2                       Café  0.09
3         Italian Restaurant  0.06
4           Sushi Restaurant  0.06
5          Fish & Chips Shop  0.03
6                        Bar  0.03
7                  Juice Bar  0.03
8  Latin American Restaurant  0.03
9          French Restaurant  0.03


«  Ryerson, Garden District  »
                       venue  freq
0                Coffee Shop  0.09
1             Clothing Store  0.05
2                       Café  0.04
3             Cosmetics Shop  0.04
4     

#### Creating a new dataframe from previews result

In [73]:
# Creating a function to sort the venues in descending order
def get_most_common_venues(row, top_venues):
    row_categories = row.iloc[1:]
    sorted_row_categories = row_categories.sort_values(ascending=False) 
    return sorted_row_categories.index.values[0:top_venues]

In [74]:
# Generating the new dataframe

indicators = ['st', 'nd', 'rd']

# creating columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# creating the new dataframe
sorted_neighborhoods_venues = pd.DataFrame(columns=columns)
sorted_neighborhoods_venues['Neighborhood'] = grouped_toronto['Neighborhood']

for ind in np.arange(grouped_toronto.shape[0]):
    sorted_neighborhoods_venues.iloc[ind, 1:] = get_most_common_venues(grouped_toronto.iloc[ind, :], top_venues)

sorted_neighborhoods_venues.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Steakhouse,Café,Bar,Burger Joint,Hotel,Thai Restaurant,Restaurant,Asian Restaurant,Vegetarian / Vegan Restaurant
1,Agincourt,Latin American Restaurant,Lounge,Skating Rink,Clothing Store,Breakfast Spot,Donut Shop,Diner,Discount Store,Dog Run,Doner Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Sculpture Garden,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Liquor Store,Pharmacy,Pizza Place,Fried Chicken Joint,Beer Store,Fast Food Restaurant,Sandwich Place,Discount Store,Dessert Shop
4,"Alderwood, Long Branch",Pizza Place,Gym,Coffee Shop,Skating Rink,Pharmacy,Sandwich Place,Athletics & Sports,Pool,Pub,Dim Sum Restaurant


#### Running k-means to cluster Neighborhood into 5 clusters

In [75]:
# K-means from clustering
from sklearn.cluster import KMeans

# Number of clusters
kc = 5

toronto_clusters = grouped_toronto.drop('Neighborhood', 1)

# running kmeans function
kmeans = KMeans(n_clusters=kc, random_state=0).fit(toronto_clusters)

# adding cluster labels
sorted_neighborhoods_venues.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_clustered = dfcn

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_clustered = toronto_clustered.join(sorted_neighborhoods_venues.set_index('Neighborhood'), on='Neighborhood')
toronto_clustered=toronto_clustered.dropna(subset=['Cluster Labels'])

#### Visualizing Toronto Clusters

In [76]:
# Generating MAP
map_toronto_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# Setting colors  for the clusters
x = np.arange(kc)
y = [i + x + (i*x)**2 for i in range(kc)]
colors_array = cm.rainbow(np.linspace(0, 1, len(y)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Adding markers
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_clustered['Latitude'], toronto_clustered['Longitude'], toronto_clustered['Neighborhood'], toronto_clustered['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker([lat, lon],radius=5,popup=label,color=rainbow[int(cluster-1)],fill=True,
        fill_color=rainbow[int(cluster-1)],fill_opacity=0.5).add_to(map_toronto_clusters)
       
# Displaying map
display(map_toronto_clusters)

#### Cluster 1 :

In [77]:
toronto_clustered.loc[toronto_clustered['Cluster Labels'] == 0, toronto_clustered.columns[[1] + list(range(5, toronto_clustered.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,0.0,Golf Course,Bar,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore
3,Scarborough,0.0,Coffee Shop,Korean Restaurant,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Dumpling Restaurant
4,Scarborough,0.0,Gas Station,Bakery,Fried Chicken Joint,Caribbean Restaurant,Athletics & Sports,Thai Restaurant,Bank,Hakka Restaurant,Drugstore,Donut Shop
6,Scarborough,0.0,Discount Store,Hobby Shop,Bus Station,Coffee Shop,Department Store,Women's Store,Dim Sum Restaurant,Diner,Dog Run,Doner Restaurant
7,Scarborough,0.0,Bakery,Bus Line,Park,Fast Food Restaurant,Metro Station,Soccer Field,Intersection,Women's Store,Dog Run,Diner
...,...,...,...,...,...,...,...,...,...,...,...,...
92,Etobicoke,0.0,Gym,Tanning Salon,Convenience Store,Discount Store,Burrito Place,Burger Joint,Sandwich Place,Fast Food Restaurant,Supplement Shop,Bakery
93,Queen's Park,0.0,Coffee Shop,Gym,Park,Diner,Portuguese Restaurant,Nightclub,Mexican Restaurant,Juice Bar,Italian Restaurant,Hobby Shop
95,Etobicoke,0.0,Shopping Plaza,Pizza Place,Convenience Store,Beer Store,Coffee Shop,Café,Pharmacy,Liquor Store,Park,Dumpling Restaurant
97,North York,0.0,Paper / Office Supplies Store,Furniture / Home Store,Baseball Field,Women's Store,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant


#### Cluster 2 :

In [78]:
toronto_clustered.loc[toronto_clustered['Cluster Labels'] == 1, toronto_clustered.columns[[1] + list(range(5, toronto_clustered.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,1.0,Convenience Store,Playground,Women's Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop


#### Cluster 3 :

In [79]:
toronto_clustered.loc[toronto_clustered['Cluster Labels'] == 2, toronto_clustered.columns[[1] + list(range(5, toronto_clustered.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
48,Central Toronto,2.0,Tennis Court,Doner Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Women's Store,Deli / Bodega


#### Cluster 4 :

In [80]:
toronto_clustered.loc[toronto_clustered['Cluster Labels'] == 3, toronto_clustered.columns[[1] + list(range(5, toronto_clustered.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,3.0,Fast Food Restaurant,Women's Store,Deli / Bodega,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Donut Shop,Doner Restaurant
2,Scarborough,3.0,Spa,Electronics Store,Pizza Place,Mexican Restaurant,Breakfast Spot,Rental Car Location,Intersection,Medical Center,Women's Store,Discount Store
13,Scarborough,3.0,Pharmacy,Pizza Place,Rental Car Location,Bank,Italian Restaurant,Fried Chicken Joint,Thai Restaurant,Fast Food Restaurant,Chinese Restaurant,Convenience Store
15,Scarborough,3.0,Chinese Restaurant,Fast Food Restaurant,Coffee Shop,Grocery Store,Breakfast Spot,Pharmacy,Pizza Place,Supermarket,Sandwich Place,Burger Joint
19,North York,3.0,Bank,Chinese Restaurant,Japanese Restaurant,Café,Women's Store,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dog Run
24,North York,3.0,Grocery Store,Pizza Place,Discount Store,Coffee Shop,Butcher,Pharmacy,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Deli / Bodega
26,North York,3.0,Japanese Restaurant,Gym / Fitness Center,Caribbean Restaurant,Café,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
33,North York,3.0,Grocery Store,Liquor Store,Gym / Fitness Center,Athletics & Sports,Discount Store,Donut Shop,Dim Sum Restaurant,Diner,Dog Run,Doner Restaurant
35,East York,3.0,Pizza Place,Fast Food Restaurant,Pet Store,Breakfast Spot,Gym / Fitness Center,Café,Pharmacy,Gastropub,Bank,Bus Line
72,North York,3.0,Bakery,Pizza Place,Japanese Restaurant,Pub,Women's Store,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dog Run


#### Cluster 5 :

In [81]:
toronto_clustered.loc[toronto_clustered['Cluster Labels'] == 4, toronto_clustered.columns[[1] + list(range(5, toronto_clustered.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Scarborough,4.0,Park,Playground,Sculpture Garden,Dog Run,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant
20,North York,4.0,Park,Women's Store,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Drugstore
23,North York,4.0,Park,Bank,Convenience Store,Women's Store,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
25,North York,4.0,Bus Stop,Park,Food & Drink Shop,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Women's Store
30,North York,4.0,Park,Snack Place,Airport,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Drugstore
40,East York,4.0,Park,Convenience Store,Coffee Shop,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
44,Central Toronto,4.0,Park,Bus Line,Swim School,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant,Event Space
50,Downtown Toronto,4.0,Park,Trail,Playground,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
74,York,4.0,Park,Women's Store,Fast Food Restaurant,Market,Doner Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
98,York,4.0,Park,Convenience Store,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Women's Store


# The end

# Thank you