In [1]:
import pandas as pd
import numpy as np
import requests
import json
from pandas.io.json import json_normalize
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans 
from geopy.geocoders import Nominatim
import os
import folium
from folium import plugins
from ipywidgets import interact
pd.set_option('display.max_rows', 250)
pd.set_option('display.max_colwidth', 210)


### Step 1 - Scrape Data from url and download
##### Use read_html to return a list of dataframe objects (tables) with headers from the url
##### While BeautifulSoup is an option for scraping data, in this case the pandas read_html is easy to use and straight forward.

In [2]:
tables = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', header=0)

### Step 2 - Create Pandas Dataframe
##### Assign the first table (0) from the read_html data and assign to new dataframe df

In [3]:
df = tables[0] # pulls the first table[0] on the html page and assigns to 'df' dataframe 
df.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Step 3a - Clean dataframe by removing rows with postal codes not assigned to Boroughs
##### Use the pandas vectorized string with the tilde complement operator to filter out rows with 'Not assigned' in the Boroughs Column

In [4]:
df = df[~df.Borough.str.contains('Not assigned')]

##### Increase column width and show all rows to review data

In [5]:
pd.set_option('display.max_rows', 250)
pd.set_option('display.max_colwidth', 210)

### Step 3b - Where a Borough does not have an assigned Neighbourhood value, use value of Bourough in adjacent Neighbourhood column 


##### Show all rows where cells in the Neighbourhood column have a value of 'Not assigned'

In [6]:
not_assigned = df.loc[df.Neighbourhood == 'Not assigned']
not_assigned.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
8,M7A,Queen's Park,Not assigned


##### Replace any instance of "Not Assigned" in Neighbourhood Column with the value in the adjacent cell(Borough) 

In [7]:
#use .copy() to make a hard copy of df and limit the SettingWithCopyWarning
df = df.copy()
df.loc[df['Neighbourhood']== 'Not assigned', 'Neighbourhood'] = df['Borough']
df.head(10) #'Queens Park' now shows Neighbourhood column has the same value as the Borough column 

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


### Step 3c - Combine instances where a postal code has multiple Neighbourhood values, separating with a comma
##### Use groupby, finding columns 'Postcode' and Borough with repeating data and join corresponding cells in the 'Neighbourhood column with comma separators, then reset index

In [8]:
df = df.groupby(['Postcode','Borough'])['Neighbourhood'].apply(','.join).reset_index()
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


In [9]:
#Remove hash below in order to save to excel
#df.to_excel("Toronto_Neighbourhoods.xlsx", sheet_name='Data')

### Step 4 - Show the shape of the dataframe

In [10]:
df.shape

(103, 3)

### Step 5 - Download Geo Data and merge into existing dataframe

In [11]:
geo_df=pd.read_csv('http://cocl.us/Geospatial_data/Geospatial_Coordinates.csv')
df_merge1 = pd.merge(df, geo_df, left_on='Postcode', right_on='Postal Code')
if df_merge1['Postcode'].equals(df_merge1['Postal Code']):
    df_merge1=df_merge1.drop(['Postal Code'], axis=1)
    print('Success! Dropped Equal Duplicate Column "Postal Code"')
else:
    print('Columns are not duplicates, Please review dataframe')

Success! Dropped Equal Duplicate Column "Postal Code"


In [12]:
print('Dataframe with merged Geocoordinates, Shape:',df_merge1.shape)
df_merge1.head()

Dataframe with merged Geocoordinates, Shape: (103, 5)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [13]:
df_merge1=df_merge1.rename(index=str, columns={'Neighbourhood':'Neighborhood'})# Rename coloumn Neighborhood to ensure there are no conflicts with spelling differences
df_merge1.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [14]:
df_merge1.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [15]:
df_merge1.shape

(103, 5)

### Step 6 - Use folium to find coodinates for Toronto Ontario passing to Latitudes and Longitudes.
##### Use Nominatim with "toronto_explorer for the user_agent

In [16]:
address = 'toronto,on'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.653963, -79.387207.


### Step 7 - Create a map using folium with the above coordinates for Toronto
##### Added a geojson file taken from https://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/bound-limit-2016-eng.cfm and formated from '.shp' file. This added boundaries for the FSA regions in Toronto. Added Markers to the map to show each neighborhood center location.

In [17]:
toronto_geojson=r"Toronto2.geojson"
map_toronto = folium.Map(location=[latitude+.06, longitude], zoom_start=11, tiles='Stamen Terrain')

def style_function(feature):
    return{'fillOpacity':.3,'weight':2, 'color':'#52658F','fillColor':'#C0B3A0','high_light':'true'}
folium.GeoJson( 
    toronto_geojson, 
    name='geojson',
    style_function=style_function
).add_to(map_toronto) 

# add markers to map
for lat, lng, borough, neighborhood in zip(df_merge1['Latitude'], df_merge1['Longitude'], df_merge1['Borough'], df_merge1['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.Circle(
        [lat, lng],
        radius=300,
        popup=label,
        color='#373F27',
        weight=1,
        fill=True,
        fill_color='#C06014',
        fill_opacity=.5,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Step 8 - Entered id and password for Foursquare 

In [56]:
CLIENT_ID = 'XXXXXXXXXX' # your Foursquare ID
CLIENT_SECRET = 'XXXXXXXXXX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: XXXXXXXXXX
CLIENT_SECRET:XXXXXXXXXX


### Step 9a - Create a function that requests an api from Foursquare that returns venue and category information for each neighborhood in our database

In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Step 9b - the next codeblock runs the above function with radius and LIMIT numbers, and returns 'Toronto_venues'

In [25]:
LIMIT = 100
radius=500
Toronto_venues = getNearbyVenues(names=df_merge1['Neighborhood'],
                                   latitudes=df_merge1['Latitude'],
                                   longitudes=df_merge1['Longitude']
                                  )

Rouge,Malvern
Highland Creek,Rouge Hill,Port Union
Guildwood,Morningside,West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park,Ionview,Kennedy Park
Clairlea,Golden Mile,Oakridge
Cliffcrest,Cliffside,Scarborough Village West
Birch Cliff,Cliffside West
Dorset Park,Scarborough Town Centre,Wexford Heights
Maryvale,Wexford
Agincourt
Clarks Corners,Sullivan,Tam O'Shanter
Agincourt North,L'Amoreaux East,Milliken,Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview,Henry Farm,Oriole
Bayview Village
Silver Hills,York Mills
Newtonbrook,Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park,Don Mills South
Bathurst Manor,Downsview North,Wilson Heights
Northwood Park,York University
CFB Toronto,Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens,Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West,Riverdale
The Beaches West,Indi

In [26]:
#Take a look at the data frame
Toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge,Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge,Malvern",43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,Chris Effects Painting,43.784343,-79.163742,Construction & Landscaping
3,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
4,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place


In [27]:
# Check Count of Venues for Each Neighbourhood
Toronto_venues.groupby('Neighborhood').Venue.count().sort_values(ascending=False).head()

Neighborhood
Adelaide,King,Richmond                     100
First Canadian Place,Underground city      100
St. James Town                             100
Chinatown,Grange Park,Kensington Market    100
Commerce Court,Victoria Hotel              100
Name: Venue, dtype: int64

In [28]:
#Show vanues and unique categories found
print('There are {} venues and {} unique categories.'.format(Toronto_venues['Venue'].count(),len(Toronto_venues['Venue Category'].unique())))

There are 2271 venues and 280 unique categories.


### Step 10 - Use One Hot Encoding to create a usable dataframe to analyze 

In [29]:
# one hot encoding
from collections import deque#This module implements specialized container datatypes providing alternatives to Python’s general purpose built-in containers

venues_onehot = pd.get_dummies(Toronto_venues["Venue Category"],
                             prefix = "",
                             prefix_sep = "")

venues_onehot["Neighborhood"] = Toronto_venues["Neighborhood"]


nindex = list(venues_onehot.columns).index("Neighborhood")
cols = deque(venues_onehot.columns)#ist-like container with fast appends and pops on either end
cols.rotate(-nindex)
cols = list(cols)
venues_onehot = venues_onehot[cols]

venues_onehot.head()

Unnamed: 0,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Repair Shop,...,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Motel,Movie Theater,Moving Target,Museum,Music Store,Music Venue,Nail Salon
0,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Step 11 - Group rows by neighborhood and get the mean of the frequency of occurrence for each category

In [30]:
Toronto_grouped = venues_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped.head()

Unnamed: 0,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Other Repair Shop,...,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Motel,Movie Theater,Moving Target,Museum,Music Store,Music Venue,Nail Salon
0,"Adelaide,King,Richmond",0.01,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North,L'Amoreaux East,Milliken,Steeles East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
Toronto_grouped.shape

(100, 280)

##### The Toronto_grouped dataframe gives me 100 neighborhoods which is less than the number of postal codes - so for some reason some of the rows were dropped or there was no data/venues for some of these locations.

### Step 12 - Create a dataframe that groups by neighborhood and returns a list of the top 10 venues for each

In [32]:
# write a function to sort the venues in descending order.
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [33]:
#Dataframe is created displaying the top 10 venues
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Thai Restaurant,Steakhouse,American Restaurant,Sushi Restaurant,Burger Joint,Restaurant,Bar,Bakery
1,Agincourt,Breakfast Spot,Skating Rink,Lounge,Clothing Store,Nail Salon,Antique Shop,Airport Lounge,Airport Service,Airport Terminal,American Restaurant
2,"Agincourt North,L'Amoreaux East,Milliken,Steeles East",Park,Playground,Nail Salon,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arcade
3,"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",Grocery Store,Fast Food Restaurant,Beer Store,Fried Chicken Joint,Sandwich Place,Pizza Place,Pharmacy,Airport Lounge,Airport Service,Airport Terminal
4,"Alderwood,Long Branch",Pizza Place,Pool,Coffee Shop,Sandwich Place,Pub,Gym,Pharmacy,Skating Rink,Wings Joint,Airport Service


In [34]:
neighborhoods_venues_sorted.shape #like the Toronto_grouped dataframe, this also shows 100 neighborhoods(less than the postal codes)

(100, 11)

### 13. Use the pricipal component analysis (PCA) from sklearn to dimensionally reduce to a lower dimensional space.

In [35]:
from sklearn.decomposition import PCA
pca = PCA(.95)
Toronto_grouped_clustering = pca.fit_transform(Toronto_grouped.drop('Neighborhood', 1))#fits the model with Toronto_grouped and applies the demensional reduction
Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

In [36]:
Toronto_grouped_clustering.shape

(100, 279)

### 14. With the correct dimensions for the dataframe complete the KMeans clustering

In [37]:
# set number of clusters
kclusters = 5

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
print(kmeans.labels_[0:10])
print(kmeans.labels_.shape)

[3 3 4 3 3 3 3 3 3 3]
(100,)


In [39]:
Toronto_grouped["Cluster Labels"] = kmeans.labels_

# add clustering labels
Toronto_merged = df_merge1.merge(Toronto_grouped, left_on = "Neighborhood", right_on = "Neighborhood", how = "outer")

# merge toronto_grouped with df_merge1 to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Toronto_merged["Cluster Labels"] = Toronto_merged["Cluster Labels"].fillna(5).astype("int")

Toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,New American Restaurant,Nightclub,Noodle House,Office,Opera House,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,0.0,0.0,0.0,0.0,0.0,...,Fast Food Restaurant,Print Shop,Aquarium,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Nail Salon,Airport Gate
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,0.0,0.0,0.0,0.0,0.0,...,Construction & Landscaping,Bar,Nail Salon,Aquarium,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Arcade
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,0.0,0.0,0.0,0.0,0.0,...,Intersection,Electronics Store,Spa,Medical Center,Pizza Place,Breakfast Spot,Mexican Restaurant,Rental Car Location,Aquarium,Antique Shop
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,0.0,0.0,0.0,0.0,...,Coffee Shop,Korean Restaurant,Nail Salon,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arcade
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,0.0,0.0,0.0,0.0,...,Caribbean Restaurant,Fried Chicken Joint,Bakery,Bank,Hakka Restaurant,Athletics & Sports,Thai Restaurant,Video Game Store,Airport Gate,Arts & Crafts Store


## 15. Create Map showing Clusters

In [40]:
map_clusters = folium.Map(location=[latitude+.06, longitude], zoom_start=11, tiles='Stamen Terrain')

def style_function(feature):
    return{'fillOpacity':.3,'weight':2, 'color':'#52658F','fillColor':'#C0B3A0','high_light':'true'}
folium.GeoJson( 
    toronto_geojson, 
    name='geojson',
    style_function=style_function
).add_to(map_clusters) 

kclusters = kclusters + 1

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighborhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 16. Analysis of Clusters

### CLuster 1
##### This cluster contains parks, salons and is dominated by Airport services

In [52]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, "1st Most Common Venue":"10th Most Common Venue"].head()

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,Cafeteria,Nail Salon,Aquarium,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Arcade
23,Park,Bank,Nail Salon,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arcade
25,Fast Food Restaurant,Food & Drink Shop,Park,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Nail Salon
30,Park,Bus Stop,Other Repair Shop,Airport,Arts & Crafts Store,Art Museum,Asian Restaurant,Art Gallery,Argentinian Restaurant,Athletics & Sports
31,Park,Moving Target,Bank,Grocery Store,Nail Salon,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant


### CLuster 2
##### This cluster has two neighborhoods with baseball fields being very popular

In [51]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, "1st Most Common Venue":"10th Most Common Venue"].head()

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Food Truck,Baseball Field,Nail Salon,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium
97,Baseball Field,Nail Salon,Arcade,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Argentinian Restaurant


### CLuster 3
##### With only one neighborhood this cluster is set apart by

In [53]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, "1st Most Common Venue":"10th Most Common Venue"].head()

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
63,Garden,Nail Salon,Aquarium,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Arcade


### CLuster 4
##### The largest cluster by far, this has dominate categories such as restaurants, bars, bakeries and coffee shops

In [54]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, "1st Most Common Venue":"10th Most Common Venue"].head()

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Fast Food Restaurant,Print Shop,Aquarium,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Nail Salon,Airport Gate
1,Construction & Landscaping,Bar,Nail Salon,Aquarium,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Arcade
2,Intersection,Electronics Store,Spa,Medical Center,Pizza Place,Breakfast Spot,Mexican Restaurant,Rental Car Location,Aquarium,Antique Shop
3,Coffee Shop,Korean Restaurant,Nail Salon,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arcade
4,Caribbean Restaurant,Fried Chicken Joint,Bakery,Bank,Hakka Restaurant,Athletics & Sports,Thai Restaurant,Video Game Store,Airport Gate,Arts & Crafts Store


### CLuster 5
##### This cluster has playgrounds, nail salons and merchandise stores as its most common venues

In [55]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, "1st Most Common Venue":"10th Most Common Venue"].head()

Unnamed: 0,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Playground,Jewelry Store,Nail Salon,Aquarium,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Arcade
14,Park,Playground,Nail Salon,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arcade
48,Playground,Gym,Nail Salon,Antique Shop,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Aquarium
