## Segmenting and Clustering Neighborhoods in Toronto

For this assignment, you will be required to explore and cluster the neighborhoods in Toronto.

#### Importing needed Libraries

In [51]:
import pandas as pd # library to process data as dataframes
import numpy as np
import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser
%matplotlib inline 

import requests
from bs4 import BeautifulSoup as bs

from sklearn.cluster import KMeans

print("Done importing libraries")

Done importing libraries


## PART 1 ---------------------------------------------------------------------------------------

#### Requesting data from the link 

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
req =requests.get(url)
print("Connection Status : ", req.status_code)
req.encoding = 'utf-8'
encoding = req.encoding
html_doc = req.text

Connection Status :  200


#### Creating the DataFrame 

In [3]:
columns = ['PostalCode', 'Borough', 'Neighborhood']
df = pd.DataFrame(columns=columns)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood


#### Scrape PostalCode, Borough, Neighborhood from HTML_doc using BeautifulSoup

In [4]:
soup = bs(html_doc, 'html.parser')
table = soup.find('table', {'class' : 'wikitable'})
rows = table.tbody.find_all('tr')
for row in rows[1:]: 
    dictionary ={}
    td = row.find_all('td')
    if(td[1].get_text() != "Not assigned") :
        dictionary["PostalCode"] = td[0].get_text()
        dictionary["Borough"] = td[1].get_text()
        # One special case for the Borough Queen's Park  (it has that annoying ' ) with non assigned Neighborhood
        if(td[2].get_text() == "Not assigned" or dictionary["PostalCode"] == "M7A") : 
            dictionary["Neighborhood"] = td[1].get_text()
        else  :
            dictionary["Neighborhood"] = td[2].get_text().strip("\n")
            
        df = df.append(dictionary, ignore_index=True)
        


In [5]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


#### Just to verify that there is No  Not-Assigned Borough or Neighborhood

In [6]:
(df["Borough"]!= "Not assigned").describe().top

True

In [7]:
(df["Neighborhood"]!= "Not assigned").describe().top

True

#### Combining the Neighborhoods 

In [8]:
i = 1
while(i < len(df)):
    if df['PostalCode'].iloc[i] == df['PostalCode'].iloc[i - 1]:
        df.at[i - 1, 'Neighborhood'] = df.Neighborhood.iloc[i - 1] +', ' + df.Neighborhood.iloc[i]
        df.drop(index = i, inplace = True)
        df = df.reset_index().drop('index', axis = 1)
    else:
        i += 1
        
        
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [9]:
# verify table's row count
df.shape  

(103, 3)

## PART 2  -------------------------------------------------------------------------------------

##### tried the geocoder thing but takes too long and with missing data

In [13]:
#import geocoder
#latitudes = []
#longitudes = []
#for code in df.PostalCode:
#   # initialize your variable to None
#    coordinations = None
#
#   # loop until you get the coordinates
#   while(coordinations is None):
#       g = geocoder.google('{}, Toronto, Ontario'.format(code))
#       coordinations = g.latlng
#       
#
#   latitudes.append(coordinations[0])
#   longitudes.append(coordinations[1])
#if(len(latitudes) == 103 and len(longitudes) == 103) : print("We got all coordinations")
#else : print("Some thing is wrong we have only this : ", len(latitudes))

In [16]:
# Downloading the csv file
from urllib2 import urlopen
response = urlopen("https://cocl.us/Geospatial_data").read()
download = open("Geospatial_Coordinates.csv", "w")
download.write(response.read())
download.close()
print('Data downloaded!')

Data downloaded!


In [17]:
# reading the csv file
Geo = pd.read_csv("Geospatial_Coordinates.csv") 

# Rename the 'Postal Code' column
Geo.rename({'Postal Code': 'PostalCode'}, axis='columns', inplace=True)

In [18]:
# Merge Latitude and longitude to original dataframe
df = pd.merge(df, Geo, on='PostalCode')
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


In [24]:
df.to_csv('toronto.csv')

## PART 3 ---------------------------------------------------------------------------------

#### Create a map of Toronto

In [28]:
df = pd.read_csv("toronto.csv") 
import folium
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[43.706204, -79.398752], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#e182f2',
        fill_opacity=0.7).add_to(map_Toronto)  
# Show map Toronto with borough and neighborhood 
map_Toronto

#### Create a new data frame with neighborhoods in Scarborough

In [29]:
# @hiddel_cell
CLIENT_ID = 'PM5S4QXIE2V3AAMPJTXJ2WDDA1X5SYDHNXWEMFRAOK45XKTY' # your Foursquare ID
CLIENT_SECRET = 'EUQS3K3GPO04OBCAVT55YRRVQCRZHEQX3BEY0GHIEN45N4EX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version


In [30]:
scarborough_data = df[df['Borough'] == 'Scarborough'].reset_index(drop=True)
scarborough_data.head(7)

Unnamed: 0.1,Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,12,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,18,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,22,M1G,Scarborough,Woburn,43.770992,-79.216917
4,26,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,32,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,38,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029


##### Create a map of Scarborough and its neighbourhoods

In [32]:
address_scar = 'Scarborough,Toronto'
latitude_scar = 43.773077
longitude_scar = -79.257774
map_scarb = folium.Map(location=[latitude_scar, longitude_scar], zoom_start=12)

# add markers to map
for lat, lng, label in zip(scarborough_data['Latitude'], scarborough_data['Longitude'], scarborough_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_scarb)  
    
map_scarb

####  top 100 venues in the neighborhood 'Rouge, Malvern', from Scarborough

In [34]:
neighborhood_latitude = scarborough_data.loc[0, 'Latitude'] # neighbourhood latitude value
neighborhood_longitude = scarborough_data.loc[0, 'Longitude'] # neighbourhood longitude value

neighborhood_name = scarborough_data.loc[0, 'Neighborhood'] # neighbourhood name

print('Latitude and longitude values of "{}" are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude_scar, longitude_scar, VERSION, radius, LIMIT)




Latitude and longitude values of "Rouge, Malvern" are 43.8066863, -79.19435340000003.


In [36]:
results = requests.get(url).json()

In [38]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [39]:
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']  
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head(10)

Unnamed: 0,name,categories,lat,lng
0,Disney Store,Toy / Game Store,43.775537,-79.256833
1,Tommy Hilfiger Company Store,Clothing Store,43.776015,-79.257369
2,DAVIDsTEA,Tea Room,43.776613,-79.258516
3,St. Andrews Fish & Chips,Fish & Chips Shop,43.771865,-79.252645
4,American Eagle Outfitters,Clothing Store,43.775908,-79.258352
5,Coliseum Scarborough Cinemas,Movie Theater,43.775995,-79.255649
6,SEPHORA,Cosmetics Shop,43.775592,-79.258242
7,Chipotle Mexican Grill,Mexican Restaurant,43.77641,-79.258069
8,Scarborough Town Centre,Shopping Mall,43.775231,-79.257462
9,Hot Topic,Clothing Store,43.77545,-79.257929


##### count venues

In [40]:
format(nearby_venues.shape[0])

'43'

In [41]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Venues for each neighborhood in Scarborough

In [43]:
scarborough_venues = getNearbyVenues(names=scarborough_data['Neighborhood'],
                                   latitudes=scarborough_data['Latitude'],
                                   longitudes=scarborough_data['Longitude']
                                  )

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West, Steeles West
Upper Rouge


In [44]:
scarborough_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge, Malvern",43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Affordable Toronto Movers,43.787919,-79.162977,Moving Target
4,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum


In [45]:
scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,3,3,3,3,3,3
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",3,3,3,3,3,3
"Birch Cliff, Cliffside West",4,4,4,4,4,4
Cedarbrae,8,8,8,8,8,8
"Clairlea, Golden Mile, Oakridge",9,9,9,9,9,9
"Clarks Corners, Sullivan, Tam O'Shanter",9,9,9,9,9,9
"Cliffcrest, Cliffside, Scarborough Village West",2,2,2,2,2,2
"Dorset Park, Scarborough Town Centre, Wexford Heights",8,8,8,8,8,8
"East Birchmount Park, Ionview, Kennedy Park",7,7,7,7,7,7
"Guildwood, Morningside, West Hill",6,6,6,6,6,6


##### Unique Categories

In [46]:
len(scarborough_venues['Venue Category'].unique())

57

In [47]:
# one hot encoding
scarb_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarb_onehot['Neighborhood'] = scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scarb_onehot.columns[-1]] + list(scarb_onehot.columns[:-1])
scarb_onehot = scarb_onehot[fixed_columns]

scarb_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,...,Print Shop,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Train Station,Vietnamese Restaurant
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
scarb_grouped = scarb_onehot.groupby('Neighborhood').mean().reset_index()
scarb_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,...,Print Shop,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Thai Restaurant,Train Station,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,...,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.125,0.0,0.125,0.125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0
4,"Clairlea, Golden Mile, Oakridge",0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.222222,0.111111,...,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0


#### top 10 venues per neighborhood

In [49]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [52]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarb_grouped['Neighborhood']

for ind in np.arange(scarb_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarb_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Sandwich Place,Breakfast Spot,Lounge,Vietnamese Restaurant,Convenience Store,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint
1,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Coffee Shop,Playground,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
2,"Birch Cliff, Cliffside West",Skating Rink,General Entertainment,Café,College Stadium,Vietnamese Restaurant,Convenience Store,History Museum,Hakka Restaurant,Grocery Store,Furniture / Home Store
3,Cedarbrae,Caribbean Restaurant,Thai Restaurant,Athletics & Sports,Hakka Restaurant,Bakery,Bank,Fried Chicken Joint,Lounge,Vietnamese Restaurant,Discount Store
4,"Clairlea, Golden Mile, Oakridge",Bus Line,Bus Station,Metro Station,Ice Cream Shop,Park,Intersection,Bakery,Soccer Field,General Entertainment,Furniture / Home Store
5,"Clarks Corners, Sullivan, Tam O'Shanter",Pizza Place,Noodle House,Shopping Mall,Pharmacy,Fried Chicken Joint,Chinese Restaurant,Italian Restaurant,Thai Restaurant,Furniture / Home Store,Fast Food Restaurant
6,"Cliffcrest, Cliffside, Scarborough Village West",American Restaurant,Motel,Convenience Store,History Museum,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant
7,"Dorset Park, Scarborough Town Centre, Wexford ...",Indian Restaurant,Chinese Restaurant,Furniture / Home Store,Latin American Restaurant,Light Rail Station,Pet Store,Vietnamese Restaurant,Breakfast Spot,Discount Store,Hakka Restaurant
8,"East Birchmount Park, Ionview, Kennedy Park",Discount Store,Train Station,Bus Station,Chinese Restaurant,Coffee Shop,Department Store,Cosmetics Shop,Ice Cream Shop,History Museum,Hakka Restaurant
9,"Guildwood, Morningside, West Hill",Breakfast Spot,Rental Car Location,Electronics Store,Pizza Place,Medical Center,Mexican Restaurant,Vietnamese Restaurant,Convenience Store,Grocery Store,General Entertainment


#### K-MEAN CLUSTERING

In [53]:
scarb_data = scarborough_data.drop(16)
# set number of clusters
kclusters = 5

scarb_grouped_clustering = scarb_grouped.drop('Neighborhood', 1)


# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarb_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 
#len(kmeans.labels_)#=16
#scarborough_data.shape

array([1, 4, 1, 1, 1, 1, 2, 1, 1, 1])

##### add kmeans.labels

In [55]:
scarb_merged = scarb_data

# add clustering labels
scarb_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
scarb_merged = scarb_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

scarb_merged

Unnamed: 0.1,Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,1,Fast Food Restaurant,Print Shop,Vietnamese Restaurant,College Stadium,History Museum,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint
1,12,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,4,History Museum,Bar,Moving Target,Vietnamese Restaurant,Convenience Store,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint
2,18,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1,Breakfast Spot,Rental Car Location,Electronics Store,Pizza Place,Medical Center,Mexican Restaurant,Vietnamese Restaurant,Convenience Store,Grocery Store,General Entertainment
3,22,M1G,Scarborough,Woburn,43.770992,-79.216917,1,Coffee Shop,Korean Restaurant,Mexican Restaurant,Vietnamese Restaurant,Convenience Store,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint
4,26,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1,Caribbean Restaurant,Thai Restaurant,Athletics & Sports,Hakka Restaurant,Bakery,Bank,Fried Chicken Joint,Lounge,Vietnamese Restaurant,Discount Store
5,32,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,1,Playground,Convenience Store,Vietnamese Restaurant,College Stadium,History Museum,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint
6,38,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,2,Discount Store,Train Station,Bus Station,Chinese Restaurant,Coffee Shop,Department Store,Cosmetics Shop,Ice Cream Shop,History Museum,Hakka Restaurant
7,44,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,1,Bus Line,Bus Station,Metro Station,Ice Cream Shop,Park,Intersection,Bakery,Soccer Field,General Entertainment,Furniture / Home Store
8,51,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476,1,American Restaurant,Motel,Convenience Store,History Museum,Hakka Restaurant,Grocery Store,General Entertainment,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant
9,58,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,1,Skating Rink,General Entertainment,Café,College Stadium,Vietnamese Restaurant,Convenience Store,History Museum,Hakka Restaurant,Grocery Store,Furniture / Home Store


#### On the MAP

In [56]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location = [latitude_scar, longitude_scar], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarb_merged['Latitude'], scarb_merged['Longitude'], scarb_merged['Neighborhood'], scarb_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [57]:
scarb_merged.loc[scarb_merged['Cluster Labels'] == 0, scarb_merged.columns[[1] + list(range(5, scarb_merged.shape[1]))]]

Unnamed: 0,PostalCode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,M1T,-79.304302,0,Pizza Place,Noodle House,Shopping Mall,Pharmacy,Fried Chicken Joint,Chinese Restaurant,Italian Restaurant,Thai Restaurant,Furniture / Home Store,Fast Food Restaurant
