# Question One

### Python Libs import


In [1]:
# For Question One and Two
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests

#For Question Three
import numpy as np
import json
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium

# Foursquare client id and secret are read from environment variables so that
# they are never written into the notebook; each is None when its variable is unset.
import os
CLIENT_ID = os.getenv('CLIENT_ID')
CLIENT_SECRET = os.getenv('CLIENT_SECRET')
VERSION = '20180605'  # sent as the `v` query parameter of every Foursquare request

#### Getting the table

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Fail loudly on an HTTP error (and never hang forever) instead of silently
# parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
full_page = response.text

# Name the parser explicitly: the generic 'html' feature lets BeautifulSoup pick
# whichever parser happens to be installed, so results can differ between machines.
convert_to_bs4 = bs(full_page, 'html.parser')
wiki_table = convert_to_bs4.find('table', {'class': 'wikitable'})
if wiki_table is None:
    raise ValueError('No table with class "wikitable" found at {}'.format(url))
just_table = wiki_table.find_all('tr')

# One list of cell texts per <tr>; a row without any <td> cell becomes an empty list.
the_table = [[cell.text for cell in row.find_all('td')] for row in just_table]

#### Convert to the DataFrame

In [3]:
# Build a frame from the scraped rows; columns get the default integer labels
# and rows that had no cells show up as all-missing.
df = pd.DataFrame(the_table)
df.head()

Unnamed: 0,0,1,2
0,,,
1,M1A\n,Not assigned\n,Not assigned\n
2,M2A\n,Not assigned\n,Not assigned\n
3,M3A\n,North York\n,Parkwoods\n
4,M4A\n,North York\n,Victoria Village\n


#### Dropping NA rows, renaming the columns, stripping the trailing `\n`, removing 'Not assigned' boroughs, resetting the index

In [4]:
# Drop rows with missing cells (such as the empty first row) and give the
# three columns meaningful names.
cleaned = df.dropna().rename(columns={0: 'PostalCode', 1: 'Borough', 2: 'Neighborhood'})

# Every scraped cell carries newline characters at its ends; strip them.
for column in ('PostalCode', 'Borough', 'Neighborhood'):
    cleaned[column] = cleaned[column].str.strip('\n')

# Keep only postal codes that have a borough, then renumber the rows from 0.
df = cleaned[cleaned['Borough'] != 'Not assigned'].reset_index(drop=True)

# shape 103, 3
df.shape

(103, 3)

# Question Two

##### I tried other methods, but only reading the CSV file worked for me
###### The file was downloaded from https://cocl.us/Geospatial_data, as described in the assignment instructions

In [5]:
# Load the coordinates file; the relative path is resolved against the
# notebook's working directory.
geo_csv= pd.read_csv('Geospatial_Coordinates.csv')

#### Renaming the columns

In [6]:
# Use the same key column name as the scraped table so the two frames can be merged.
geo_csv = geo_csv.rename(columns={'Postal Code': 'PostalCode'})

#### Merging with the main dataframe

In [7]:
# Attach the coordinates to each postal code (default inner join on the shared key).
df = pd.merge(df, geo_csv, on='PostalCode')
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


# Question Three

### In this section I analyse the neighborhoods of the 'Central Toronto' borough of Toronto

#### The analysis is meant for people who want to move to Central Toronto and would like to choose a neighborhood based on their preferred venues

In [12]:
# getting latitude and longitude of 'Central Toronto'

address = 'Central Toronto, Toronto, CA'
# The user agent should identify this application, not repeat the search query.
geolocator = Nominatim(user_agent='central_toronto_explorer')
location = geolocator.geocode(address)
# geocode() yields None when nothing matches; stop here with a clear message
# instead of failing on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

latitude, longitude

(43.6534817, -79.3839347)

#### Creating a dataframe for 'Central Toronto' and resetting its index

In [13]:
# Rows of the 'Central Toronto' borough only, renumbered from 0.
is_central = df['Borough'] == 'Central Toronto'
central_toronto = df.loc[is_central].reset_index(drop=True)
central_toronto


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M5N,Central Toronto,Roselawn,43.711695,-79.416936
2,M4P,Central Toronto,Davisville North,43.712751,-79.390197
3,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307
4,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678
5,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678
6,M4S,Central Toronto,Davisville,43.704324,-79.38879
7,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
8,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049


#### Map of the neighborhoods of Central Toronto

In [14]:
# Base map centred on the geocoded coordinates.
map_central_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One blue circle per neighborhood, with a "<neighborhood>, <borough>" popup.
marker_rows = zip(
    central_toronto['Latitude'],
    central_toronto['Longitude'],
    central_toronto['Borough'],
    central_toronto['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_rows:
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_central_toronto)

map_central_toronto

#### Getting venue data from Foursquare

In [15]:
LIMIT = 100   # sent as the `limit` query parameter

radius = 500  # sent as the `radius` query parameter

# Fill the explore-endpoint template with the credentials, API version,
# geocoded coordinates, radius and limit, in that order.
explore_url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
url = explore_url_template.format(CLIENT_ID, CLIENT_SECRET, VERSION, latitude, longitude, radius, LIMIT)


#### Converting to Json via requests

In [16]:
# Call the explore endpoint and decode the JSON body into nested dicts and lists.
response = requests.get(url)
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5f30172e51e46d2576d3f81e'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 88,
  'suggestedBounds': {'ne': {'lat': 43.6579817045, 'lng': -79.37772678059432},
   'sw': {'lat': 43.6489816955, 'lng': -79.39014261940568}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5227bb01498e17bf485e6202',
       'name': 'Downtown Toronto',
       'location': {'lat': 43.65323167517444,
        'lng': -79.38529600606677,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.65323167517444,
          'lng'

#### Function for getting categories and venues

In [17]:
def get_category_type(row):
    """Return the name of the first category listed for a venue record.

    ``row`` is any key-indexable record (dict, pandas Series, ...). The
    category list is read from ``row['categories']``; when that key is absent
    it is read from ``row['venue.categories']`` instead.

    Returns ``None`` when the category list is empty or ``None``.
    Raises ``KeyError`` when neither key is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and must not be hidden.
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

#### Getting venue names with lat and long

In [18]:
venues = results['response']['groups'][0]['items']

# Flatten the nested venue records and keep only the four columns used below.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = pd.json_normalize(venues).loc[:, filtered_columns]

# Reduce each category list to the name of its first entry.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of every column name.
nearby_venues = nearby_venues.rename(columns=lambda col: col.split(".")[-1])

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Nathan Phillips Square,Plaza,43.652270,-79.383516
2,Japango,Sushi Restaurant,43.655268,-79.385165
3,Poke Guys,Poke Place,43.654895,-79.385052
4,Eggspectation Bell Trinity Square,Breakfast Spot,43.653144,-79.381980
...,...,...,...,...
83,EB Games,Video Game Store,43.655293,-79.380328
84,Tim Hortons,Coffee Shop,43.655212,-79.380063
85,Pantages Hotel & Spa,Hotel,43.654498,-79.379035
86,Pantages Lounge & Bar,Cocktail Bar,43.654493,-79.379000


In [19]:
# Report how many venue rows the explore request produced.
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

88 venues were returned by Foursquare.


#### Function for getting nearby venues

In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the Foursquare "explore" venues around each neighborhood.

    Uses the notebook-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT values
    to build each request.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Neighborhood labels and their coordinates, walked in parallel.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per (neighborhood, venue) pair with the columns
        'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
        'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category'.
        The frame keeps these columns even when no venue is returned, and
        'Venue Category' is None for a venue that lists no category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; surface HTTP failures directly instead of as a
        # confusing KeyError while digging into the payload
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # A venue may list no category at all; record None rather than
            # failing on the [0] lookup.
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works for zero rows,
    # where assigning `.columns` afterwards would raise a length mismatch.
    return pd.DataFrame(venue_rows, columns=column_names)

In [21]:
# Fetch the venues around every Central Toronto neighborhood (default radius).
central_toronto_venues = getNearbyVenues(
    central_toronto['Neighborhood'],
    central_toronto['Latitude'],
    central_toronto['Longitude'],
)
central_toronto_venues

Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
North Toronto West,  Lawrence Park
The Annex, North Midtown, Yorkville
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Lawrence Park,43.728020,-79.388790,Lawrence Park Ravine,43.726963,-79.394382,Park
1,Lawrence Park,43.728020,-79.388790,Zodiac Swim School,43.728532,-79.382860,Swim School
2,Lawrence Park,43.728020,-79.388790,TTC Bus #162 - Lawrence-Donway,43.728026,-79.382805,Bus Line
3,Roselawn,43.711695,-79.416936,Dr.Paul Hodges MIP,43.710634,-79.415810,Health & Beauty Service
4,Roselawn,43.711695,-79.416936,Ceiling Champions,43.713891,-79.420702,Home Service
...,...,...,...,...,...,...,...
103,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,TTC Stop #8160,43.687089,-79.398159,Light Rail Station
104,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Sprout,43.687996,-79.394651,Vietnamese Restaurant
105,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,Pizzaiolo,43.687991,-79.394634,Pizza Place
106,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,TTC Stop #,43.685826,-79.404981,Light Rail Station


In [22]:
# (rows, columns) of the combined venue table.
central_toronto_venues.shape

(108, 7)

In [23]:
# Count of non-null values per column for each neighborhood, i.e. how many
# venue rows each neighborhood received.
central_toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Davisville,32,32,32,32,32,32
Davisville North,8,8,8,8,8,8
"Forest Hill North & West, Forest Hill Road Park",4,4,4,4,4,4
Lawrence Park,3,3,3,3,3,3
"Moore Park, Summerhill East",3,3,3,3,3,3
"North Toronto West, Lawrence Park",18,18,18,18,18,18
Roselawn,3,3,3,3,3,3
"Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park",17,17,17,17,17,17
"The Annex, North Midtown, Yorkville",20,20,20,20,20,20


In [24]:
# Number of distinct venue categories across all neighborhoods.
category_count = len(central_toronto_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 60 uniques categories.


#### One hot encoding

In [25]:
# One indicator column per venue category (no prefix on the column names).
venue_dummies = pd.get_dummies(central_toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Put the neighborhood label back next to the indicators.
venue_dummies['Neighborhood'] = central_toronto_venues['Neighborhood']

# Reorder so that the last column comes first and the rest keep their order.
leading = venue_dummies.columns[-1:].tolist()
remaining = venue_dummies.columns[:-1].tolist()
central_toronto_onehot = venue_dummies[leading + remaining]

central_toronto_onehot

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bank,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,...,Supermarket,Sushi Restaurant,Swim School,Tennis Court,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
2,Lawrence Park,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,Roselawn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Roselawn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,"Summerhill West, Rathnelly, South Hill, Forest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
104,"Summerhill West, Rathnelly, South Hill, Forest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
105,"Summerhill West, Rathnelly, South Hill, Forest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
106,"Summerhill West, Rathnelly, South Hill, Forest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
# (rows, columns) of the one-hot table: one row per venue.
central_toronto_onehot.shape

(108, 61)

In [27]:
# Averaging the indicator columns per neighborhood gives each category's share
# of that neighborhood's venues.
neighborhood_groups = central_toronto_onehot.groupby('Neighborhood')
central_toronto_grouped = neighborhood_groups.mean().reset_index()
central_toronto_grouped

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bank,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,...,Supermarket,Sushi Restaurant,Swim School,Tennis Court,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Davisville,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0625,...,0.0,0.0625,0.0,0.0,0.03125,0.03125,0.0,0.0,0.0,0.0
1,Davisville North,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Forest Hill North & West, Forest Hill Road Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
3,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,...,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0
5,"North Toronto West, Lawrence Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556
6,Roselawn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Summerhill West, Rathnelly, South Hill, Forest...",0.058824,0.0,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,...,0.058824,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0
8,"The Annex, North Midtown, Yorkville",0.0,0.05,0.0,0.0,0.0,0.0,0.05,0.0,0.15,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0


In [28]:
# (rows, columns) of the grouped table: one row per neighborhood.
central_toronto_grouped.shape

(9, 61)

#### Top five venues for each Neighborhood

In [29]:
num_top_venues = 5

for hood in central_toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    # Transpose the neighborhood's single row into a (category, frequency) table.
    hood_mask = central_toronto_grouped['Neighborhood'] == hood
    temp = central_toronto_grouped[hood_mask].T.reset_index()
    temp.columns = ['venue','freq']
    # The first row holds the neighborhood name itself, not a category.
    temp = temp.iloc[1:].assign(freq=lambda frame: frame['freq'].astype(float))
    ranked = temp.round({'freq': 2}).sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Davisville----
                venue  freq
0        Dessert Shop  0.09
1      Sandwich Place  0.09
2         Pizza Place  0.09
3    Sushi Restaurant  0.06
4  Italian Restaurant  0.06


----Davisville North----
                  venue  freq
0                  Park  0.12
1  Gym / Fitness Center  0.12
2               Dog Run  0.12
3      Department Store  0.12
4        Sandwich Place  0.12


----Forest Hill North & West, Forest Hill Road Park----
                 venue  freq
0        Jewelry Store  0.25
1                Trail  0.25
2             Bus Line  0.25
3     Sushi Restaurant  0.25
4  American Restaurant  0.00


----Lawrence Park----
                 venue  freq
0             Bus Line  0.33
1          Swim School  0.33
2                 Park  0.33
3  American Restaurant  0.00
4       Sandwich Place  0.00


----Moore Park, Summerhill East----
                 venue  freq
0                  Gym  0.33
1         Tennis Court  0.33
2           Restaurant  0.33
3  American Restaurant

#### Function for most common venues 

In [30]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest entries of ``row``.

    The first entry of ``row`` is skipped (callers pass rows whose first
    entry is the neighborhood name); the remaining entries are sorted in
    descending order and the leading index labels are returned as an array.
    """
    frequencies = row.iloc[1:]
    ranked_labels = frequencies.sort_values(ascending=False).index.values
    return ranked_labels[:num_top_venues]

#### Sorting venues based on top ten

In [31]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Column headers: '1st', '2nd' and '3rd' use the suffixes above, every later rank uses 'th'.
columns = ['Neighborhood'] + [
    '{}{} Most Common Venue'.format(rank, indicators[rank - 1])
    if rank <= len(indicators)
    else '{}th Most Common Venue'.format(rank)
    for rank in range(1, num_top_venues + 1)
]

# Empty table with those headers, seeded with the neighborhood names.
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = central_toronto_grouped['Neighborhood']

# Fill each row with that neighborhood's categories in descending frequency order.
for position in range(len(central_toronto_grouped)):
    grouped_row = central_toronto_grouped.iloc[position, :]
    neighborhoods_venues_sorted.iloc[position, 1:] = return_most_common_venues(grouped_row, num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Davisville,Pizza Place,Dessert Shop,Sandwich Place,Sushi Restaurant,Coffee Shop,Italian Restaurant,Café,Gym,Restaurant,Gourmet Shop
1,Davisville North,Dog Run,Food & Drink Shop,Hotel,Gym / Fitness Center,Breakfast Spot,Sandwich Place,Department Store,Park,Farmers Market,Fast Food Restaurant
2,"Forest Hill North & West, Forest Hill Road Park",Sushi Restaurant,Bus Line,Trail,Jewelry Store,Donut Shop,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Fried Chicken Joint,Yoga Studio
3,Lawrence Park,Swim School,Bus Line,Park,Yoga Studio,Dog Run,Gym,Greek Restaurant,Gourmet Shop,Gift Shop,Gas Station
4,"Moore Park, Summerhill East",Restaurant,Gym,Tennis Court,Yoga Studio,Greek Restaurant,Gourmet Shop,Gift Shop,Gas Station,Garden,Fried Chicken Joint
5,"North Toronto West, Lawrence Park",Clothing Store,Coffee Shop,Yoga Studio,Sporting Goods Shop,Gift Shop,Fast Food Restaurant,Mexican Restaurant,Park,Diner,Salon / Barbershop
6,Roselawn,Home Service,Health & Beauty Service,Garden,Gym / Fitness Center,Gym,Greek Restaurant,Gourmet Shop,Gift Shop,Gas Station,Fried Chicken Joint
7,"Summerhill West, Rathnelly, South Hill, Forest...",Pub,Light Rail Station,Coffee Shop,Sports Bar,Vietnamese Restaurant,Fried Chicken Joint,Liquor Store,Pizza Place,Restaurant,American Restaurant
8,"The Annex, North Midtown, Yorkville",Sandwich Place,Café,Coffee Shop,History Museum,Indian Restaurant,Donut Shop,Liquor Store,Middle Eastern Restaurant,Park,Pharmacy


#### K-means clustering of the neighborhoods

In [32]:
# set number of clusters
kclusters = 6

# K-means needs a purely numeric matrix, so leave out the neighborhood label.
# The axis is given by keyword: pandas 2.x no longer accepts it positionally
# (`drop('Neighborhood', 1)` raises a TypeError there).
central_toronto_grouped_clustering = central_toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering; n_init is pinned because its default differs between
# scikit-learn versions, and random_state fixes the initialisation
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(central_toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 5, 4, 0, 2, 1, 3, 1, 1], dtype=int32)

In [33]:
# add clustering labels; overwrite them if this cell has already been run,
# because DataFrame.insert raises when the column already exists
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach each neighborhood's cluster label and ranked venue categories to its
# postal code, borough and coordinates
central_toronto_merged = central_toronto.join(
    neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

central_toronto_merged # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,Swim School,Bus Line,Park,Yoga Studio,Dog Run,Gym,Greek Restaurant,Gourmet Shop,Gift Shop,Gas Station
1,M5N,Central Toronto,Roselawn,43.711695,-79.416936,3,Home Service,Health & Beauty Service,Garden,Gym / Fitness Center,Gym,Greek Restaurant,Gourmet Shop,Gift Shop,Gas Station,Fried Chicken Joint
2,M4P,Central Toronto,Davisville North,43.712751,-79.390197,5,Dog Run,Food & Drink Shop,Hotel,Gym / Fitness Center,Breakfast Spot,Sandwich Place,Department Store,Park,Farmers Market,Fast Food Restaurant
3,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307,4,Sushi Restaurant,Bus Line,Trail,Jewelry Store,Donut Shop,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Fried Chicken Joint,Yoga Studio
4,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678,1,Clothing Store,Coffee Shop,Yoga Studio,Sporting Goods Shop,Gift Shop,Fast Food Restaurant,Mexican Restaurant,Park,Diner,Salon / Barbershop
5,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,1,Sandwich Place,Café,Coffee Shop,History Museum,Indian Restaurant,Donut Shop,Liquor Store,Middle Eastern Restaurant,Park,Pharmacy
6,M4S,Central Toronto,Davisville,43.704324,-79.38879,1,Pizza Place,Dessert Shop,Sandwich Place,Sushi Restaurant,Coffee Shop,Italian Restaurant,Café,Gym,Restaurant,Gourmet Shop
7,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,2,Restaurant,Gym,Tennis Court,Yoga Studio,Greek Restaurant,Gourmet Shop,Gift Shop,Gas Station,Garden,Fried Chicken Joint
8,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,1,Pub,Light Rail Station,Coffee Shop,Sports Bar,Vietnamese Restaurant,Fried Chicken Joint,Liquor Store,Pizza Place,Restaurant,American Restaurant


In [34]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one colour per cluster, evenly spaced
# along the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# The join that built central_toronto_merged keeps neighborhoods that have no
# venue data and gives them a missing label; those cannot be coloured, so skip them.
labelled = central_toronto_merged.dropna(subset=['Cluster Labels'])

# add markers to the map
for lat, lon, poi, cluster in zip(labelled['Latitude'], labelled['Longitude'], labelled['Neighborhood'], labelled['Cluster Labels']):
    # Labels are stored as floats whenever the column contained a missing
    # value, and a float cannot index a list.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [35]:
# Cluster 0: column 1 plus every column from position 5 onward.
summary_columns = [1, *range(5, central_toronto_merged.shape[1])]
central_toronto_merged[central_toronto_merged['Cluster Labels'] == 0].iloc[:, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,0,Swim School,Bus Line,Park,Yoga Studio,Dog Run,Gym,Greek Restaurant,Gourmet Shop,Gift Shop,Gas Station


In [36]:
# Cluster 1: column 1 plus every column from position 5 onward.
summary_columns = [1, *range(5, central_toronto_merged.shape[1])]
central_toronto_merged[central_toronto_merged['Cluster Labels'] == 1].iloc[:, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,1,Clothing Store,Coffee Shop,Yoga Studio,Sporting Goods Shop,Gift Shop,Fast Food Restaurant,Mexican Restaurant,Park,Diner,Salon / Barbershop
5,Central Toronto,1,Sandwich Place,Café,Coffee Shop,History Museum,Indian Restaurant,Donut Shop,Liquor Store,Middle Eastern Restaurant,Park,Pharmacy
6,Central Toronto,1,Pizza Place,Dessert Shop,Sandwich Place,Sushi Restaurant,Coffee Shop,Italian Restaurant,Café,Gym,Restaurant,Gourmet Shop
8,Central Toronto,1,Pub,Light Rail Station,Coffee Shop,Sports Bar,Vietnamese Restaurant,Fried Chicken Joint,Liquor Store,Pizza Place,Restaurant,American Restaurant


In [37]:
# Cluster 2: column 1 plus every column from position 5 onward.
summary_columns = [1, *range(5, central_toronto_merged.shape[1])]
central_toronto_merged[central_toronto_merged['Cluster Labels'] == 2].iloc[:, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Central Toronto,2,Restaurant,Gym,Tennis Court,Yoga Studio,Greek Restaurant,Gourmet Shop,Gift Shop,Gas Station,Garden,Fried Chicken Joint


In [38]:
# Cluster 3: column 1 plus every column from position 5 onward.
summary_columns = [1, *range(5, central_toronto_merged.shape[1])]
central_toronto_merged[central_toronto_merged['Cluster Labels'] == 3].iloc[:, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Central Toronto,3,Home Service,Health & Beauty Service,Garden,Gym / Fitness Center,Gym,Greek Restaurant,Gourmet Shop,Gift Shop,Gas Station,Fried Chicken Joint


In [39]:
# Cluster 4: column 1 plus every column from position 5 onward.
summary_columns = [1, *range(5, central_toronto_merged.shape[1])]
central_toronto_merged[central_toronto_merged['Cluster Labels'] == 4].iloc[:, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Central Toronto,4,Sushi Restaurant,Bus Line,Trail,Jewelry Store,Donut Shop,Farmers Market,Fast Food Restaurant,Food & Drink Shop,Fried Chicken Joint,Yoga Studio


In [40]:
# Cluster 5: column 1 plus every column from position 5 onward.
summary_columns = [1, *range(5, central_toronto_merged.shape[1])]
central_toronto_merged[central_toronto_merged['Cluster Labels'] == 5].iloc[:, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Central Toronto,5,Dog Run,Food & Drink Shop,Hotel,Gym / Fitness Center,Breakfast Spot,Sandwich Place,Department Store,Park,Farmers Market,Fast Food Restaurant


In [41]:
# NOTE(review): kclusters is set to 6 earlier in this notebook, so the fitted
# labels run from 0 to 5 and this selection for label 6 comes back empty.
central_toronto_merged.loc[central_toronto_merged['Cluster Labels'] == 6, central_toronto_merged.columns[[1] + list(range(5, central_toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


In [None]:
# NOTE(review): kclusters is set to 6 earlier in this notebook, so the fitted
# labels run from 0 to 5 and no row can match label 7.
central_toronto_merged.loc[central_toronto_merged['Cluster Labels'] == 7, central_toronto_merged.columns[[1] + list(range(5, central_toronto_merged.shape[1]))]]

In [None]:
# NOTE(review): kclusters is set to 6 earlier in this notebook, so the fitted
# labels run from 0 to 5 and no row can match label 8.
central_toronto_merged.loc[central_toronto_merged['Cluster Labels'] == 8, central_toronto_merged.columns[[1] + list(range(5, central_toronto_merged.shape[1]))]]