In [1]:
# importing libraries
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd


import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

### Scrape and transform the data about Toronto neighbrhood into a dataframe ###

In [2]:
# specify the url
post_codes = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
# query the website and return the html to the variable ‘page’
page = requests.get(post_codes, timeout=5)

In [4]:
# parse the html using beautiful soup and store in variable `soup`
soup = BeautifulSoup(page.content, 'html.parser')

### Creating the dataframe by droping the cells with "not assigned" borough and combining borough listed twice ###

In [5]:
code_table = soup.find('table')
code_rows = code_table.findAll('tr')
columns=['Postcode', 'Borough', 'Neighbourhood']
df_codes = pd.DataFrame(columns=columns)
for idx, val in enumerate(code_rows):
    code_cells = val.findAll('td')
    df_list = []
    for idx, val in enumerate(code_cells):
        df_list.append(val.text.rstrip())
    if(int(len(df_list)) > 0):
        if(df_list[1] != "Not assigned"):
            if(df_list[2] == "Not assigned"):
                df_list[2] = df_list[1]

            df_dic={columns[0]: df_list[0], columns[1]: df_list[1], columns[2]: df_list[2]}
            df_codes = df_codes.append(df_dic, ignore_index=True)

df_codes = df_codes.groupby('Postcode', as_index=False).agg(lambda x: ', '.join(set(x.dropna())))                

### shape of our dataframe ###

In [6]:
df_codes.shape

(103, 3)

In [7]:
df_codes

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Highland Creek, Port Union"
2,M1E,Scarborough,"West Hill, Guildwood, Morningside"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Ionview, East Birchmount Park, Kennedy Park"
7,M1L,Scarborough,"Oakridge, Golden Mile, Clairlea"
8,M1M,Scarborough,"Scarborough Village West, Cliffside, Cliffcrest"
9,M1N,Scarborough,"Cliffside West, Birch Cliff"


In [8]:
!wget -O Geospatial_data.csv https://cocl.us/Geospatial_data

--2019-09-04 23:33:25--  https://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 169.48.113.201
Connecting to cocl.us (cocl.us)|169.48.113.201|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-09-04 23:33:25--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 185.235.236.197
Connecting to ibm.box.com (ibm.box.com)|185.235.236.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-09-04 23:33:26--  https://ibm.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Reusing existing connection to ibm.box.com:443.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.ent.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-09-0

# Using the csv file to create the dataframe with the geographical coordinates #

In [9]:
df_geo = pd.read_csv("Geospatial_data.csv")
df_codes = df_codes.join(df_geo)

In [10]:
df_codes

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Highland Creek, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"West Hill, Guildwood, Morningside",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,M1J,43.744734,-79.239476
6,M1K,Scarborough,"Ionview, East Birchmount Park, Kennedy Park",M1K,43.727929,-79.262029
7,M1L,Scarborough,"Oakridge, Golden Mile, Clairlea",M1L,43.711112,-79.284577
8,M1M,Scarborough,"Scarborough Village West, Cliffside, Cliffcrest",M1M,43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West, Birch Cliff",M1N,43.692657,-79.264848


# Exploring and clustering the neighborhoods in Toronto ###

In [11]:
!pip install geopy
!conda install -c conda-forge folium=0.5.0 --yes 

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    certifi-2019.6.16          |           py36_1         149 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.3 MB

The following NEW packages will be 

### Checking the geographical coordinate of Toronto ###

In [12]:
address = 'Toronto, Canada'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


  app.launch_new_instance()


In [13]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)
folium.TileLayer('openstreetmap').add_to(map_toronto)
# add markers to map
for lat, lng, label in zip(df_codes['Latitude'], df_codes['Longitude'], df_codes['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Capstone week 2 : African dishes based Vegan food restaurant project ###
###### We will try to predict the best neighborhood to install our restaurant based on the analyze of venue by clusters ######

In [14]:
CLIENT_ID = 'DM4PTKRNKDO0BUQH1J43QR5KRFUPRK4BAAECWAIYTP2L53FQ'
CLIENT_SECRET = '1WZ2JHY0YLTP0XKLMGN2DI1BZNJLVEATSVPF4R0RTEWDMQFM' 
VERSION = '20180605' # Foursquare API version
radius = 1000
LIMIT = 200
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: DM4PTKRNKDO0BUQH1J43QR5KRFUPRK4BAAECWAIYTP2L53FQ
CLIENT_SECRET:1WZ2JHY0YLTP0XKLMGN2DI1BZNJLVEATSVPF4R0RTEWDMQFM


###### Let us extract a random neighborhood and search for its coordinates ######

In [16]:
df_codes.loc[6, 'Neighbourhood']

'Ionview, East Birchmount Park, Kennedy Park'

In [17]:
neighborhood_latitude = df_codes.loc[6, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_codes.loc[6, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_codes.loc[6, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Ionview, East Birchmount Park, Kennedy Park are 43.7279292, -79.26202940000002.


#### We will get the "url" to diplay venue around our randomly choosen neighborhood ####

In [18]:
LIMIT = 100 
radius = 500 

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=DM4PTKRNKDO0BUQH1J43QR5KRFUPRK4BAAECWAIYTP2L53FQ&client_secret=1WZ2JHY0YLTP0XKLMGN2DI1BZNJLVEATSVPF4R0RTEWDMQFM&v=20180605&ll=43.7279292,-79.26202940000002&radius=500&limit=100'

In [19]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d704ef1b77c77002c513ed7'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 4,
  'suggestedBounds': {'ne': {'lat': 43.7324292045, 'lng': -79.25581377000155},
   'sw': {'lat': 43.723429195499996, 'lng': -79.26824502999848}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b6a37ccf964a520a5cd2be3',
       'name': 'Giant Tiger',
       'location': {'address': '682 Kennedy Road',
        'crossStreet': 'Eglinton Ave. E.',
        'lat': 43.72744662939136,
        'lng': -79.26624035854763,
        'labeledLatLngs': [{'label': 'display',
          '

#### we will then extract the venues obtainend by the results below by category ####

In [20]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

###### We go ahead to normalize and transform the json file in a dataframe named nearby venues #######

In [21]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Giant Tiger,Department Store,43.727447,-79.26624
1,Tim Hortons,Coffee Shop,43.726895,-79.266157
2,Bros. CONVENIENCE,Convenience Store,43.727781,-79.265708
3,Tandy Leather,Hobby Shop,43.726974,-79.266513


###### Now let'us explore the city of Toronto's venue and return the result obtained by the Foursquare Api in a dataframe named Toronto venues ######

In [22]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

toronto_venues = getNearbyVenues(names=df_codes['Neighbourhood'],
                                   latitudes=df_codes['Latitude'],
                                   longitudes=df_codes['Longitude']
                                  )

Malvern, Rouge
Rouge Hill, Highland Creek, Port Union
West Hill, Guildwood, Morningside
Woburn
Cedarbrae
Scarborough Village
Ionview, East Birchmount Park, Kennedy Park
Oakridge, Golden Mile, Clairlea
Scarborough Village West, Cliffside, Cliffcrest
Cliffside West, Birch Cliff
Wexford Heights, Scarborough Town Centre, Dorset Park
Wexford, Maryvale
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Milliken, L'Amoreaux East, Agincourt North, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Don Mills South, Flemingdon Park
Downsview North, Bathurst Manor, Wilson Heights
York University, Northwood Park
Downsview East, CFB Toronto
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

In [23]:
print(toronto_venues.shape)
toronto_venues.head()

(2249, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge Hill, Highland Creek, Port Union",43.784535,-79.160497,Chris Effects Painting,43.784343,-79.163742,Construction & Landscaping
2,"Rouge Hill, Highland Creek, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"West Hill, Guildwood, Morningside",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
4,"West Hill, Guildwood, Morningside",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [25]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",9,9,9,9,9,9
Bayview Village,4,4,4,4,4,4
"Beaumond Heights, South Steeles, Albion Gardens, Humbergate, Thistletown, Silverstone, Mount Olive, Jamestown",11,11,11,11,11,11
Berczy Park,55,55,55,55,55,55
"Brockton, Exhibition Place, Parkdale Village",22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern,15,15,15,15,15,15
"Cabbagetown, St. James Town",41,41,41,41,41,41
Caledonia-Fairbanks,5,5,5,5,5,5
Canada Post Gateway Processing Centre,11,11,11,11,11,11


##### Let us analyze each neighborhood and return a dataframe named Toronto ont hot which we name Toronto onehot and the group the venues by neighborhood ######

In [26]:

toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Beaumond Heights, South Steeles, Albion Garden...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0


##### We will now identifiate the top 10 most common venue by each neighborhood and return the result in a dataframe ######

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Skating Rink,Breakfast Spot,Sandwich Place,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
1,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Gym,Skating Rink,Pharmacy,Sandwich Place,Pub,Pool,Dog Run,Dessert Shop
2,Bayview Village,Café,Japanese Restaurant,Bank,Chinese Restaurant,Diner,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
3,"Beaumond Heights, South Steeles, Albion Garden...",Grocery Store,Fast Food Restaurant,Coffee Shop,Japanese Restaurant,Discount Store,Sandwich Place,Beer Store,Fried Chicken Joint,Pizza Place,Pharmacy
4,Berczy Park,Coffee Shop,Cocktail Bar,Steakhouse,Cheese Shop,Café,Farmers Market,Seafood Restaurant,Bakery,Beer Bar,Nightclub


###### Finally let'use the k-means method to see how the venues are clustered ######

In [28]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
#kmeans.labels_[0:10] 
kmeans.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       4, 1, 1, 4, 4, 4, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 4,
       1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 2, 1, 1, 4, 3, 1, 1], dtype=int32)

In [29]:
df_codes_join = df_codes.rename(columns={'Neighbourhood':'Neighborhood'}) #['Neighbourhood','Latitude','Longitude']
result = toronto_grouped.join(df_codes_join.set_index('Neighborhood'), on='Neighborhood')
#result = pd.concat([toronto_grouped,df_codes_join],axis=1, join='inner', on='Neighborhood')
toronto_merged = result

# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!
toronto_merged.columns.values

array(['Neighborhood', 'Yoga Studio', 'Accessories Store',
       'Afghan Restaurant', 'Airport', 'Airport Food Court',
       'Airport Gate', 'Airport Lounge', 'Airport Service',
       'Airport Terminal', 'American Restaurant', 'Antique Shop',
       'Aquarium', 'Art Gallery', 'Art Museum', 'Arts & Crafts Store',
       'Asian Restaurant', 'Athletics & Sports', 'Auto Garage',
       'Auto Workshop', 'BBQ Joint', 'Baby Store', 'Bagel Shop', 'Bakery',
       'Bank', 'Bar', 'Baseball Field', 'Baseball Stadium',
       'Basketball Court', 'Basketball Stadium', 'Beach',
       'Bed & Breakfast', 'Beer Bar', 'Beer Store', 'Belgian Restaurant',
       'Bike Shop', 'Bistro', 'Boat or Ferry', 'Bookstore', 'Boutique',
       'Brazilian Restaurant', 'Breakfast Spot', 'Brewery', 'Bridal Shop',
       'Bubble Tea Shop', 'Building', 'Burger Joint', 'Burrito Place',
       'Bus Line', 'Bus Station', 'Business Service', 'Butcher', 'Café',
       'Cajun / Creole Restaurant', 'Camera Store', 'Candy St

### Clusters vizualisation on a map ###

In [30]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### at the end we will examine each cluster carefully to make our choice ###

In [31]:
## Cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[0] + list(range(toronto_merged.shape[1]-10, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
86,"West Deane Park, Islington, Martin Grove, Clov...",Bank,Women's Store,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store,Diner


In [33]:
## Cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[0] + list(range(toronto_merged.shape[1]-10, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Skating Rink,Breakfast Spot,Sandwich Place,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
1,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Gym,Skating Rink,Pharmacy,Sandwich Place,Pub,Pool,Dog Run,Dessert Shop
2,Bayview Village,Café,Japanese Restaurant,Bank,Chinese Restaurant,Diner,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
3,"Beaumond Heights, South Steeles, Albion Garden...",Grocery Store,Fast Food Restaurant,Coffee Shop,Japanese Restaurant,Discount Store,Sandwich Place,Beer Store,Fried Chicken Joint,Pizza Place,Pharmacy
4,Berczy Park,Coffee Shop,Cocktail Bar,Steakhouse,Cheese Shop,Café,Farmers Market,Seafood Restaurant,Bakery,Beer Bar,Nightclub
5,"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,Breakfast Spot,Café,Bakery,Stadium,Burrito Place,Restaurant,Caribbean Restaurant,Climbing Gym,Pet Store
6,Business Reply Mail Processing Centre 969 Eastern,Gym / Fitness Center,Garden,Comic Shop,Pizza Place,Recording Studio,Restaurant,Burrito Place,Brewery,Skate Park,Farmers Market
7,"Cabbagetown, St. James Town",Coffee Shop,Bakery,Café,Italian Restaurant,Pub,Pizza Place,Restaurant,Japanese Restaurant,Bank,Chinese Restaurant
9,Canada Post Gateway Processing Centre,Coffee Shop,Hotel,American Restaurant,Middle Eastern Restaurant,Sandwich Place,Burrito Place,Mediterranean Restaurant,Fried Chicken Joint,Gym / Fitness Center,Dumpling Restaurant
10,Cedarbrae,Hakka Restaurant,Athletics & Sports,Fried Chicken Joint,Bakery,Caribbean Restaurant,Thai Restaurant,Bank,Discount Store,Doner Restaurant,Donut Shop


In [34]:
### Cluster 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[0] + list(range(toronto_merged.shape[1]-10, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
94,"Willowdale, Newtonbrook",Home Service,Women's Store,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant


In [35]:
#### Cluster 4
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[0] + list(range(toronto_merged.shape[1]-10, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
98,"York Mills, Silver Hills",Martial Arts Dojo,Women's Store,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant


In [36]:

#### Cluster 5
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[0] + list(range(toronto_merged.shape[1]-10, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Caledonia-Fairbanks,Park,Women's Store,Market,Fast Food Restaurant,Event Space,Falafel Restaurant,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Dim Sum Restaurant
22,"Downsview East, CFB Toronto",Park,Playground,Airport,Construction & Landscaping,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop
25,Downsview West,Shopping Mall,Park,Grocery Store,Bank,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
26,"Downsview, North Park, Upwood Park",Park,Bakery,Construction & Landscaping,Basketball Court,Women's Store,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
27,East Toronto,Park,Coffee Shop,Convenience Store,Metro Station,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop
30,"Forest Hill North, Forest Hill West",Trail,Park,Sushi Restaurant,Jewelry Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
50,Lawrence Park,Park,Bus Line,Swim School,Women's Store,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
54,"Milliken, L'Amoreaux East, Agincourt North, St...",Park,Playground,Gym,Coffee Shop,Women's Store,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant
62,Parkwoods,Food & Drink Shop,Park,Women's Store,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
65,Rosedale,Park,Trail,Playground,Building,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop


# " This is the end of the Notebook" #