# APPLIED DATA SCIENCE CAPSTONE - THE BATTLE OF NEIGHBOURHOODS

## Table of Contents

<div class="alert alert-block alert-info" style="margin-top: 20px">

<font size = 3>

1. <a href='#item1'>Toronto, Canada</a>
2. <a href='#item1R'>Toronto, Canada - Results</a>

3. <a href='#item2'>Sydney, Australia</a>
4. <a href='#item2R'>Sydney, Australia - Results</a>

5. <a href="#item3">London, United Kingdom</a>
6. <a href="#item3R">London, United Kingdom - Results</a>

7. <a href="#item4">New York, United States</a>
8. <a href="#item4R">New York, United States - Results</a>

9. <a href="#item5">Bringing All Cities Together</a>
10. <a href="#item5R">Bringing All Cities Together - Results</a>   
</font>
</div>

<a id='item1'></a>
# 1. TORONTO, CANADA

### Scraping Data From Wikipedia

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
website_text = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(website_text,'xml')

table = soup.find('table',{'class':'wikitable sortable'})
table_rows = table.find_all('tr')
#table_rows

### Converting Data Into a Pandas Dataframe

In [2]:
data = []
for row in table_rows:
    data.append([t.text.strip() for t in row.find_all('td')])

df = pd.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighbourhood'])
df = df[~df['PostalCode'].isnull()]  # to filter out bad rows

In [3]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


### Removing Boroughs That Are Not Assigned

In [4]:
df = df[df.Borough != 'Not assigned']
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights


In [5]:
df[df.Borough == 'Not assigned']

Unnamed: 0,PostalCode,Borough,Neighbourhood


### Replacing Neighbourhoods That Are Not Assigned To Equal Borough

In [6]:
df[df.Neighbourhood == 'Not assigned']

Unnamed: 0,PostalCode,Borough,Neighbourhood
9,M7A,Queen's Park,Not assigned


In [7]:
df = df.replace(to_replace ="Not assigned", 
                 value ="Queen\'s Park")

### Check That All Boroughs and Neighbourhoods Are Assigned

In [8]:
df[df.Borough == 'Not assigned']

Unnamed: 0,PostalCode,Borough,Neighbourhood


In [9]:
df[df.Neighbourhood == 'Not assigned']

Unnamed: 0,PostalCode,Borough,Neighbourhood


### Merging Neighbourhoods With The Same PostalCode

In [10]:
postcodefig = ''
# indexfig=0
df_inter = []

for count, row in df.iterrows():
    if row['PostalCode']== postcodefig:
        df_inter += [[count-1, row['Neighbourhood']]]
    postcodefig = row['PostalCode']  
    

df_inter

[[5, 'Regent Park'],
 [7, 'Lawrence Manor'],
 [12, 'Malvern'],
 [16, 'Parkview Hill'],
 [18, 'Garden District'],
 [23, 'Islington'],
 [24, 'Martin Grove'],
 [25, 'Princess Gardens'],
 [26, 'West Deane Park'],
 [28, 'Rouge Hill'],
 [29, 'Port Union'],
 [32, 'Don Mills South'],
 [39, 'Eringate'],
 [40, 'Markland Wood'],
 [41, 'Old Burnhamthorpe'],
 [43, 'Morningside'],
 [44, 'West Hill'],
 [65, 'Downsview North'],
 [66, 'Wilson Heights'],
 [69, 'King'],
 [70, 'Richmond'],
 [72, 'Dufferin'],
 [78, 'Henry Farm'],
 [79, 'Oriole'],
 [81, 'York University'],
 [84, 'Toronto Islands'],
 [85, 'Union Station'],
 [87, 'Trinity'],
 [92, 'Ionview'],
 [93, 'Kennedy Park'],
 [96, 'Downsview East'],
 [98, 'Riverdale'],
 [100, 'Toronto Dominion Centre'],
 [102, 'Exhibition Place'],
 [103, 'Parkdale Village'],
 [108, 'Golden Mile'],
 [109, 'Oakridge'],
 [111, 'York Mills'],
 [114, 'India Bazaar'],
 [116, 'Victoria Hotel'],
 [118, 'North Park'],
 [119, 'Upwood Park'],
 [124, 'Cliffside'],
 [125, 'Scarboro

In [11]:
for i in df_inter:
    df.at[i[0],'Neighbourhood']+=", " + i[1]
    df.drop(index=i[0])

In [12]:
df.drop_duplicates(subset= 'PostalCode', keep='first', inplace=True)

### Displaying The Shape and Final Dataframe

In [13]:
df.shape

(103, 3)

In [14]:
df

Unnamed: 0,PostalCode,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Harbourfront, Regent Park"
7,M6A,North York,"Lawrence Heights, Lawrence Manor"
9,M7A,Queen's Park,Queen's Park
11,M9A,Etobicoke,Islington Avenue
12,M1B,Scarborough,"Rouge, Malvern"
15,M3B,North York,Don Mills North
16,M4B,East York,"Woodbine Gardens, Parkview Hill"
18,M5B,Downtown Toronto,"Ryerson, Garden District"


### Adding Latitudes and Longitudes To Dataframe

In [15]:
!wget -O Geospatial_Coordinates.csv https://cocl.us/Geospatial_data

--2019-06-30 00:05:08--  https://cocl.us/Geospatial_data
Resolving cocl.us (cocl.us)... 159.8.72.228
Connecting to cocl.us (cocl.us)|159.8.72.228|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-06-30 00:05:12--  https://ibm.box.com/shared/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Resolving ibm.box.com (ibm.box.com)... 107.152.26.197
Connecting to ibm.box.com (ibm.box.com)|107.152.26.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-06-30 00:05:12--  https://ibm.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv
Reusing existing connection to ibm.box.com:443.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://ibm.ent.box.com/public/static/9afzr83pps4pwf2smjjcf1y5mvgb18rr.csv [following]
--2019-06-30 00:0

In [16]:
location = pd.read_csv('Geospatial_Coordinates.csv')
location.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)
location.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [17]:
df_location = pd.merge(df,location,on='PostalCode')

In [18]:
df_location

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.654260,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


### Install Libraries

In [19]:
import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.49-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

   

### Filter Downtown Toronto Only

In [20]:
#df_location_filtered = df_location
df_location_filtered = df_location[df_location.Borough == 'Downtown Toronto']

In [21]:
df_location_filtered.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


### Plotting A Map of Toronto

In [22]:
latitude = 43.6529
longitude = -79.3849
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6529, -79.3849.


In [23]:
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df_location_filtered['Latitude'], df_location_filtered['Longitude'], df_location_filtered['Borough'], df_location_filtered['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

### Define Foursquare Credentials and Version

In [24]:
CLIENT_ID = 'KJGU4QFJY0VTPJD0J34C3BVOLQAP4YDWA54IWG2R1MO3WAMQ' # your Foursquare ID
CLIENT_SECRET = 'KTABSOHL0GFSVLX5FFZORSQ4MYQSQP3SNQVFNMQ4T0RN0QZO' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: KJGU4QFJY0VTPJD0J34C3BVOLQAP4YDWA54IWG2R1MO3WAMQ
CLIENT_SECRET:KTABSOHL0GFSVLX5FFZORSQ4MYQSQP3SNQVFNMQ4T0RN0QZO


### Exploring The Neighbourhoods

In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [26]:
venues = getNearbyVenues(names=df_location_filtered['Neighbourhood'],
                                   latitudes=df_location_filtered['Latitude'],
                                   longitudes=df_location_filtered['Longitude']
                                  )

Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Christie
Adelaide, King
Harbourfront East, Toronto Islands
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Harbord, University of Toronto
Chinatown, Grange Park
CN Tower, Bathurst Quay
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown, St. James Town
First Canadian Place, Underground city
Church and Wellesley


In [27]:
print(venues.shape)
venues.head()

(1288, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbourfront, Regent Park",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Harbourfront, Regent Park",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Harbourfront, Regent Park",43.65426,-79.360636,Toronto Cooper Koo Family Cherry St YMCA Centre,43.653191,-79.357947,Gym / Fitness Center
3,"Harbourfront, Regent Park",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Harbourfront, Regent Park",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [28]:
venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King",100,100,100,100,100,100
Berczy Park,55,55,55,55,55,55
"CN Tower, Bathurst Quay",16,16,16,16,16,16
"Cabbagetown, St. James Town",46,46,46,46,46,46
Central Bay Street,88,88,88,88,88,88
"Chinatown, Grange Park",100,100,100,100,100,100
Christie,15,15,15,15,15,15
Church and Wellesley,87,87,87,87,87,87
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
"Design Exchange, Toronto Dominion Centre",100,100,100,100,100,100


In [29]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 208 uniques categories.


### Analyse Each Neighbourhoods

In [30]:
# one hot encoding
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
onehot['Neighbourhood'] = venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
onehot.shape

(1288, 209)

In [32]:
grouped = onehot.groupby('Neighbourhood').mean().reset_index()
grouped

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Adelaide, King",0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower, Bathurst Quay",0.0,0.0,0.0,0.0625,0.0625,0.0625,0.125,0.125,0.125,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.011364,0.0,0.011364,0.0,0.011364,0.0,0.011364
5,"Chinatown, Grange Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.06,0.0,0.0,0.03,0.01,0.0,0.0
6,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Church and Wellesley,0.0,0.011494,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.011494,0.0,0.011494,0.0,0.011494,0.011494
8,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0
9,"Design Exchange, Toronto Dominion Centre",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0


In [33]:
grouped.shape

(18, 209)

In [34]:
num_top_venues = 5

for hood in grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = grouped[grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King----
                 venue  freq
0          Coffee Shop  0.06
1                 Café  0.05
2  American Restaurant  0.04
3                  Bar  0.04
4      Thai Restaurant  0.04


----Berczy Park----
          venue  freq
0   Coffee Shop  0.09
1  Cocktail Bar  0.05
2      Beer Bar  0.04
3    Steakhouse  0.04
4          Café  0.04


----CN Tower, Bathurst Quay----
              venue  freq
0    Airport Lounge  0.12
1   Airport Service  0.12
2  Airport Terminal  0.12
3     Boat or Ferry  0.06
4   Harbor / Marina  0.06


----Cabbagetown, St. James Town----
                venue  freq
0         Coffee Shop  0.09
1          Restaurant  0.07
2  Italian Restaurant  0.04
3         Pizza Place  0.04
4              Bakery  0.04


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.16
1                Café  0.05
2  Italian Restaurant  0.05
3        Burger Joint  0.03
4      Sandwich Place  0.03


----Chinatown, Grange Park----
                       

### Converting Data Into A Pandas Dataframe

In [35]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [36]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = grouped['Neighbourhood']

for ind in np.arange(grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King",Coffee Shop,Café,Bar,American Restaurant,Steakhouse,Thai Restaurant,Cosmetics Shop,Burger Joint,Gym,Hotel
1,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Farmers Market,Bakery,Beer Bar,Steakhouse,Cheese Shop,Café,Italian Restaurant
2,"CN Tower, Bathurst Quay",Airport Lounge,Airport Service,Airport Terminal,Boutique,Sculpture Garden,Boat or Ferry,Airport,Airport Food Court,Airport Gate,Bar
3,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Park,Pub,Café,Bakery,Italian Restaurant,Pizza Place,Breakfast Spot,Butcher
4,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Middle Eastern Restaurant,Burger Joint,Chinese Restaurant,Spa,Restaurant,Sushi Restaurant


### Cluster Neighbourhood

In [37]:
# set number of clusters
kclusters = 7

grouped_clustering = grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 6, 4, 6, 0, 6, 2, 6, 0, 0], dtype=int32)

In [38]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

merged = df_location_filtered

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
merged = merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,5,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Restaurant,Café,Mexican Restaurant,Theater,Yoga Studio
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,6,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Ramen Restaurant,Theater,Restaurant,Pizza Place,Tea Room
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,6,Coffee Shop,Café,Hotel,Restaurant,Gastropub,Cosmetics Shop,Breakfast Spot,Bakery,Italian Restaurant,Farmers Market
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,6,Coffee Shop,Cocktail Bar,Seafood Restaurant,Farmers Market,Bakery,Beer Bar,Steakhouse,Cheese Shop,Café,Italian Restaurant
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Middle Eastern Restaurant,Burger Joint,Chinese Restaurant,Spa,Restaurant,Sushi Restaurant


In [39]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighbourhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

<a id='item1R'></a>

In [40]:
map_clusters

### Examining Clusters

### Cluster 1

In [41]:
merged.loc[merged['Cluster Labels'] == 0, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
24,Central Bay Street,0,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Middle Eastern Restaurant,Burger Joint,Chinese Restaurant,Spa,Restaurant,Sushi Restaurant
30,"Adelaide, King",0,Coffee Shop,Café,Bar,American Restaurant,Steakhouse,Thai Restaurant,Cosmetics Shop,Burger Joint,Gym,Hotel
36,"Harbourfront East, Toronto Islands",0,Coffee Shop,Aquarium,Hotel,Italian Restaurant,Café,Brewery,Restaurant,Bakery,Pizza Place,Sporting Goods Shop
42,"Design Exchange, Toronto Dominion Centre",0,Coffee Shop,Café,Hotel,Restaurant,Bakery,Italian Restaurant,Gastropub,Bar,Deli / Bodega,Japanese Restaurant
48,"Commerce Court, Victoria Hotel",0,Coffee Shop,Hotel,Café,Restaurant,American Restaurant,Gastropub,Seafood Restaurant,Bakery,Deli / Bodega,Italian Restaurant
97,"First Canadian Place, Underground city",0,Coffee Shop,Café,Hotel,Restaurant,Bar,Steakhouse,American Restaurant,Bakery,Asian Restaurant,Gastropub


### Cluster 2

In [42]:
merged.loc[merged['Cluster Labels'] == 1, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
91,Rosedale,1,Park,Playground,Trail,Building,Yoga Studio,Department Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


### Cluster 3

In [43]:
merged.loc[merged['Cluster Labels'] == 2, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
25,Christie,2,Grocery Store,Café,Park,Italian Restaurant,Coffee Shop,Restaurant,Diner,Convenience Store,Baby Store,Nightclub


### Cluster 4

In [44]:
merged.loc[merged['Cluster Labels'] == 3, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
80,"Harbord, University of Toronto",3,Café,Restaurant,Bookstore,Japanese Restaurant,Bar,Italian Restaurant,Bakery,Nightclub,Sandwich Place,Comfort Food Restaurant


### Cluster 5

In [45]:
merged.loc[merged['Cluster Labels'] == 4, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
87,"CN Tower, Bathurst Quay",4,Airport Lounge,Airport Service,Airport Terminal,Boutique,Sculpture Garden,Boat or Ferry,Airport,Airport Food Court,Airport Gate,Bar


### Cluster 6

In [46]:
merged.loc[merged['Cluster Labels'] == 5, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Harbourfront, Regent Park",5,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Restaurant,Café,Mexican Restaurant,Theater,Yoga Studio


### Cluster 7

In [47]:
merged.loc[merged['Cluster Labels'] == 6, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,"Ryerson, Garden District",6,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Ramen Restaurant,Theater,Restaurant,Pizza Place,Tea Room
15,St. James Town,6,Coffee Shop,Café,Hotel,Restaurant,Gastropub,Cosmetics Shop,Breakfast Spot,Bakery,Italian Restaurant,Farmers Market
20,Berczy Park,6,Coffee Shop,Cocktail Bar,Seafood Restaurant,Farmers Market,Bakery,Beer Bar,Steakhouse,Cheese Shop,Café,Italian Restaurant
84,"Chinatown, Grange Park",6,Café,Vegetarian / Vegan Restaurant,Dumpling Restaurant,Bakery,Bar,Mexican Restaurant,Coffee Shop,Vietnamese Restaurant,Chinese Restaurant,Farmers Market
92,Stn A PO Boxes 25 The Esplanade,6,Coffee Shop,Restaurant,Café,Beer Bar,Seafood Restaurant,Hotel,Cocktail Bar,Italian Restaurant,Park,Cheese Shop
96,"Cabbagetown, St. James Town",6,Coffee Shop,Restaurant,Park,Pub,Café,Bakery,Italian Restaurant,Pizza Place,Breakfast Spot,Butcher
99,Church and Wellesley,6,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Café,Pub,Hotel,Fast Food Restaurant,Gym


<a id='item2'></a>
# 2. SYDNEY, AUSTRALIA

### Importing Data for Sydney

In [48]:
import io

url2 = "https://raw.githubusercontent.com/see-scen-dev/Coursera_Capstone/master/Sydney_Coorindates.csv"
s = requests.get(url2).content
df_location_SYD = pd.read_csv(io.StringIO(s.decode('utf-8')))
df_location_SYD

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,2000,Sydney,Dawes Point,-33.86,151.21
1,2000,Sydney,Haymarket,-33.88,151.2
2,2000,Sydney,Millers Point,-33.86,151.2
3,2000,Sydney,Sydney,-33.87,151.21
4,2000,Sydney,The Rocks,-33.86,151.21
5,2002,Sydney,World Square,-33.88,151.21
6,2004,Sydney,Eastern Suburbs,-33.95,151.21
7,2006,Sydney,University Of Sydney,-33.89,151.19
8,2007,Sydney,Ultimo,-33.88,151.2
9,2008,Sydney,Chippendale,-33.88,151.2


### Plotting A Map of Sydney

In [49]:
latitude = -33.86785
longitude = 151.20732
print('The geograpical coordinate of Sydney are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Sydney are -33.86785, 151.20732.


In [50]:
map_Sydney = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df_location_SYD['Latitude'], df_location_SYD['Longitude'], df_location_SYD['Borough'], df_location_SYD['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Sydney)  
    
map_Sydney

### Exploring The Neighbourhoods

In [51]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [52]:
venues = getNearbyVenues(names=df_location_SYD['Neighbourhood'],
                                   latitudes=df_location_SYD['Latitude'],
                                   longitudes=df_location_SYD['Longitude']
                                  )

Dawes Point
Haymarket
Millers Point
Sydney
The Rocks
World Square
Eastern Suburbs
University Of Sydney
Ultimo
Chippendale
Darlington
Pyrmont
Surry Hills
Darlinghurst
Woolloomooloo
Elizabeth Bay
Potts Point
Rushcutters Bay
Bondi
Darling Point
Edgecliff
Point Piper
Double Bay
Rose Bay
Vaucluse
Watsons Bay
Dover Heights
Clovelly
Randwick
Daceyville
Kingsford
Kensington
Coogee
South Coogee
North Sydney
Waverton
Lavender Bay
Mcmahons Point
North Sydney
Kirribilli
Milsons Point


In [53]:
print(venues.shape)
venues.head()

(2053, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Dawes Point,-33.86,151.21,The MCA Cafe & Sculpture Terrace,-33.859512,151.209228,Café
1,Dawes Point,-33.86,151.21,Museum of Contemporary Art,-33.859859,151.209018,Art Museum
2,Dawes Point,-33.86,151.21,The Fine Food Store,-33.858546,151.208633,Café
3,Dawes Point,-33.86,151.21,The Rocks Markets,-33.859208,151.208439,Flea Market
4,Dawes Point,-33.86,151.21,La Renaissance Café Pâtisserie,-33.85926,151.208428,Bakery


In [54]:
venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bondi,22,22,22,22,22,22
Chippendale,98,98,98,98,98,98
Clovelly,18,18,18,18,18,18
Coogee,56,56,56,56,56,56
Daceyville,40,40,40,40,40,40
Darling Point,4,4,4,4,4,4
Darlinghurst,100,100,100,100,100,100
Darlington,75,75,75,75,75,75
Dawes Point,100,100,100,100,100,100
Double Bay,34,34,34,34,34,34


In [55]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 193 uniques categories.


In [56]:
# one hot encoding
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
onehot['Neighbourhood'] = venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Aquarium,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Australian Restaurant,...,Turkish Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Veterinarian,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio,Zoo
0,Dawes Point,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Dawes Point,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Dawes Point,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Dawes Point,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Dawes Point,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [57]:
onehot.shape

(2053, 194)

In [58]:
grouped = onehot.groupby('Neighbourhood').mean().reset_index()
grouped

Unnamed: 0,Neighbourhood,American Restaurant,Aquarium,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Australian Restaurant,...,Turkish Restaurant,Used Bookstore,Vegetarian / Vegan Restaurant,Veterinarian,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio,Zoo
0,Bondi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Chippendale,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.030612,0.010204,...,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0
2,Clovelly,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Coogee,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0
4,Daceyville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Darling Point,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Darlinghurst,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0
7,Darlington,0.0,0.0,0.0,0.0,0.026667,0.0,0.0,0.0,0.013333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,0.0,0.013333,0.0
8,Dawes Point,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.07,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
9,Double Bay,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
grouped.shape

(40, 194)

In [60]:
num_top_venues = 5

for hood in grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = grouped[grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bondi----
           venue  freq
0           Park  0.14
1           Café  0.14
2            Gym  0.09
3  Grocery Store  0.09
4    Pizza Place  0.09


----Chippendale----
                venue  freq
0                Café  0.17
1  Chinese Restaurant  0.11
2     Thai Restaurant  0.08
3         Coffee Shop  0.05
4    Ramen Restaurant  0.04


----Clovelly----
          venue  freq
0          Café  0.22
1         Beach  0.22
2   Pizza Place  0.06
3   Supermarket  0.06
4  Burger Joint  0.06


----Coogee----
                venue  freq
0                Café  0.20
1               Hotel  0.09
2                 Bar  0.07
3     Thai Restaurant  0.05
4  Italian Restaurant  0.04


----Daceyville----
                   venue  freq
0     Chinese Restaurant  0.18
1  Indonesian Restaurant  0.15
2                   Café  0.10
3          Grocery Store  0.08
4     Italian Restaurant  0.05


----Darling Point----
             venue  freq
0             Park  0.50
1  Harbor / Marina  0.25
2    Boat or Fer

### Converting Data Into A Pandas Dataframe

In [61]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [62]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = grouped['Neighbourhood']

for ind in np.arange(grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bondi,Café,Park,Grocery Store,Pizza Place,Gym,Bakery,Gas Station,Coffee Shop,Thai Restaurant,Spa
1,Chippendale,Café,Chinese Restaurant,Thai Restaurant,Coffee Shop,Malay Restaurant,Ramen Restaurant,Dumpling Restaurant,Hotel,Asian Restaurant,Bar
2,Clovelly,Café,Beach,Bus Line,Bar,Pizza Place,Hotel,Burger Joint,Thai Restaurant,Playground,Supermarket
3,Coogee,Café,Hotel,Bar,Thai Restaurant,Italian Restaurant,Gym / Fitness Center,French Restaurant,Fish & Chips Shop,Park,Beach
4,Daceyville,Chinese Restaurant,Indonesian Restaurant,Café,Grocery Store,Malay Restaurant,Thai Restaurant,Italian Restaurant,Park,Dessert Shop,Fast Food Restaurant


### Cluster Neighbourhood

In [63]:
# set number of clusters
kclusters = 7

grouped_clustering = grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 0, 0, 0, 4, 1, 0, 0, 0, 0], dtype=int32)

In [64]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

merged = df_location_SYD

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
merged = merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2000,Sydney,Dawes Point,-33.86,151.21,0,Café,Hotel,Australian Restaurant,Cocktail Bar,Pub,Hotel Bar,Sandwich Place,Burger Joint,Italian Restaurant,Ice Cream Shop
1,2000,Sydney,Haymarket,-33.88,151.2,0,Café,Chinese Restaurant,Thai Restaurant,Coffee Shop,Malay Restaurant,Ramen Restaurant,Dumpling Restaurant,Hotel,Asian Restaurant,Bar
2,2000,Sydney,Millers Point,-33.86,151.2,0,Café,Chinese Restaurant,Seafood Restaurant,Park,Steakhouse,Middle Eastern Restaurant,Brewery,Nature Preserve,Boat or Ferry,Planetarium
3,2000,Sydney,Sydney,-33.87,151.21,0,Café,Coffee Shop,Shopping Mall,Cocktail Bar,Hotel,Bar,Clothing Store,Speakeasy,Japanese Restaurant,Bookstore
4,2000,Sydney,The Rocks,-33.86,151.21,0,Café,Hotel,Australian Restaurant,Cocktail Bar,Pub,Hotel Bar,Sandwich Place,Burger Joint,Italian Restaurant,Ice Cream Shop


In [65]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighbourhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

<a id='item2R'></a>

In [66]:
map_clusters

### Examining Clusters

### Cluster 1

In [67]:
merged.loc[merged['Cluster Labels'] == 0, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Dawes Point,0,Café,Hotel,Australian Restaurant,Cocktail Bar,Pub,Hotel Bar,Sandwich Place,Burger Joint,Italian Restaurant,Ice Cream Shop
1,Haymarket,0,Café,Chinese Restaurant,Thai Restaurant,Coffee Shop,Malay Restaurant,Ramen Restaurant,Dumpling Restaurant,Hotel,Asian Restaurant,Bar
2,Millers Point,0,Café,Chinese Restaurant,Seafood Restaurant,Park,Steakhouse,Middle Eastern Restaurant,Brewery,Nature Preserve,Boat or Ferry,Planetarium
3,Sydney,0,Café,Coffee Shop,Shopping Mall,Cocktail Bar,Hotel,Bar,Clothing Store,Speakeasy,Japanese Restaurant,Bookstore
4,The Rocks,0,Café,Hotel,Australian Restaurant,Cocktail Bar,Pub,Hotel Bar,Sandwich Place,Burger Joint,Italian Restaurant,Ice Cream Shop
5,World Square,0,Café,Thai Restaurant,Japanese Restaurant,Coffee Shop,Burger Joint,Cocktail Bar,Hotel,Sandwich Place,Breakfast Spot,Liquor Store
7,University Of Sydney,0,Café,Performing Arts Venue,Italian Restaurant,Library,Pub,Park,Farmers Market,College Rec Center,Coffee Shop,Music Venue
8,Ultimo,0,Café,Chinese Restaurant,Thai Restaurant,Coffee Shop,Malay Restaurant,Ramen Restaurant,Dumpling Restaurant,Hotel,Asian Restaurant,Bar
9,Chippendale,0,Café,Chinese Restaurant,Thai Restaurant,Coffee Shop,Malay Restaurant,Ramen Restaurant,Dumpling Restaurant,Hotel,Asian Restaurant,Bar
10,Darlington,0,Café,Bar,Pub,Pizza Place,Thai Restaurant,Fast Food Restaurant,Bakery,Restaurant,Burger Joint,Art Gallery


### Cluster 2

In [68]:
merged.loc[merged['Cluster Labels'] == 1, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Darling Point,1,Park,Boat or Ferry,Harbor / Marina,Zoo,Fish & Chips Shop,Fried Chicken Joint,French Restaurant,Fountain,Food Court,Food & Drink Shop
21,Point Piper,1,Park,Harbor / Marina,Zoo,Fish & Chips Shop,Fried Chicken Joint,French Restaurant,Fountain,Food Court,Food & Drink Shop,Flower Shop


### Cluster 3

In [69]:
merged.loc[merged['Cluster Labels'] == 2, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
26,Dover Heights,2,Playground,Park,Sporting Goods Shop,Zoo,French Restaurant,Fountain,Food Court,Food & Drink Shop,Flower Shop,Flea Market


### Cluster 4

In [70]:
merged.loc[merged['Cluster Labels'] == 3, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Bondi,3,Café,Park,Grocery Store,Pizza Place,Gym,Bakery,Gas Station,Coffee Shop,Thai Restaurant,Spa
23,Rose Bay,3,Park,Café,Harbor / Marina,Golf Course,Tennis Court,Gym,Thai Restaurant,Beer Garden,Fish & Chips Shop,Fountain
24,Vaucluse,3,Park,Beach,Café,Pizza Place,Sushi Restaurant,Nature Preserve,Shopping Mall,Lighthouse,Bakery,Fish & Chips Shop
25,Watsons Bay,3,Seafood Restaurant,Scenic Lookout,Harbor / Marina,Park,Ice Cream Shop,Lighthouse,National Park,Surf Spot,Fish & Chips Shop,Beer Garden
33,South Coogee,3,Bakery,Café,Trail,Liquor Store,Park,Fish & Chips Shop,Fried Chicken Joint,French Restaurant,Fountain,Food Court
37,Mcmahons Point,3,Park,Café,Pub,Australian Restaurant,Pier,Garden,Mediterranean Restaurant,Gym,Moroccan Restaurant,Harbor / Marina


### Cluster 5

In [71]:
merged.loc[merged['Cluster Labels'] == 4, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,Daceyville,4,Chinese Restaurant,Indonesian Restaurant,Café,Grocery Store,Malay Restaurant,Thai Restaurant,Italian Restaurant,Park,Dessert Shop,Fast Food Restaurant
30,Kingsford,4,Indonesian Restaurant,Chinese Restaurant,Café,Italian Restaurant,Grocery Store,Fast Food Restaurant,Malay Restaurant,Thai Restaurant,Bar,Ramen Restaurant
31,Kensington,4,Chinese Restaurant,Indonesian Restaurant,Indian Restaurant,Convenience Store,Burger Joint,Café,Sushi Restaurant,Malay Restaurant,Liquor Store,Park


### Cluster 6

In [72]:
merged.loc[merged['Cluster Labels'] == 5, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Eastern Suburbs,5,Park,Liquor Store,Coffee Shop,Zoo,Fish & Chips Shop,Fried Chicken Joint,French Restaurant,Fountain,Food Court,Food & Drink Shop


### Cluster 7

In [73]:
merged.loc[merged['Cluster Labels'] == 6, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
34,North Sydney,6,Café,Gym,Coffee Shop,Japanese Restaurant,Juice Bar,Bakery,Bar,Sandwich Place,Italian Restaurant,Seafood Restaurant
35,Waverton,6,Café,Park,Grocery Store,Wine Shop,Indian Restaurant,Platform,Bowling Green,Scenic Lookout,Fast Food Restaurant,Fountain
36,Lavender Bay,6,Café,Juice Bar,Park,Dumpling Restaurant,Bakery,Moroccan Restaurant,Garden,Seafood Restaurant,Mediterranean Restaurant,Coffee Shop
38,North Sydney,6,Café,Gym,Coffee Shop,Japanese Restaurant,Juice Bar,Bakery,Bar,Sandwich Place,Italian Restaurant,Seafood Restaurant


<a id='item3'></a>
# 3. LONDON, UNITED KINGDOM

### Importing Data for London

In [74]:
import io

url3 = "https://raw.githubusercontent.com/see-scen-dev/Coursera_Capstone/master/London_Coorindates.csv"
s = requests.get(url3).content
df_location_LDN = pd.read_csv(io.StringIO(s.decode('utf-8')))
df_location_LDN

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,E1 6AN,City of London,Bishopsgate,51.518895,-0.078378
1,E1 7AA,City of London,Portsoken,51.515567,-0.075635
2,E1 7AX,City of London,Aldgate,51.515526,-0.078592
3,E1 8AT,City of London,Tower,51.511017,-0.073562
4,EC1A 1HQ,City of London,Farringdon Within,51.516359,-0.098906
5,EC1A 2AL,City of London,Farringdon Without,51.517473,-0.103327
6,EC1A 4AS,City of London,Cheap,51.516411,-0.097721
7,EC1A 4ER,City of London,Aldersgate,51.516554,-0.096562
8,EC1A 7AW,City of London,Bread Street,51.515069,-0.098109
9,EC1M 7AA,City of London,Cripplegate,51.522476,-0.097411


### Plotting A Map of London

In [75]:
latitude = 51.51279
longitude = -0.09184
print('The geograpical coordinate of London are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of London are 51.51279, -0.09184.


In [76]:
map_London = folium.Map(location=[latitude, longitude], zoom_start=14)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df_location_LDN['Latitude'], df_location_LDN['Longitude'], df_location_LDN['Borough'], df_location_LDN['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_London)  
    
map_London

### Exploring The Neighbourhoods

In [77]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [78]:
venues = getNearbyVenues(names=df_location_LDN['Neighbourhood'],
                                   latitudes=df_location_LDN['Latitude'],
                                   longitudes=df_location_LDN['Longitude']
                                  )

Bishopsgate
Portsoken
Aldgate
Tower
Farringdon Within
Farringdon Without
Cheap
Aldersgate
Bread Street
Cripplegate
Castle Baynard
Coleman Street
Broad Street
Cornhill
Walbrook
Lime Street
Bassishaw
Cordwainer
Bridge
Billingsgate
Langbourn
Candlewick
Vintry
Dowgate
Queenhithe


In [79]:
print(venues.shape)
venues.head()

(2441, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Bishopsgate,51.518895,-0.078378,Kastner & Ovens,51.517913,-0.076465,Café
1,Bishopsgate,51.518895,-0.078378,Ottolenghi,51.518272,-0.077177,Mediterranean Restaurant
2,Bishopsgate,51.518895,-0.078378,Pizza Union,51.517699,-0.077416,Pizza Place
3,Bishopsgate,51.518895,-0.078378,The Breakfast Club,51.518386,-0.078784,Breakfast Spot
4,Bishopsgate,51.518895,-0.078378,Old Spitalfields Market,51.519668,-0.075375,Flea Market


In [80]:
venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aldersgate,100,100,100,100,100,100
Aldgate,100,100,100,100,100,100
Bassishaw,100,100,100,100,100,100
Billingsgate,100,100,100,100,100,100
Bishopsgate,100,100,100,100,100,100
Bread Street,100,100,100,100,100,100
Bridge,100,100,100,100,100,100
Broad Street,100,100,100,100,100,100
Candlewick,96,96,96,96,96,96
Castle Baynard,100,100,100,100,100,100


In [81]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 152 uniques categories.


In [82]:
# one hot encoding
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
onehot['Neighbourhood'] = venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Argentinian Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,...,Trail,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Bishopsgate,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Bishopsgate,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Bishopsgate,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Bishopsgate,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Bishopsgate,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [83]:
onehot.shape

(2441, 153)

In [84]:
grouped = onehot.groupby('Neighbourhood').mean().reset_index()
grouped

Unnamed: 0,Neighbourhood,American Restaurant,Argentinian Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,...,Trail,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Aldersgate,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.01,0.01
1,Aldgate,0.0,0.01,0.01,0.0,0.0,0.0,0.02,0.0,0.01,...,0.0,0.01,0.0,0.0,0.01,0.0,0.02,0.01,0.0,0.0
2,Bassishaw,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.03,...,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.01,0.02
3,Billingsgate,0.0,0.01,0.0,0.0,0.04,0.01,0.01,0.0,0.0,...,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
4,Bishopsgate,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,...,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.01,0.02,0.0
5,Bread Street,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.03,0.0,0.02,0.0,0.01,0.0
6,Bridge,0.0,0.01,0.0,0.0,0.04,0.01,0.01,0.0,0.0,...,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0
7,Broad Street,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,...,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.01
8,Candlewick,0.0,0.010417,0.0,0.0,0.020833,0.010417,0.0,0.0,0.0,...,0.010417,0.010417,0.010417,0.0,0.0,0.0,0.010417,0.0,0.0,0.0
9,Castle Baynard,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.01,0.01,0.03,0.0,0.0,0.0


In [85]:
grouped.shape

(25, 153)

In [86]:
num_top_venues = 5

for hood in grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = grouped[grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Aldersgate----
                        venue  freq
0                 Coffee Shop  0.08
1          Italian Restaurant  0.05
2  Modern European Restaurant  0.04
3              Sandwich Place  0.03
4                 Art Gallery  0.03


----Aldgate----
                venue  freq
0         Coffee Shop  0.08
1         Salad Place  0.05
2               Hotel  0.04
3  Italian Restaurant  0.04
4        Cocktail Bar  0.04


----Bassishaw----
                venue  freq
0         Coffee Shop  0.10
1  Italian Restaurant  0.06
2         Art Gallery  0.03
3    Sushi Restaurant  0.03
4          Steakhouse  0.03


----Billingsgate----
                  venue  freq
0           Coffee Shop  0.08
1                 Hotel  0.08
2                   Pub  0.06
3  Gym / Fitness Center  0.05
4            Restaurant  0.04


----Bishopsgate----
                  venue  freq
0           Coffee Shop  0.07
1                   Pub  0.05
2            Food Truck  0.05
3  Gym / Fitness Center  0.04
4               

### Converting Data Into A Pandas Dataframe

In [87]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [88]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = grouped['Neighbourhood']

for ind in np.arange(grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Aldersgate,Coffee Shop,Italian Restaurant,Modern European Restaurant,Gym / Fitness Center,Art Gallery,Bakery,Steakhouse,Japanese Restaurant,Restaurant,Sandwich Place
1,Aldgate,Coffee Shop,Salad Place,Italian Restaurant,Cocktail Bar,Hotel,Gym / Fitness Center,Sushi Restaurant,Japanese Restaurant,Restaurant,Pub
2,Bassishaw,Coffee Shop,Italian Restaurant,Hotel,Bakery,Art Gallery,Sushi Restaurant,Steakhouse,Seafood Restaurant,Scenic Lookout,Sandwich Place
3,Billingsgate,Coffee Shop,Hotel,Pub,Gym / Fitness Center,Asian Restaurant,Restaurant,French Restaurant,Sandwich Place,Garden,Salad Place
4,Bishopsgate,Coffee Shop,Food Truck,Pub,Gym / Fitness Center,Chinese Restaurant,Bar,Cocktail Bar,Pizza Place,Hotel,Thai Restaurant


### Cluster Neighbourhood

In [89]:
# set number of clusters
kclusters = 7

grouped_clustering = grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 1, 0, 3, 4, 0, 2, 1, 2, 6], dtype=int32)

In [90]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

merged = df_location_LDN

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
merged = merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,E1 6AN,City of London,Bishopsgate,51.518895,-0.078378,4,Coffee Shop,Food Truck,Pub,Gym / Fitness Center,Chinese Restaurant,Bar,Cocktail Bar,Pizza Place,Hotel,Thai Restaurant
1,E1 7AA,City of London,Portsoken,51.515567,-0.075635,4,Coffee Shop,Hotel,Pub,Gym / Fitness Center,Cocktail Bar,Salad Place,Restaurant,Art Gallery,Indian Restaurant,Pizza Place
2,E1 7AX,City of London,Aldgate,51.515526,-0.078592,1,Coffee Shop,Salad Place,Italian Restaurant,Cocktail Bar,Hotel,Gym / Fitness Center,Sushi Restaurant,Japanese Restaurant,Restaurant,Pub
3,E1 8AT,City of London,Tower,51.511017,-0.073562,3,Hotel,Coffee Shop,Cocktail Bar,Gym / Fitness Center,French Restaurant,Castle,Café,Pub,Indian Restaurant,Restaurant
4,EC1A 1HQ,City of London,Farringdon Within,51.516359,-0.098906,0,Coffee Shop,Italian Restaurant,Gym / Fitness Center,Japanese Restaurant,Wine Bar,Hotel,French Restaurant,Modern European Restaurant,English Restaurant,Park


In [91]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=14)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighbourhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

<a id='item3R'></a>

In [92]:
map_clusters

### Examining Clusters

### Cluster 1

In [93]:
merged.loc[merged['Cluster Labels'] == 0, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Farringdon Within,0,Coffee Shop,Italian Restaurant,Gym / Fitness Center,Japanese Restaurant,Wine Bar,Hotel,French Restaurant,Modern European Restaurant,English Restaurant,Park
6,Cheap,0,Coffee Shop,Italian Restaurant,Modern European Restaurant,Restaurant,Art Gallery,Vietnamese Restaurant,Japanese Restaurant,Sandwich Place,Gym / Fitness Center,Burger Joint
7,Aldersgate,0,Coffee Shop,Italian Restaurant,Modern European Restaurant,Gym / Fitness Center,Art Gallery,Bakery,Steakhouse,Japanese Restaurant,Restaurant,Sandwich Place
8,Bread Street,0,Coffee Shop,Italian Restaurant,Modern European Restaurant,Japanese Restaurant,Sandwich Place,Gym / Fitness Center,Vietnamese Restaurant,Plaza,Scenic Lookout,Falafel Restaurant
16,Bassishaw,0,Coffee Shop,Italian Restaurant,Hotel,Bakery,Art Gallery,Sushi Restaurant,Steakhouse,Seafood Restaurant,Scenic Lookout,Sandwich Place
22,Vintry,0,Coffee Shop,Italian Restaurant,Sandwich Place,Steakhouse,Restaurant,Asian Restaurant,Vietnamese Restaurant,Bar,Modern European Restaurant,Gym / Fitness Center
24,Queenhithe,0,Coffee Shop,Italian Restaurant,Sandwich Place,Wine Bar,Restaurant,Vietnamese Restaurant,Gym / Fitness Center,Asian Restaurant,Japanese Restaurant,Seafood Restaurant


### Cluster 2

In [94]:
merged.loc[merged['Cluster Labels'] == 1, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Aldgate,1,Coffee Shop,Salad Place,Italian Restaurant,Cocktail Bar,Hotel,Gym / Fitness Center,Sushi Restaurant,Japanese Restaurant,Restaurant,Pub
12,Broad Street,1,Coffee Shop,Restaurant,Hotel,Salad Place,Japanese Restaurant,Sandwich Place,Gym / Fitness Center,Pub,Italian Restaurant,Boxing Gym
13,Cornhill,1,Coffee Shop,Restaurant,Salad Place,Hotel,Pizza Place,Pub,Cocktail Bar,Gym / Fitness Center,Sandwich Place,Indian Restaurant
15,Lime Street,1,Coffee Shop,Restaurant,Hotel,Gym / Fitness Center,Pizza Place,Salad Place,Pub,Indian Restaurant,Cocktail Bar,Steakhouse


### Cluster 3

In [95]:
merged.loc[merged['Cluster Labels'] == 2, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Walbrook,2,Coffee Shop,Hotel,Restaurant,Salad Place,Italian Restaurant,Cocktail Bar,Gym / Fitness Center,Steakhouse,French Restaurant,Pub
17,Cordwainer,2,Coffee Shop,Gym / Fitness Center,Italian Restaurant,Pub,Restaurant,Asian Restaurant,Hotel,French Restaurant,Sandwich Place,Pizza Place
18,Bridge,2,Coffee Shop,Pub,Hotel,Gym / Fitness Center,Asian Restaurant,French Restaurant,Salad Place,Restaurant,Italian Restaurant,Sandwich Place
21,Candlewick,2,Coffee Shop,Pub,Gym / Fitness Center,French Restaurant,Sandwich Place,Italian Restaurant,Restaurant,Historic Site,Hotel,Burger Joint
23,Dowgate,2,Coffee Shop,Pub,Gym / Fitness Center,Italian Restaurant,Restaurant,Sandwich Place,Cocktail Bar,Seafood Restaurant,Steakhouse,French Restaurant


### Cluster 4

In [96]:
merged.loc[merged['Cluster Labels'] == 3, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Tower,3,Hotel,Coffee Shop,Cocktail Bar,Gym / Fitness Center,French Restaurant,Castle,Café,Pub,Indian Restaurant,Restaurant
19,Billingsgate,3,Coffee Shop,Hotel,Pub,Gym / Fitness Center,Asian Restaurant,Restaurant,French Restaurant,Sandwich Place,Garden,Salad Place
20,Langbourn,3,Hotel,Coffee Shop,Gym / Fitness Center,Cocktail Bar,Restaurant,Pub,Salad Place,Garden,French Restaurant,Italian Restaurant


### Cluster 5

In [97]:
merged.loc[merged['Cluster Labels'] == 4, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bishopsgate,4,Coffee Shop,Food Truck,Pub,Gym / Fitness Center,Chinese Restaurant,Bar,Cocktail Bar,Pizza Place,Hotel,Thai Restaurant
1,Portsoken,4,Coffee Shop,Hotel,Pub,Gym / Fitness Center,Cocktail Bar,Salad Place,Restaurant,Art Gallery,Indian Restaurant,Pizza Place


### Cluster 6

In [98]:
merged.loc[merged['Cluster Labels'] == 5, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Cripplegate,5,Coffee Shop,Pub,Café,Hotel,Food Truck,Italian Restaurant,Gym / Fitness Center,Vietnamese Restaurant,Concert Hall,Sandwich Place
11,Coleman Street,5,Coffee Shop,Hotel,Gym / Fitness Center,Food Truck,Bar,Sandwich Place,Pub,Ramen Restaurant,Café,Vietnamese Restaurant


### Cluster 7

In [99]:
merged.loc[merged['Cluster Labels'] == 6, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Farringdon Without,6,Coffee Shop,Italian Restaurant,Pub,Wine Bar,Gym / Fitness Center,Falafel Restaurant,Sandwich Place,French Restaurant,Beer Bar,Burrito Place
10,Castle Baynard,6,Coffee Shop,Italian Restaurant,Pub,Sandwich Place,French Restaurant,Hotel,Beer Bar,Sushi Restaurant,Salad Place,Gym / Fitness Center


<a id='item4'></a>
# 4. NEW YORK, UNITED STATES

### Importing Data for New York

In [100]:
import io

url4 = "https://raw.githubusercontent.com/see-scen-dev/Coursera_Capstone/master/New_York_Coorindates.csv"
s = requests.get(url4).content
df_location_NY = pd.read_csv(io.StringIO(s.decode('utf-8')))
df_location_NY

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Manhattan,Marble Hill,40.876551,-73.91066
1,Manhattan,Chinatown,40.715618,-73.994279
2,Manhattan,Washington Heights,40.851903,-73.9369
3,Manhattan,Inwood,40.867684,-73.92121
4,Manhattan,Hamilton Heights,40.823604,-73.949688
5,Manhattan,Manhattanville,40.816934,-73.957385
6,Manhattan,Central Harlem,40.815976,-73.943211
7,Manhattan,East Harlem,40.792249,-73.944182
8,Manhattan,Upper East Side,40.775639,-73.960508
9,Manhattan,Yorkville,40.77593,-73.947118


### Plotting A Map of New York

In [101]:
latitude = 40.7127281
longitude = -74.0060152
print('The geograpical coordinate of New York are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York are 40.7127281, -74.0060152.


In [102]:
map_New_York = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df_location_NY['Latitude'], df_location_NY['Longitude'], df_location_NY['Borough'], df_location_NY['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_New_York)  
    
map_New_York

### Exploring The Neighbourhoods

In [103]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [104]:
venues = getNearbyVenues(names=df_location_NY['Neighbourhood'],
                                   latitudes=df_location_NY['Latitude'],
                                   longitudes=df_location_NY['Longitude']
                                  )

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards


In [105]:
print(venues.shape)
venues.head()

(3330, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Marble Hill,40.876551,-73.91066,Arturo's,40.874412,-73.910271,Pizza Place
1,Marble Hill,40.876551,-73.91066,Bikram Yoga,40.876844,-73.906204,Yoga Studio
2,Marble Hill,40.876551,-73.91066,Tibbett Diner,40.880404,-73.908937,Diner
3,Marble Hill,40.876551,-73.91066,Starbucks,40.877531,-73.905582,Coffee Shop
4,Marble Hill,40.876551,-73.91066,Dunkin',40.877136,-73.906666,Donut Shop


In [106]:
venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Battery Park City,100,100,100,100,100,100
Carnegie Hill,100,100,100,100,100,100
Central Harlem,47,47,47,47,47,47
Chelsea,100,100,100,100,100,100
Chinatown,100,100,100,100,100,100
Civic Center,100,100,100,100,100,100
Clinton,100,100,100,100,100,100
East Harlem,44,44,44,44,44,44
East Village,100,100,100,100,100,100
Financial District,100,100,100,100,100,100


In [107]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 334 uniques categories.


In [108]:
# one hot encoding
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
onehot['Neighbourhood'] = venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Arcade,Arepa Restaurant,...,Volleyball Court,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Marble Hill,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [109]:
onehot.shape

(3330, 335)

In [110]:
grouped = onehot.groupby('Neighbourhood').mean().reset_index()
grouped

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Animal Shelter,Antique Shop,Arcade,Arepa Restaurant,...,Volleyball Court,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Battery Park City,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.02,0.0
1,Carnegie Hill,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.01,0.03
2,Central Harlem,0.0,0.0,0.0,0.06383,0.042553,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Chelsea,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0
4,Chinatown,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Civic Center,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.03
6,Clinton,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.02,0.03,0.0,0.0,0.0
7,East Harlem,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,East Village,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.02,...,0.0,0.0,0.0,0.0,0.0,0.05,0.02,0.0,0.0,0.0
9,Financial District,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.04,0.0,0.01,0.0


In [111]:
grouped.shape

(40, 335)

In [112]:
num_top_venues = 5

for hood in grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = grouped[grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Battery Park City----
           venue  freq
0           Park  0.08
1    Coffee Shop  0.07
2          Hotel  0.05
3            Gym  0.04
4  Memorial Site  0.04


----Carnegie Hill----
                 venue  freq
0          Pizza Place  0.06
1          Coffee Shop  0.06
2                 Café  0.04
3  Japanese Restaurant  0.03
4    French Restaurant  0.03


----Central Harlem----
                  venue  freq
0    African Restaurant  0.06
1  Gym / Fitness Center  0.04
2           Art Gallery  0.04
3    Chinese Restaurant  0.04
4     French Restaurant  0.04


----Chelsea----
                venue  freq
0         Coffee Shop  0.06
1  Italian Restaurant  0.05
2      Ice Cream Shop  0.05
3              Bakery  0.04
4           Nightclub  0.04


----Chinatown----
                 venue  freq
0   Chinese Restaurant  0.09
1         Cocktail Bar  0.04
2  American Restaurant  0.04
3  Dumpling Restaurant  0.03
4       Ice Cream Shop  0.03


----Civic Center----
                  venue  freq


### Converting Data Into A Pandas Dataframe

In [113]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [114]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = grouped['Neighbourhood']

for ind in np.arange(grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Park,Coffee Shop,Hotel,Gym,Memorial Site,Wine Shop,Clothing Store,Italian Restaurant,Ice Cream Shop,Food Court
1,Carnegie Hill,Pizza Place,Coffee Shop,Café,Bookstore,Cosmetics Shop,French Restaurant,Bar,Japanese Restaurant,Spa,Grocery Store
2,Central Harlem,African Restaurant,Art Gallery,Cosmetics Shop,Chinese Restaurant,American Restaurant,Seafood Restaurant,Gym / Fitness Center,French Restaurant,Public Art,Dessert Shop
3,Chelsea,Coffee Shop,Ice Cream Shop,Italian Restaurant,Bakery,Nightclub,Theater,Seafood Restaurant,American Restaurant,Hotel,Art Gallery
4,Chinatown,Chinese Restaurant,Cocktail Bar,American Restaurant,Ice Cream Shop,Salon / Barbershop,Dumpling Restaurant,Spa,Dim Sum Restaurant,Bubble Tea Shop,Vietnamese Restaurant


### Cluster Neighbourhood

In [115]:
# set number of clusters
kclusters = 7

grouped_clustering = grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 0, 3, 0, 5, 5, 5, 2, 0, 5], dtype=int32)

In [116]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

merged = df_location_NY

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
merged = merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

merged.head() # check the last columns!

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,6,Coffee Shop,Discount Store,Sandwich Place,Yoga Studio,Tennis Stadium,Supplement Shop,Steakhouse,Spa,Seafood Restaurant,Clothing Store
1,Manhattan,Chinatown,40.715618,-73.994279,5,Chinese Restaurant,Cocktail Bar,American Restaurant,Ice Cream Shop,Salon / Barbershop,Dumpling Restaurant,Spa,Dim Sum Restaurant,Bubble Tea Shop,Vietnamese Restaurant
2,Manhattan,Washington Heights,40.851903,-73.9369,2,Café,Mobile Phone Shop,Bakery,Spanish Restaurant,Deli / Bodega,Mexican Restaurant,Sandwich Place,New American Restaurant,Park,Supplement Shop
3,Manhattan,Inwood,40.867684,-73.92121,2,Café,Mexican Restaurant,Bakery,Lounge,Pizza Place,Wine Bar,Deli / Bodega,American Restaurant,Frozen Yogurt Shop,Chinese Restaurant
4,Manhattan,Hamilton Heights,40.823604,-73.949688,2,Deli / Bodega,Café,Mexican Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Sushi Restaurant,Caribbean Restaurant,School,Bakery


In [117]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighbourhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

<a id='item4R'></a>

In [118]:
map_clusters

### Examining Clusters

### Cluster 1

In [119]:
merged.loc[merged['Cluster Labels'] == 0, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,40.77593,Italian Restaurant,Coffee Shop,Gym,Bar,Pizza Place,Sushi Restaurant,Japanese Restaurant,Wine Shop,Mexican Restaurant,Diner
10,40.768113,Coffee Shop,Italian Restaurant,Pizza Place,Sushi Restaurant,Gym / Fitness Center,Sporting Goods Shop,Café,Gym,Cosmetics Shop,Burger Joint
12,40.787658,Italian Restaurant,Wine Bar,Bar,Vegetarian / Vegan Restaurant,Indian Restaurant,Bakery,Mediterranean Restaurant,Coffee Shop,Yoga Studio,Sushi Restaurant
17,40.744035,Coffee Shop,Ice Cream Shop,Italian Restaurant,Bakery,Nightclub,Theater,Seafood Restaurant,American Restaurant,Hotel,Art Gallery
18,40.726933,Italian Restaurant,Clothing Store,Sushi Restaurant,Ice Cream Shop,French Restaurant,Seafood Restaurant,Café,Indian Restaurant,Cocktail Bar,Gourmet Shop
19,40.727847,Bar,Wine Bar,Chinese Restaurant,Mexican Restaurant,Ice Cream Shop,Pizza Place,Coffee Shop,Ramen Restaurant,Vegetarian / Vegan Restaurant,Cocktail Bar
24,40.734434,Italian Restaurant,New American Restaurant,Cosmetics Shop,Wine Bar,American Restaurant,Jazz Club,Park,Coffee Shop,Bakery,Gastropub
25,40.797307,Indian Restaurant,Coffee Shop,Pizza Place,Yoga Studio,Mexican Restaurant,Café,Bar,Thai Restaurant,Deli / Bodega,Szechuan Restaurant
27,40.73721,Italian Restaurant,Pizza Place,American Restaurant,Bagel Shop,Bar,Cocktail Bar,Hotel,Thai Restaurant,Mexican Restaurant,Thrift / Vintage Store
30,40.782683,Pizza Place,Coffee Shop,Café,Bookstore,Cosmetics Shop,French Restaurant,Bar,Japanese Restaurant,Spa,Grocery Store


### Cluster 2

In [120]:
merged.loc[merged['Cluster Labels'] == 1, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,40.773529,Gym / Fitness Center,Theater,Concert Hall,Café,Plaza,Performing Arts Venue,French Restaurant,Italian Restaurant,Park,Opera House
21,40.721522,Spa,Park,Café,Italian Restaurant,American Restaurant,Boutique,Greek Restaurant,Wine Shop,Wine Bar,Coffee Shop
23,40.722184,Clothing Store,Boutique,Shoe Store,Women's Store,Sporting Goods Shop,Art Gallery,Bakery,Italian Restaurant,Mediterranean Restaurant,Men's Store
26,40.808,Park,Bookstore,American Restaurant,Coffee Shop,Food Truck,Burger Joint,New American Restaurant,Tennis Court,Deli / Bodega,College Cafeteria
28,40.711932,Park,Coffee Shop,Hotel,Gym,Memorial Site,Wine Shop,Clothing Store,Italian Restaurant,Ice Cream Shop,Food Court
39,40.756658,American Restaurant,Italian Restaurant,Coffee Shop,Café,Restaurant,Gym / Fitness Center,Hotel,Salad Place,Gym,Park


### Cluster 3

In [121]:
merged.loc[merged['Cluster Labels'] == 2, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,40.851903,Café,Mobile Phone Shop,Bakery,Spanish Restaurant,Deli / Bodega,Mexican Restaurant,Sandwich Place,New American Restaurant,Park,Supplement Shop
3,40.867684,Café,Mexican Restaurant,Bakery,Lounge,Pizza Place,Wine Bar,Deli / Bodega,American Restaurant,Frozen Yogurt Shop,Chinese Restaurant
4,40.823604,Deli / Bodega,Café,Mexican Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Sushi Restaurant,Caribbean Restaurant,School,Bakery
5,40.816934,Deli / Bodega,Park,Mexican Restaurant,Coffee Shop,Italian Restaurant,Seafood Restaurant,Food Court,Bike Trail,Lounge,Sushi Restaurant
7,40.792249,Mexican Restaurant,Bakery,Deli / Bodega,Latin American Restaurant,Thai Restaurant,Convenience Store,Sandwich Place,Gas Station,Taco Place,Steakhouse
11,40.76216,Sandwich Place,Coffee Shop,Deli / Bodega,Park,Pizza Place,Greek Restaurant,Bus Stop,Baseball Field,Liquor Store,Outdoors & Recreation
36,40.746917,Park,Mexican Restaurant,Café,Greek Restaurant,Asian Restaurant,Deli / Bodega,Pizza Place,Hotel,Dog Run,Spa


### Cluster 4

In [122]:
merged.loc[merged['Cluster Labels'] == 3, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,40.815976,African Restaurant,Art Gallery,Cosmetics Shop,Chinese Restaurant,American Restaurant,Seafood Restaurant,Gym / Fitness Center,French Restaurant,Public Art,Dessert Shop


### Cluster 5

In [123]:
merged.loc[merged['Cluster Labels'] == 4, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,40.731,Bar,Park,Playground,Pet Service,Farmers Market,Baseball Field,Fountain,Harbor / Marina,Cocktail Bar,Coffee Shop


### Cluster 6

In [124]:
merged.loc[merged['Cluster Labels'] == 5, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,40.715618,Chinese Restaurant,Cocktail Bar,American Restaurant,Ice Cream Shop,Salon / Barbershop,Dumpling Restaurant,Spa,Dim Sum Restaurant,Bubble Tea Shop,Vietnamese Restaurant
8,40.775639,Italian Restaurant,Exhibit,Coffee Shop,Juice Bar,Bakery,Gym / Fitness Center,Art Gallery,French Restaurant,Spa,Hotel
14,40.759101,Theater,Gym / Fitness Center,Italian Restaurant,American Restaurant,Hotel,Spa,Coffee Shop,Wine Shop,Sandwich Place,Cocktail Bar
15,40.754691,Hotel,Coffee Shop,Theater,Clothing Store,American Restaurant,Cocktail Bar,Japanese Restaurant,Steakhouse,Bakery,Bookstore
16,40.748303,Coffee Shop,Hotel,Japanese Restaurant,Sandwich Place,Salon / Barbershop,Gym,French Restaurant,Italian Restaurant,Bakery,Bar
20,40.717807,Coffee Shop,Café,Ramen Restaurant,Pizza Place,Cocktail Bar,Sandwich Place,Chinese Restaurant,Art Gallery,Bakery,Park
22,40.719324,Bakery,Sandwich Place,Salon / Barbershop,Café,Italian Restaurant,Bubble Tea Shop,Hotel,Mediterranean Restaurant,Seafood Restaurant,Clothing Store
29,40.707107,Coffee Shop,Steakhouse,Wine Shop,Hotel,Gym,American Restaurant,Pizza Place,Juice Bar,Italian Restaurant,Café
32,40.715229,Italian Restaurant,Gym / Fitness Center,Sandwich Place,Hotel,French Restaurant,Coffee Shop,Sporting Goods Shop,Spa,Park,Bakery
33,40.74851,Korean Restaurant,Hotel,Hotel Bar,Japanese Restaurant,Coffee Shop,Cosmetics Shop,Cocktail Bar,American Restaurant,Yoga Studio,Lingerie Store


### Cluster 7

In [125]:
merged.loc[merged['Cluster Labels'] == 6, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,40.876551,Coffee Shop,Discount Store,Sandwich Place,Yoga Studio,Tennis Stadium,Supplement Shop,Steakhouse,Spa,Seafood Restaurant,Clothing Store


<a id='item5'></a>
# 5. BRINGING ALL CITIES TOGETHER:
# SYDNEY (AU), TORONTO (CA), LONDON (UK) AND NEW YORK (US)

### Importing Data for All Cities

In [126]:
import io

url5 = "https://raw.githubusercontent.com/see-scen-dev/Coursera_Capstone/master/All_Coorindates.csv"
s = requests.get(url5).content
df_location_ALL = pd.read_csv(io.StringIO(s.decode('utf-8')))
df_location_ALL

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.654,-79.361
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657,-79.379
2,M5C,Downtown Toronto,St. James Town,43.651,-79.375
3,M5E,Downtown Toronto,Berczy Park,43.645,-79.373
4,M5G,Downtown Toronto,Central Bay Street,43.658,-79.387
5,M6G,Downtown Toronto,Christie,43.670,-79.423
6,M5H,Downtown Toronto,"Adelaide, King",43.651,-79.385
7,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands",43.641,-79.382
8,M5K,Downtown Toronto,"Design Exchange, Toronto Dominion Centre",43.647,-79.382
9,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648,-79.380


### Plotting A Map of The World

In [127]:
latitude = 0
longitude = 0
print('The geograpical coordinate of The World are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of The World are 0, 0.


In [128]:
map_World = folium.Map(location=[latitude, longitude], zoom_start=2)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df_location_ALL['Latitude'], df_location_ALL['Longitude'], df_location_ALL['Borough'], df_location_ALL['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_World)  
    
map_World

### Exploring The Neighbourhoods

In [129]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT = 100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [130]:
venues = getNearbyVenues(names=df_location_ALL['Neighbourhood'],
                                   latitudes=df_location_ALL['Latitude'],
                                   longitudes=df_location_ALL['Longitude']
                                  )

Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Christie
Adelaide, King
Harbourfront East, Toronto Islands
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Harbord, University of Toronto
Chinatown, Grange Park
CN Tower, Bathurst Quay
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown, St. James Town
First Canadian Place, Underground city
Church and Wellesley
Dawes Point
Haymarket
Millers Point
Sydney
The Rocks
World Square
Eastern Suburbs
University Of Sydney
Ultimo
Chippendale
Darlington
Pyrmont
Surry Hills
Darlinghurst
Woolloomooloo
Elizabeth Bay
Potts Point
Rushcutters Bay
Bondi
Darling Point
Edgecliff
Point Piper
Double Bay
Rose Bay
Vaucluse
Watsons Bay
Dover Heights
Clovelly
Randwick
Daceyville
Kingsford
Kensington
Coogee
South Coogee
North Sydney
Waverton
Lavender Bay
Mcmahons Point
North Sydney
Kirribilli
Milsons Point
Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central 

In [131]:
print(venues.shape)
venues.head()

(9088, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbourfront, Regent Park",43.654,-79.361,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Harbourfront, Regent Park",43.654,-79.361,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Harbourfront, Regent Park",43.654,-79.361,Toronto Cooper Koo Family Cherry St YMCA Centre,43.653191,-79.357947,Gym / Fitness Center
3,"Harbourfront, Regent Park",43.654,-79.361,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Harbourfront, Regent Park",43.654,-79.361,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [132]:
venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King",100,100,100,100,100,100
Aldersgate,100,100,100,100,100,100
Aldgate,100,100,100,100,100,100
Bassishaw,100,100,100,100,100,100
Battery Park City,97,97,97,97,97,97
Berczy Park,54,54,54,54,54,54
Billingsgate,100,100,100,100,100,100
Bishopsgate,100,100,100,100,100,100
Bondi,22,22,22,22,22,22
Bread Street,100,100,100,100,100,100


In [133]:
print('There are {} uniques categories.'.format(len(venues['Venue Category'].unique())))

There are 394 uniques categories.


In [134]:
# one hot encoding
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
onehot['Neighbourhood'] = venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [135]:
onehot.shape

(9088, 395)

In [136]:
grouped = onehot.groupby('Neighbourhood').mean().reset_index()
grouped

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Watch Shop,Waterfront,Weight Loss Center,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,"Adelaide, King",0.010000,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,...,0.00,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
1,Aldersgate,0.000000,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,...,0.00,0.0,0.0,0.000000,0.020000,0.000000,0.000000,0.010000,0.000000,0.0
2,Aldgate,0.000000,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,...,0.00,0.0,0.0,0.000000,0.030000,0.010000,0.000000,0.000000,0.000000,0.0
3,Bassishaw,0.000000,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,...,0.00,0.0,0.0,0.000000,0.020000,0.000000,0.000000,0.010000,0.020000,0.0
4,Battery Park City,0.000000,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,...,0.00,0.0,0.0,0.000000,0.000000,0.030928,0.000000,0.010309,0.000000,0.0
5,Berczy Park,0.000000,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,...,0.00,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
6,Billingsgate,0.000000,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,...,0.00,0.0,0.0,0.000000,0.010000,0.000000,0.000000,0.000000,0.000000,0.0
7,Bishopsgate,0.000000,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,...,0.00,0.0,0.0,0.000000,0.020000,0.010000,0.010000,0.020000,0.000000,0.0
8,Bondi,0.000000,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,...,0.00,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0
9,Bread Street,0.000000,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,...,0.00,0.0,0.0,0.000000,0.020000,0.000000,0.000000,0.010000,0.000000,0.0


In [137]:
grouped.shape

(123, 395)

In [138]:
num_top_venues = 5

for hood in grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = grouped[grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King----
             venue  freq
0      Coffee Shop  0.05
1              Bar  0.04
2       Steakhouse  0.04
3             Café  0.04
4  Thai Restaurant  0.04


----Aldersgate----
                        venue  freq
0                 Coffee Shop  0.06
1          Italian Restaurant  0.05
2  Modern European Restaurant  0.04
3              Sandwich Place  0.04
4                      Bakery  0.03


----Aldgate----
                venue  freq
0         Coffee Shop  0.07
1         Salad Place  0.04
2        Cocktail Bar  0.04
3               Hotel  0.04
4  Italian Restaurant  0.04


----Bassishaw----
                  venue  freq
0           Coffee Shop  0.12
1    Italian Restaurant  0.07
2      Sushi Restaurant  0.03
3                 Plaza  0.03
4  Gym / Fitness Center  0.03


----Battery Park City----
           venue  freq
0           Park  0.08
1    Coffee Shop  0.07
2          Hotel  0.05
3  Memorial Site  0.04
4            Gym  0.04


----Berczy Park----
          venue 

### Converting Data Into A Pandas Dataframe

In [139]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [140]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = grouped['Neighbourhood']

for ind in np.arange(grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King",Coffee Shop,Bar,Steakhouse,American Restaurant,Café,Thai Restaurant,Cosmetics Shop,Hotel,Sushi Restaurant,Gastropub
1,Aldersgate,Coffee Shop,Italian Restaurant,Sandwich Place,Modern European Restaurant,French Restaurant,Bakery,Art Gallery,Gym / Fitness Center,Cocktail Bar,Plaza
2,Aldgate,Coffee Shop,Cocktail Bar,Salad Place,Italian Restaurant,Hotel,Pizza Place,Gym / Fitness Center,Mediterranean Restaurant,Indian Restaurant,Sushi Restaurant
3,Bassishaw,Coffee Shop,Italian Restaurant,Café,Gym / Fitness Center,Plaza,Hotel,Bakery,Art Gallery,Sushi Restaurant,Yoga Studio
4,Battery Park City,Park,Coffee Shop,Hotel,Memorial Site,Gym,Boat or Ferry,Wine Shop,Men's Store,Shopping Mall,BBQ Joint


### Cluster Neighbourhood

In [141]:
# set number of clusters
kclusters = 7

grouped_clustering = grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 1, 1, 1, 3, 1, 1, 1, 6, 1], dtype=int32)

In [142]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

merged = df_location_ALL

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
merged = merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.654,-79.361,1,Coffee Shop,Theater,Bakery,Park,Mexican Restaurant,Breakfast Spot,Italian Restaurant,Pub,Café,Restaurant
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657,-79.379,1,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Bubble Tea Shop,Tea Room,Pizza Place,Diner,Theater
2,M5C,Downtown Toronto,St. James Town,43.651,-79.375,1,Coffee Shop,Café,Hotel,Restaurant,Italian Restaurant,Beer Bar,Breakfast Spot,Gastropub,Cocktail Bar,Bakery
3,M5E,Downtown Toronto,Berczy Park,43.645,-79.373,1,Coffee Shop,Cocktail Bar,Farmers Market,Restaurant,Bakery,Italian Restaurant,Seafood Restaurant,Café,Cheese Shop,Beer Bar
4,M5G,Downtown Toronto,Central Bay Street,43.658,-79.387,1,Coffee Shop,Sandwich Place,Italian Restaurant,Middle Eastern Restaurant,Burger Joint,Café,Salad Place,Sushi Restaurant,Spa,Chinese Restaurant


In [143]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=2)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighbourhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

In [144]:
map_clusters

### Examining Clusters

### Cluster 1

In [145]:
merged.loc[merged['Cluster Labels'] == 0, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,Haymarket,0,Café,Chinese Restaurant,Thai Restaurant,Coffee Shop,Malay Restaurant,Ramen Restaurant,Hotel,Asian Restaurant,Dumpling Restaurant,Bar
20,Millers Point,0,Café,Seafood Restaurant,Chinese Restaurant,Park,Boat or Ferry,Australian Restaurant,Middle Eastern Restaurant,Pier,Coffee Shop,Nature Preserve
26,Ultimo,0,Café,Chinese Restaurant,Thai Restaurant,Coffee Shop,Malay Restaurant,Ramen Restaurant,Hotel,Asian Restaurant,Dumpling Restaurant,Bar
27,Chippendale,0,Café,Chinese Restaurant,Thai Restaurant,Coffee Shop,Malay Restaurant,Ramen Restaurant,Hotel,Asian Restaurant,Dumpling Restaurant,Bar
47,Daceyville,0,Chinese Restaurant,Indonesian Restaurant,Café,Grocery Store,Park,Malay Restaurant,Thai Restaurant,Italian Restaurant,Bus Station,Asian Restaurant
48,Kingsford,0,Chinese Restaurant,Indonesian Restaurant,Café,Grocery Store,Malay Restaurant,Bar,Fast Food Restaurant,Thai Restaurant,Italian Restaurant,Asian Restaurant
49,Kensington,0,Chinese Restaurant,Indonesian Restaurant,Department Store,Sushi Restaurant,Indian Restaurant,Pub,Convenience Store,Malay Restaurant,Park,Liquor Store


### Cluster 2

In [146]:
merged.loc[merged['Cluster Labels'] == 1, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Harbourfront, Regent Park",1,Coffee Shop,Theater,Bakery,Park,Mexican Restaurant,Breakfast Spot,Italian Restaurant,Pub,Café,Restaurant
1,"Ryerson, Garden District",1,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Bubble Tea Shop,Tea Room,Pizza Place,Diner,Theater
2,St. James Town,1,Coffee Shop,Café,Hotel,Restaurant,Italian Restaurant,Beer Bar,Breakfast Spot,Gastropub,Cocktail Bar,Bakery
3,Berczy Park,1,Coffee Shop,Cocktail Bar,Farmers Market,Restaurant,Bakery,Italian Restaurant,Seafood Restaurant,Café,Cheese Shop,Beer Bar
4,Central Bay Street,1,Coffee Shop,Sandwich Place,Italian Restaurant,Middle Eastern Restaurant,Burger Joint,Café,Salad Place,Sushi Restaurant,Spa,Chinese Restaurant
7,"Harbourfront East, Toronto Islands",1,Coffee Shop,Hotel,Italian Restaurant,Aquarium,Café,Scenic Lookout,Brewery,Fried Chicken Joint,Restaurant,Bakery
8,"Design Exchange, Toronto Dominion Centre",1,Coffee Shop,Café,Hotel,Restaurant,Bakery,Deli / Bodega,Gastropub,Bar,Italian Restaurant,American Restaurant
9,"Commerce Court, Victoria Hotel",1,Coffee Shop,Café,Hotel,American Restaurant,Restaurant,Bakery,Seafood Restaurant,Gastropub,Deli / Bodega,Steakhouse
12,"CN Tower, Bathurst Quay",1,Airport Service,Airport Terminal,Airport Lounge,Boutique,Boat or Ferry,Bar,Sculpture Garden,Coffee Shop,Plane,Harbor / Marina
14,Stn A PO Boxes 25 The Esplanade,1,Coffee Shop,Café,Restaurant,Italian Restaurant,Cocktail Bar,Bakery,Seafood Restaurant,Hotel,Steakhouse,Creperie


### Cluster 3

In [147]:
merged.loc[merged['Cluster Labels'] == 2, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Rosedale,2,Park,Building,Playground,Falafel Restaurant,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
44,Dover Heights,2,Playground,Park,Sporting Goods Shop,Zoo,Event Space,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store


### Cluster 4

In [148]:
merged.loc[merged['Cluster Labels'] == 3, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,"Adelaide, King",3,Coffee Shop,Bar,Steakhouse,American Restaurant,Café,Thai Restaurant,Cosmetics Shop,Hotel,Sushi Restaurant,Gastropub
10,"Harbord, University of Toronto",3,Café,Restaurant,Gym,Bookstore,Bar,Coffee Shop,Bakery,Japanese Restaurant,Italian Restaurant,Noodle House
11,"Chinatown, Grange Park",3,Café,Vegetarian / Vegan Restaurant,Bar,Mexican Restaurant,Dumpling Restaurant,Bakery,Coffee Shop,Chinese Restaurant,Vietnamese Restaurant,Gaming Cafe
18,Dawes Point,3,Café,Hotel,Australian Restaurant,Pub,Cocktail Bar,Hotel Bar,Burger Joint,Sandwich Place,History Museum,Restaurant
21,Sydney,3,Café,Coffee Shop,Shopping Mall,Hotel,Bar,Cocktail Bar,Japanese Restaurant,Clothing Store,Speakeasy,Bookstore
22,The Rocks,3,Café,Hotel,Australian Restaurant,Pub,Cocktail Bar,Hotel Bar,Burger Joint,Sandwich Place,History Museum,Restaurant
29,Pyrmont,3,Café,Hotel,Bar,Italian Restaurant,Australian Restaurant,Burger Joint,Ice Cream Shop,Steakhouse,Gym,Cocktail Bar
38,Edgecliff,3,Supermarket,Japanese Restaurant,Gym,Australian Restaurant,Café,Train Station,Sandwich Place,Bakery,Park,Italian Restaurant
59,Marble Hill,3,Coffee Shop,Spa,Seafood Restaurant,Tennis Stadium,Bank,Sandwich Place,Diner,Gym,Mexican Restaurant,Kids Store
60,Chinatown,3,Chinese Restaurant,Cocktail Bar,American Restaurant,Bakery,Spa,Dumpling Restaurant,Vietnamese Restaurant,Ice Cream Shop,Bubble Tea Shop,Noodle House


### Cluster 5

In [149]:
merged.loc[merged['Cluster Labels'] == 4, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Christie,4,Grocery Store,Café,Park,Baby Store,Italian Restaurant,Coffee Shop,Restaurant,Nightclub,Convenience Store,Diner
23,World Square,4,Café,Thai Restaurant,Coffee Shop,Japanese Restaurant,Burger Joint,Cocktail Bar,Breakfast Spot,Sandwich Place,Hotel,Tapas Restaurant
25,University Of Sydney,4,Café,Performing Arts Venue,Pub,Coffee Shop,Farmers Market,Middle Eastern Restaurant,Beer Garden,Beer Bar,Bar,Thai Restaurant
28,Darlington,4,Café,Bar,Pub,Thai Restaurant,Pizza Place,Fast Food Restaurant,Burger Joint,Bakery,Restaurant,Art Gallery
30,Surry Hills,4,Café,Pub,Lebanese Restaurant,Japanese Restaurant,Coffee Shop,Pizza Place,Breakfast Spot,Sandwich Place,Gym,Greek Restaurant
31,Darlinghurst,4,Café,Bar,Italian Restaurant,Pizza Place,Bookstore,Bakery,Pub,Japanese Restaurant,Indian Restaurant,Burger Joint
32,Woolloomooloo,4,Café,Hotel,Australian Restaurant,Italian Restaurant,Chinese Restaurant,Bar,Pub,Wine Bar,Restaurant,Harbor / Marina
33,Elizabeth Bay,4,Café,Park,Wine Bar,Italian Restaurant,Bar,Australian Restaurant,Japanese Restaurant,Chinese Restaurant,Hotel,Thai Restaurant
34,Potts Point,4,Café,Australian Restaurant,Italian Restaurant,Japanese Restaurant,Hotel,Coffee Shop,Bar,Sushi Restaurant,Chinese Restaurant,Wine Bar
35,Rushcutters Bay,4,Café,Italian Restaurant,Bar,Park,Pizza Place,Wine Bar,Hotel,Coffee Shop,Japanese Restaurant,Sushi Restaurant


### Cluster 6

In [150]:
merged.loc[merged['Cluster Labels'] == 5, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,Darling Point,5,Park,Boat or Ferry,Harbor / Marina,Exhibit,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
39,Point Piper,5,Harbor / Marina,Park,Zoo,Event Space,Dry Cleaner,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant


### Cluster 7

In [151]:
merged.loc[merged['Cluster Labels'] == 6, merged.columns[[2] + list(range(5, merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
24,Eastern Suburbs,6,Liquor Store,Park,Coffee Shop,Falafel Restaurant,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,English Restaurant
36,Bondi,6,Café,Park,Grocery Store,Gym,Pizza Place,Coffee Shop,Spa,Bus Station,Gas Station,Thai Restaurant
41,Rose Bay,6,Harbor / Marina,Café,Park,Gym,Golf Course,Thai Restaurant,Tennis Court,Beer Garden,Zoo,Empanada Restaurant
42,Vaucluse,6,Park,Bakery,Lighthouse,Nature Preserve,Shopping Mall,Sushi Restaurant,Pizza Place,Café,Beach,Zoo
43,Watsons Bay,6,Harbor / Marina,Park,Seafood Restaurant,Scenic Lookout,Ice Cream Shop,National Park,Fish & Chips Shop,Surf Spot,Bus Station,Beer Garden
51,South Coogee,6,Liquor Store,Park,Trail,Bakery,Café,Zoo,Dumpling Restaurant,Duty-free Shop,Eastern European Restaurant,Electronics Store
55,Mcmahons Point,6,Park,Café,Pier,Australian Restaurant,Garden,Harbor / Marina,Moroccan Restaurant,Mediterranean Restaurant,Gym,Pub
96,Stuyvesant Town,6,Park,Bar,Playground,Coffee Shop,Baseball Field,Basketball Court,Pet Service,Harbor / Marina,Heliport,Cocktail Bar


In [152]:
latitude = 43.6529
longitude = -79.3849
map_clusters_Toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighbourhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_Toronto)

In [153]:
latitude = -33.86785
longitude = 151.20732
map_clusters_Sydney = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighbourhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_Sydney)

In [154]:
latitude = 51.51279
longitude = -0.09184
map_clusters_London = folium.Map(location=[latitude, longitude], zoom_start=14)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighbourhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_London)

In [155]:
latitude = 40.7127281
longitude = -74.0060152
map_clusters_New_York = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighbourhood'], merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters_New_York)

<a id='item5R'></a>

In [156]:
map_clusters_Toronto

In [157]:
map_clusters_Sydney

In [158]:
map_clusters_London

In [159]:
map_clusters_New_York