In [32]:
import json
import os

import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

In [33]:
# Station table read from a CSV in the working directory; later cells use its
# 'Station', 'latitude' and 'longitude' columns.
tokyo_station = pd.read_csv(filepath_or_buffer='tokyo_station.csv')

In [34]:
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

# Base map on which the coordinate clusters are drawn.
tokyo_map = folium.Map(location=[35.652832, 139.839478], zoom_start=12)

# Clustering based on coordinates only.
# The palette gets its own name so it does not rebind `colors`, which the
# import cell uses for the matplotlib.colors module.
station_cluster_colors = ['red', 'green', 'blue', 'yellow']
station_coords = tokyo_station[['latitude', 'longitude']].values

# One cluster per palette entry, so every label has a colour.
kmeans = KMeans(n_clusters=len(station_cluster_colors), random_state=0).fit(station_coords)

clusters = kmeans.labels_
tokyo_station['Cluster'] = clusters

# NOTE: the loop variable names `latitude` / `longitude` are kept as-is because
# other cells in this notebook read them after the loop as a map centre.
for latitude, longitude, cluster in zip(tokyo_station['latitude'], tokyo_station['longitude'], tokyo_station['Cluster']):
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        color='black',
        fill=True,
        fill_color=station_cluster_colors[cluster],
        fill_opacity=0.7).add_to(tokyo_map)

tokyo_map

In [36]:
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    """Search Foursquare for venues around each given location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        One label and one coordinate pair per location; they are consumed
        in parallel with ``zip``.
    radius : number, default 5000
        Value sent as the ``radius`` query parameter.
    categoryIds : str, default ''
        When non-empty, sent as the ``categoryId`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was found.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    A location whose request fails is reported and skipped, so one bad
    response does not discard the results already collected. Venues without
    a usable name, location or first category are skipped.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/search'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests encode the query string instead of formatting it by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        if categoryIds != '':
            params['categoryId'] = categoryIds

        try:
            reply = requests.get(endpoint, params=params, timeout=30)
            reply.raise_for_status()
            venues = reply.json()['response']['venues']
        except (requests.RequestException, KeyError, ValueError) as err:
            # Only the exception type is printed: the full message can contain
            # the request URL, and with it the API credentials.
            print('Skipping {}: venue search failed ({})'.format(name, type(err).__name__))
            continue

        # keep only the relevant information for each nearby venue
        for venue in venues:
            try:
                rows.append((
                    name,
                    lat,
                    lng,
                    venue['name'],
                    venue['location']['lat'],
                    venue['location']['lng'],
                    venue['categories'][0]['name'],
                ))
            except (KeyError, IndexError, TypeError):
                # Venue record is missing a field this table needs.
                continue

    return pd.DataFrame(rows, columns=columns)

In [46]:
# Foursquare API settings read by getNearbyVenues.
LIMIT = 500  # sent as the `limit` query parameter
radius = 1000  # search radius value (presumably metres; confirm against the Foursquare API docs)
VERSION = '20181020'  # sent as the `v` query parameter

# Credentials come from the environment so they are never stored in the
# notebook. Keys that were previously committed here should be treated as
# compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')

if not CLIENT_ID or not CLIENT_SECRET:
    print('WARNING: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
          'before running the venue search cells.')

In [47]:
# Rows for Tokyo station itself. The comparison ignores case so the filter
# matches regardless of how the station name is capitalised in the CSV.
neighborhoods = tokyo_station[tokyo_station['Station'].str.lower() == 'tokyo'].reset_index(drop=True)

# Foursquare category IDs
# hotel          4bf58dd8d48988d1fa931735
# bus station    52f2ab2ebcbc57f1066b8b4f
# subway station 4bf58dd8d48988d1fd931735
# sushi          4bf58dd8d48988d1d2941735
HOTEL_CATEGORY_ID = '4bf58dd8d48988d1fa931735'

# The search uses the hotel category, so the result is named accordingly.
tokyo_venues_hotel = getNearbyVenues(names=tokyo_station['Station'],
                                     latitudes=tokyo_station['latitude'],
                                     longitudes=tokyo_station['longitude'],
                                     radius=1000, categoryIds=HOTEL_CATEGORY_ID)

# Same object under the name this cell originally assigned, so cells written
# against either name keep working.
tokyo_venues_sushi = tokyo_venues_hotel

In [48]:
def addToMap(df, color, existingMap):
    """Add one small circle marker per row of ``df`` to ``existingMap``.

    ``df`` must provide the columns 'Venue Latitude', 'Venue Longitude',
    'Neighborhood', 'Venue' and 'Venue Category'. Each marker is outlined and
    filled with ``color`` and carries a popup of the form
    "<venue> (<category>) - <neighborhood>". The map is modified in place and
    nothing is returned.
    """
    popup_template = '{} ({}) - {}'
    marker_rows = zip(
        df['Venue Latitude'],
        df['Venue Longitude'],
        df['Neighborhood'],
        df['Venue'],
        df['Venue Category'],
    )
    for venue_lat, venue_lng, neighborhood, venue_name, category in marker_rows:
        popup_text = popup_template.format(venue_name, category, neighborhood)
        popup = folium.Popup(popup_text, parse_html=True)
        marker = folium.CircleMarker(
            [venue_lat, venue_lng],
            radius=1,
            popup=popup,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7)
        marker.add_to(existingMap)

In [49]:
# Centre the map on the mean station position instead of on loop variables
# left behind by another cell.
map_center = [tokyo_station['latitude'].mean(), tokyo_station['longitude'].mean()]
map_tokyo_sushi = folium.Map(location=map_center, zoom_start=10)

# Draw onto the map created above. Adding to a shared map would stack a
# duplicate set of markers on it every time this cell is re-run.
addToMap(tokyo_venues_sushi, 'black', map_tokyo_sushi)

map_tokyo_sushi

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


In [50]:
# one hot encoding
tokyo_onehot = pd.get_dummies(tokyo_venues_hotel[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
tokyo_onehot['Neighborhood'] = tokyo_venues_hotel['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [tokyo_onehot.columns[-1]] + list(tokyo_onehot.columns[:-1])
tokyo_onehot = tokyo_onehot[fixed_columns]

tokyo_onehot.head()

Unnamed: 0,Neighborhood,Bed & Breakfast,Boarding House,Boutique,Café,Conference Room,Event Space,Gay Bar,Gym,Hostel,...,Japanese Restaurant,Meeting Room,Motel,Office,Other Nightlife,Residential Building (Apartment / Condo),Resort,Sauna / Steam Room,Spa,Vacation Rental
0,Tokyo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Tokyo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Tokyo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Tokyo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Tokyo,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [51]:
# Mean of every one-hot column per neighborhood, i.e. the share of each venue
# category among that neighborhood's venues.
tokyo_grouped = (
    tokyo_onehot
    .groupby('Neighborhood')
    .agg('mean')
    .reset_index()
)
tokyo_grouped

Unnamed: 0,Neighborhood,Bed & Breakfast,Boarding House,Boutique,Café,Conference Room,Event Space,Gay Bar,Gym,Hostel,...,Japanese Restaurant,Meeting Room,Motel,Office,Other Nightlife,Residential Building (Apartment / Condo),Resort,Sauna / Steam Room,Spa,Vacation Rental
0,Akihabara,0.58,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0
1,Ebisu,0.214286,0.071429,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,...,0.035714,0.0,0.178571,0.0,0.0,0.0,0.035714,0.0,0.035714,0.071429
2,Gotanda,0.081633,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,...,0.0,0.0,0.142857,0.0,0.0,0.0,0.020408,0.0,0.0,0.0
3,Hamamatsucho,0.38,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.02,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Harajuku,0.078947,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,...,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Ikebukuro,0.22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,...,0.0,0.0,0.12,0.0,0.0,0.0,0.02,0.0,0.0,0.0
6,Kanda,0.52,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0
7,Komagome,0.333333,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Meguro,0.0625,0.03125,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,...,0.03125,0.0,0.21875,0.0,0.0,0.0,0.03125,0.0,0.03125,0.03125
9,Mejiro,0.085106,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.042553,...,0.0,0.0,0.085106,0.0,0.021277,0.0,0.0,0.0,0.0,0.0


In [52]:
# set number of clusters
kclusters = 5

tokyo_grouped_clustering = tokyo_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tokyo_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 4, 2, 1, 2, 3, 0, 4, 2, 2], dtype=int32)

In [53]:
# add clustering labels
tokyo_grouped.insert(0, 'Cluster Labels', kmeans.labels_)

tokyo_grouped

Unnamed: 0,Cluster Labels,Neighborhood,Bed & Breakfast,Boarding House,Boutique,Café,Conference Room,Event Space,Gay Bar,Gym,...,Japanese Restaurant,Meeting Room,Motel,Office,Other Nightlife,Residential Building (Apartment / Condo),Resort,Sauna / Steam Room,Spa,Vacation Rental
0,0,Akihabara,0.58,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0
1,4,Ebisu,0.214286,0.071429,0.0,0.0,0.0,0.0,0.0,0.035714,...,0.035714,0.0,0.178571,0.0,0.0,0.0,0.035714,0.0,0.035714,0.071429
2,2,Gotanda,0.081633,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.142857,0.0,0.0,0.0,0.020408,0.0,0.0,0.0
3,1,Hamamatsucho,0.38,0.0,0.0,0.02,0.0,0.0,0.0,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2,Harajuku,0.078947,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,3,Ikebukuro,0.22,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.12,0.0,0.0,0.0,0.02,0.0,0.0,0.0
6,0,Kanda,0.52,0.0,0.0,0.0,0.02,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0
7,4,Komagome,0.333333,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,2,Meguro,0.0625,0.03125,0.0,0.0,0.0,0.0,0.0,0.03125,...,0.03125,0.0,0.21875,0.0,0.0,0.0,0.03125,0.0,0.03125,0.03125
9,2,Mejiro,0.085106,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.085106,0.0,0.021277,0.0,0.0,0.0,0.0,0.0


In [54]:
# Attach each station's cluster label and category shares by matching
# 'Station' against the neighborhood names of tokyo_grouped.
grouped_by_name = tokyo_grouped.set_index('Neighborhood')
tokyo_merged = tokyo_station.join(grouped_by_name, on='Station')

tokyo_merged.head()


Unnamed: 0,Station,latitude,longitude,Cluster,Cluster Labels,Bed & Breakfast,Boarding House,Boutique,Café,Conference Room,...,Japanese Restaurant,Meeting Room,Motel,Office,Other Nightlife,Residential Building (Apartment / Condo),Resort,Sauna / Steam Room,Spa,Vacation Rental
0,Tokyo,35.681382,139.766084,3,1,0.32,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0
1,Yurakucho,35.675069,139.763328,3,1,0.36,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0
2,Shinbashi,35.665498,139.75964,3,1,0.44,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Hamamatsucho,35.655646,139.756749,3,1,0.38,0.0,0.0,0.02,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Tamachi,35.645736,139.747575,2,4,0.3125,0.0625,0.0,0.0,0.0,...,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
# Imported here so this cell does not depend on `colors` still referring to
# the matplotlib module when it runs.
import matplotlib.colors as colors

# create map, centred on the mean station position rather than on loop
# variables left behind by another cell
map_center = [tokyo_merged['latitude'].mean(), tokyo_merged['longitude'].mean()]
map_clusters = folium.Map(location=map_center, zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour each
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(tokyo_merged['latitude'], tokyo_merged['longitude'], tokyo_merged['Station'], tokyo_merged['Cluster Labels']):
    # A station with no match in the join has a missing label; it cannot be
    # coloured by cluster, so it is left off the map.
    if pd.isna(cluster):
        continue
    # The join can turn the label column into floats; list indexing needs an int.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # Labels are 0-based, so the label itself is the palette index.
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [56]:
# Stations whose venue-profile cluster label is 0.
tokyo_merged[tokyo_merged['Cluster Labels'].eq(0)]

Unnamed: 0,Station,latitude,longitude,Cluster,Cluster Labels,Bed & Breakfast,Boarding House,Boutique,Café,Conference Room,...,Japanese Restaurant,Meeting Room,Motel,Office,Other Nightlife,Residential Building (Apartment / Condo),Resort,Sauna / Steam Room,Spa,Vacation Rental
27,Akihabara,35.698683,139.774219,3,0,0.58,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0
28,Kanda,35.69169,139.770883,3,0,0.52,0.0,0.0,0.0,0.02,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0


In [57]:
# Stations whose venue-profile cluster label is 1.
tokyo_merged[tokyo_merged['Cluster Labels'].eq(1)]

Unnamed: 0,Station,latitude,longitude,Cluster,Cluster Labels,Bed & Breakfast,Boarding House,Boutique,Café,Conference Room,...,Japanese Restaurant,Meeting Room,Motel,Office,Other Nightlife,Residential Building (Apartment / Condo),Resort,Sauna / Steam Room,Spa,Vacation Rental
0,Tokyo,35.681382,139.766084,3,1,0.32,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0
1,Yurakucho,35.675069,139.763328,3,1,0.36,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0
2,Shinbashi,35.665498,139.75964,3,1,0.44,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Hamamatsucho,35.655646,139.756749,3,1,0.38,0.0,0.0,0.02,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13,Shinjuku,35.690921,139.700258,0,1,0.3,0.0,0.0,0.0,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25,Ueno,35.713768,139.777254,1,1,0.34,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.04,0.0
26,Okachimati,35.707438,139.774632,1,1,0.34,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0


In [58]:
# Stations whose venue-profile cluster label is 2.
tokyo_merged[tokyo_merged['Cluster Labels'].eq(2)]

Unnamed: 0,Station,latitude,longitude,Cluster,Cluster Labels,Bed & Breakfast,Boarding House,Boutique,Café,Conference Room,...,Japanese Restaurant,Meeting Room,Motel,Office,Other Nightlife,Residential Building (Apartment / Condo),Resort,Sauna / Steam Room,Spa,Vacation Rental
5,Sinagawa,35.630152,139.74044,2,2,0.111111,0.044444,0.0,0.0,0.0,...,0.0,0.0,0.0,0.022222,0.0,0.0,0.0,0.0,0.0,0.044444
6,Oosaki,35.6197,139.728553,2,2,0.097561,0.0,0.0,0.0,0.0,...,0.0,0.0,0.097561,0.0,0.0,0.0,0.0,0.0,0.0,0.04878
7,Gotanda,35.626446,139.723444,2,2,0.081633,0.0,0.0,0.0,0.0,...,0.0,0.0,0.142857,0.0,0.0,0.0,0.020408,0.0,0.0,0.0
8,Meguro,35.633998,139.715828,2,2,0.0625,0.03125,0.0,0.0,0.0,...,0.03125,0.0,0.21875,0.0,0.0,0.0,0.03125,0.0,0.03125,0.03125
10,Shibuya,35.658517,139.701334,2,2,0.142857,0.0,0.020408,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408
11,Harajuku,35.670168,139.702687,0,2,0.078947,0.026316,0.0,0.0,0.0,...,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16,Mejiro,35.721204,139.706587,0,2,0.085106,0.021277,0.0,0.0,0.0,...,0.0,0.0,0.085106,0.0,0.021277,0.0,0.0,0.0,0.0,0.0
23,Nippori,35.727772,139.770987,1,2,0.1,0.0,0.0,0.0,0.0,...,0.0,0.0,0.16,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
# Stations whose venue-profile cluster label is 3.
tokyo_merged[tokyo_merged['Cluster Labels'].eq(3)]

Unnamed: 0,Station,latitude,longitude,Cluster,Cluster Labels,Bed & Breakfast,Boarding House,Boutique,Café,Conference Room,...,Japanese Restaurant,Meeting Room,Motel,Office,Other Nightlife,Residential Building (Apartment / Condo),Resort,Sauna / Steam Room,Spa,Vacation Rental
12,Yoyogi,35.683061,139.702042,0,3,0.183673,0.020408,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0
14,Shinokubo,35.701306,139.700044,0,3,0.26,0.0,0.0,0.0,0.0,...,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,Ikebukuro,35.728926,139.71038,0,3,0.22,0.0,0.0,0.0,0.0,...,0.0,0.0,0.12,0.0,0.0,0.0,0.02,0.0,0.0,0.0
21,Tabata,35.738062,139.76086,1,3,0.190476,0.095238,0.0,0.0,0.0,...,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22,Nishinippori,35.732135,139.766787,1,3,0.171429,0.028571,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0
24,Uguisudani,35.720495,139.778837,1,3,0.2,0.0,0.0,0.0,0.0,...,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.02,0.02,0.0


In [60]:
# Stations whose venue-profile cluster label is 4.
tokyo_merged[tokyo_merged['Cluster Labels'].eq(4)]

Unnamed: 0,Station,latitude,longitude,Cluster,Cluster Labels,Bed & Breakfast,Boarding House,Boutique,Café,Conference Room,...,Japanese Restaurant,Meeting Room,Motel,Office,Other Nightlife,Residential Building (Apartment / Condo),Resort,Sauna / Steam Room,Spa,Vacation Rental
4,Tamachi,35.645736,139.747575,2,4,0.3125,0.0625,0.0,0.0,0.0,...,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Ebisu,35.64669,139.710106,2,4,0.214286,0.071429,0.0,0.0,0.0,...,0.035714,0.0,0.178571,0.0,0.0,0.0,0.035714,0.0,0.035714,0.071429
15,Tkadanobaba,35.712285,139.703782,0,4,0.25641,0.025641,0.0,0.0,0.0,...,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.025641
18,Otsuka,35.731401,139.728662,1,4,0.276596,0.0,0.0,0.0,0.0,...,0.0,0.021277,0.106383,0.0,0.0,0.0,0.021277,0.0,0.0,0.0
19,Sugamo,35.733492,139.739345,1,4,0.357143,0.0,0.0,0.0,0.0,...,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20,Komagome,35.736489,139.746875,1,4,0.333333,0.041667,0.0,0.0,0.0,...,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [61]:
# Show the column labels of the merged frame (station fields, both cluster
# columns, then one column per venue category).
tokyo_merged.columns

Index(['Station', 'latitude', 'longitude', 'Cluster', 'Cluster Labels',
       'Bed & Breakfast', 'Boarding House', 'Boutique', 'Café',
       'Conference Room', 'Event Space', 'Gay Bar', 'Gym', 'Hostel', 'Hotel',
       'Hotel Pool', 'Housing Development', 'Inn', 'Japanese Restaurant',
       'Meeting Room', 'Motel', 'Office', 'Other Nightlife',
       'Residential Building (Apartment / Condo)', 'Resort',
       'Sauna / Steam Room', 'Spa', 'Vacation Rental'],
      dtype='object')