In [1]:
from bs4 import BeautifulSoup
import requests
import csv
import pandas as pd
import json
from pandas.io.json import json_normalize
import numpy as np

<h3>Pt 1: Data Scaping</h3>

In [2]:
# send the GET request
data = requests.get('https://en.wikipedia.org/wiki/Special_wards_of_Tokyo').text

# parse data from the html into a BeautifulSoup object
soup = BeautifulSoup(data, 'lxml')

tokyo_table=soup.find('table', {"class":"wikitable sortable"})

In [3]:
num = []
Name=[]
Major_District = []

for row in tokyo_table.findAll("tr"):
    #print (row)    
    Ward = row.findAll('td')
    #print (len(Ward))
    #print (Ward)
    if len(Ward)==8: #Only extract table body not heading
        num.append(Ward[0].find(text=True))
        Name.append(Ward[2])
        Major_District.append(Ward[7].find(text=True))

In [4]:
flat_name_list = [item for sublist in Name for item in sublist]
Wards_names = [name_wards.get('title') for name_wards in flat_name_list]
Wards_names.remove(None)
Wards_names

['Chiyoda, Tokyo',
 'Chūō, Tokyo',
 'Minato, Tokyo',
 'Shinjuku',
 'Bunkyō',
 'Taitō',
 'Sumida, Tokyo',
 'Koto, Tokyo',
 'Shinagawa',
 'Meguro',
 'Ōta, Tokyo',
 'Setagaya',
 'Shibuya',
 'Nakano, Tokyo',
 'Suginami',
 'Toshima',
 'Kita, Tokyo',
 'Arakawa, Tokyo',
 'Itabashi',
 'Nerima',
 'Adachi, Tokyo',
 'Katsushika',
 'Edogawa, Tokyo']

In [5]:
tokyo_df = pd.DataFrame({"Ward Name": Wards_names,
                         "Major District": Major_District})

In [6]:
tokyo_df

Unnamed: 0,Ward Name,Major District
0,"Chiyoda, Tokyo",Nagatachō
1,"Chūō, Tokyo",Nihonbashi
2,"Minato, Tokyo",Odaiba
3,Shinjuku,Shinjuku
4,Bunkyō,Hongō
5,Taitō,Ueno
6,"Sumida, Tokyo",Kinshichō
7,"Koto, Tokyo",Kiba
8,Shinagawa,Shinagawa
9,Meguro,Meguro


In [7]:
tokyo_df['Ward Name'] = tokyo_df['Ward Name'].replace({'Chiyoda, Tokyo':'Chiyoda', 'Chūō, Tokyo':'Chuo', 'Minato, Tokyo':'Minato', 
                                               'Sumida, Tokyo':'Sumida', 'Koto, Tokyo':'Koto', 'Ōta, Tokyo':'Ota', 'Nakano, Tokyo':'Nakano', 
                                                'Kita, Tokyo':'Kita', 'Arakawa, Tokyo':'Arakawa', 'Adachi, Tokyo':'Adachi', 'Edogawa, Tokyo':'Edogawa'})

tokyo_df['Major District'] = tokyo_df['Major District'].replace({'Nagatachō':'Nagatacho', 'Hongō':'Hongo', 'Kinshichō':'Kinshicho', 'Ōmori': 'Omori', 
                                         'Kōenji':'Koenji', 'Arakawa, Machiya, ':'Arakawa', 'Ayase, ':'Ayase', 'Kasai, Koiwa\n':'Kasai'})

<h3>Pt 2: Adding Latitude and Longitude to the dataframe</h3>

In [8]:
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="tokyo_explorer")
tokyo_df['Major_Dist_Coord']= tokyo_df['Major District'].apply(geolocator.geocode).apply(lambda x: (x.latitude, x.longitude))

tokyo_df

Unnamed: 0,Ward Name,Major District,Major_Dist_Coord
0,Chiyoda,Nagatacho,"(35.675618, 139.7434685)"
1,Chuo,Nihonbashi,"(35.68406775, 139.77450291683806)"
2,Minato,Odaiba,"(35.619050200000004, 139.77936404588297)"
3,Shinjuku,Shinjuku,"(35.6937632, 139.7036319)"
4,Bunkyō,Hongo,"(35.1753761, 137.0134764)"
5,Taitō,Ueno,"(35.711795, 139.7760755)"
6,Sumida,Kinshicho,"(35.6963122, 139.8150431)"
7,Koto,Kiba,"(23.0131338, -80.8328748)"
8,Shinagawa,Shinagawa,"(35.599252, 139.73891)"
9,Meguro,Meguro,"(35.62125, 139.688014)"


<h4>Merge the two dataframes</h4>

In [9]:
tokyo_df[['Latitude', 'Longitude']] = tokyo_df['Major_Dist_Coord'].apply(pd.Series)
tokyo_df= tokyo_df.drop(columns = ['Major_Dist_Coord'])
tokyo_df

Unnamed: 0,Ward Name,Major District,Latitude,Longitude
0,Chiyoda,Nagatacho,35.675618,139.743469
1,Chuo,Nihonbashi,35.684068,139.774503
2,Minato,Odaiba,35.61905,139.779364
3,Shinjuku,Shinjuku,35.693763,139.703632
4,Bunkyō,Hongo,35.175376,137.013476
5,Taitō,Ueno,35.711795,139.776075
6,Sumida,Kinshicho,35.696312,139.815043
7,Koto,Kiba,23.013134,-80.832875
8,Shinagawa,Shinagawa,35.599252,139.73891
9,Meguro,Meguro,35.62125,139.688014


In [10]:
from IPython.display import HTML
import pandas as pd
import base64, io

# Download as CSV: data frame, optional title and filename
def create_download_link_csv(df, title = "Download CSV file", filename = "data.csv"):  
    # generate in-memory CSV, then base64-encode it
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode())
    payload = b64.decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    html = html.format(payload=payload,title=title,filename=filename)
    return HTML(html)

create_download_link_csv(tokyo_df,"Download my data","Tokyo_df.csv")

In [11]:
print('The dataframe has {} Wards'.format(len(tokyo_df['Ward Name'].unique())))

The dataframe has 23 Wards


In [12]:
tokyo_df.shape 

(23, 4)

In [13]:
## Similarly select those five wards from the Tokyo Wards Data-Frame
target_wards = ['Chiyoda', 'Chuo', 'Shinjuku', 'Shibuya', 'Minato']
tokyo_df_selected = tokyo_df.loc[tokyo_df['Ward Name'].isin(target_wards)]
tokyo_df_selected

Unnamed: 0,Ward Name,Major District,Latitude,Longitude
0,Chiyoda,Nagatacho,35.675618,139.743469
1,Chuo,Nihonbashi,35.684068,139.774503
2,Minato,Odaiba,35.61905,139.779364
3,Shinjuku,Shinjuku,35.693763,139.703632
12,Shibuya,Shibuya,35.664596,139.698711


<h4> Use geopy library to get the latitude and longitude values of Tokyo </h4>

In [14]:
!pip install folium

Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 3.7 MB/s eta 0:00:011
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1


In [15]:
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

address = 'Tokyo'

location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Tokyo are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Tokyo are 35.6828387, 139.7594549.


<h4>Create a map of Tokyo with Wards and major districts superimposed on top</h4>

In [16]:
# create map of Tokyo using latitude and longitude values
map_tokyo = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(tokyo_df_selected['Latitude'], tokyo_df_selected['Longitude'], tokyo_df_selected['Ward Name'], tokyo_df_selected['Major District']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_tokyo)  
    
map_tokyo

<h4>Define Foursquare Credentials and Version</h4>

In [17]:
CLIENT_ID = '3DXRP1JTFIH2RXMCHUXMW2I3SWBJBGIKVNOSCVXOAUGAKRCD' # your Foursquare ID
CLIENT_SECRET = 'AFZ1EFQITHBFTMU4EYAZ1C35CXGM01OH4TKYN33GB3YI0MVG' # your Foursquare Secret
VERSION = '20200603' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

<h4>Let's explore the first major district in our dataframe.</h4>

In [18]:
tokyo_df_selected.loc[0, 'Major District']

'Nagatacho'

In [19]:
district_latitude = tokyo_df_selected.loc[0, 'Latitude'] # major district latitude value
district_longitude = tokyo_df_selected.loc[0, 'Longitude'] # major district longitude value

district_name = tokyo_df_selected.loc[0, 'Major District'] # major district name

print('Latitude and longitude values of {} are {}, {}.'.format(district_name, 
                                                               district_latitude, 
                                                               district_longitude))

Latitude and longitude values of Nagatacho are 35.675618, 139.7434685.


<h4> Now, let's get the top 100 venues that are in Nagatacho within a radius of 5000 meters </h4>

In [20]:
LIMIT = 100 
radius = 5000

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    district_latitude, 
    district_longitude, 
    radius, 
    LIMIT)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '6064aa742138947f20d68d54'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Tokyo',
  'headerFullLocation': 'Tokyo',
  'headerLocationGranularity': 'city',
  'totalResults': 240,
  'suggestedBounds': {'ne': {'lat': 35.720618045000045,
    'lng': 139.7987612092523},
   'sw': {'lat': 35.630617954999956, 'lng': 139.68817579074772}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b923e07f964a52010ee33e3',
       'name': 'Sushi Isshin (鮨 一新)',
       'location': {'address': '赤坂2-5-6',
        'crossStreet': '山王健保会館 1F',
        'lat': 35.67258918945737,
        'lng': 139.73939914630728,
        'labeledLatLngs': [{'label': 'display',
          'la

In [21]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

<h4>Clean the json and structure it into a pandas dataframe</h4>

In [22]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,Sushi Isshin (鮨 一新),Sushi Restaurant,35.672589,139.739399
1,Lawry's The Prime Rib,American Restaurant,35.669937,139.742637
2,La Maison Kioi,French Restaurant,35.679735,139.737887
3,Yakiniku Tendan (焼肉の名門 天壇),BBQ Joint,35.674428,139.736139
4,The Prince Gallery Tokyo Kioicho (ザ・プリンスギャラリー東...,Hotel,35.679757,139.736928


In [23]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


<h4> Function to repeat the same process to all the major districts in all Wards </h4>

In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Major District', 
                  'District Latitude', 
                  'District Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
tokyo_venues = getNearbyVenues(names=tokyo_df_selected['Major District'],
                                   latitudes=tokyo_df_selected['Latitude'],
                                   longitudes=tokyo_df_selected['Longitude']
                                  )

Nagatacho
Nihonbashi
Odaiba
Shinjuku
Shibuya


In [26]:
print(tokyo_venues.shape)
tokyo_venues.head()

(372, 7)


Unnamed: 0,Major District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Nagatacho,35.675618,139.743469,Nagatacho Kurosawa (永田町 黒澤),35.674699,139.741737,Japanese Restaurant
1,Nagatacho,35.675618,139.743469,The Capitol Hotel Tokyu (ザ・キャピトルホテル東急),35.673927,139.741019,Hotel
2,Nagatacho,35.675618,139.743469,Tully's Coffee,35.674594,139.743007,Coffee Shop
3,Nagatacho,35.675618,139.743469,Shinamen Hashigo (支那麺 はしご),35.672184,139.741576,Ramen Restaurant
4,Nagatacho,35.675618,139.743469,All Day Dining Origami (オールデイダイニング ORIGAMI),35.673815,139.741104,Restaurant


<h4>No. of venues were returned for each major district</h4>

In [27]:
tokyo_venues.groupby('Major District').count()

Unnamed: 0_level_0,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Major District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Nagatacho,32,32,32,32,32,32
Nihonbashi,100,100,100,100,100,100
Odaiba,40,40,40,40,40,40
Shibuya,100,100,100,100,100,100
Shinjuku,100,100,100,100,100,100


<h4>Analyze each major district</h4>

In [28]:
# one hot encoding
tokyo_onehot = pd.get_dummies(tokyo_venues[['Venue Category']], prefix="", prefix_sep="")

# add major district column back to dataframe
tokyo_onehot['Major District'] = tokyo_venues['Major District'] 

# move major district column to the first column
fixed_columns = [tokyo_onehot.columns[-1]] + list(tokyo_onehot.columns[:-1])
tokyo_onehot = tokyo_onehot[fixed_columns]

tokyo_onehot.head()

Unnamed: 0,Major District,Accessories Store,African Restaurant,Art Museum,Auditorium,BBQ Joint,Bakery,Bar,Bath House,Bed & Breakfast,...,Tonkatsu Restaurant,Toy / Game Store,Trail,Tunnel,Udon Restaurant,Unagi Restaurant,Wagashi Place,Wine Bar,Yakitori Restaurant,Yoshoku Restaurant
0,Nagatacho,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Nagatacho,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Nagatacho,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Nagatacho,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Nagatacho,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [29]:
tokyo_onehot.shape

(372, 124)

In [30]:
#Group rows by major district and by taking the mean of the frequency of occurrence of each category

tokyo_grouped = tokyo_onehot.groupby('Major District').mean().reset_index()
tokyo_grouped

Unnamed: 0,Major District,Accessories Store,African Restaurant,Art Museum,Auditorium,BBQ Joint,Bakery,Bar,Bath House,Bed & Breakfast,...,Tonkatsu Restaurant,Toy / Game Store,Trail,Tunnel,Udon Restaurant,Unagi Restaurant,Wagashi Place,Wine Bar,Yakitori Restaurant,Yoshoku Restaurant
0,Nagatacho,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.03125,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Nihonbashi,0.0,0.0,0.01,0.0,0.05,0.04,0.0,0.0,0.01,...,0.0,0.0,0.0,0.0,0.01,0.02,0.03,0.0,0.0,0.04
2,Odaiba,0.0,0.0,0.0,0.025,0.0,0.0,0.05,0.025,0.0,...,0.0,0.0,0.0,0.025,0.0,0.0,0.0,0.0,0.025,0.0
3,Shibuya,0.01,0.01,0.0,0.0,0.03,0.01,0.01,0.0,0.01,...,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0
4,Shinjuku,0.0,0.0,0.0,0.0,0.05,0.0,0.08,0.0,0.0,...,0.02,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.02


In [31]:
#Print each major district along with the top 5 most common venues

num_top_venues = 5

for hood in tokyo_grouped['Major District']:
    print("----"+hood+"----")
    temp = tokyo_grouped[tokyo_grouped['Major District'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Nagatacho----
                 venue  freq
0    Convenience Store  0.16
1          Coffee Shop  0.12
2  Japanese Restaurant  0.09
3                Hotel  0.06
4         Intersection  0.06


----Nihonbashi----
                 venue  freq
0                 Café  0.06
1  Japanese Restaurant  0.05
2            BBQ Joint  0.05
3                Hotel  0.04
4     Department Store  0.04


----Odaiba----
                 venue  freq
0  Japanese Restaurant  0.08
1    Convenience Store  0.08
2             Bus Stop  0.05
3          Coffee Shop  0.05
4                  Bar  0.05


----Shibuya----
              venue  freq
0              Café  0.11
1       Record Shop  0.07
2  Ramen Restaurant  0.04
3         Rock Club  0.04
4         Nightclub  0.03


----Shinjuku----
                 venue  freq
0                  Bar  0.08
1             Sake Bar  0.08
2     Ramen Restaurant  0.07
3            BBQ Joint  0.05
4  Japanese Restaurant  0.04




In [32]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [33]:
#Put this into a pandas dataframe

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Major District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Major District'] = tokyo_grouped['Major District']

for ind in np.arange(tokyo_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(tokyo_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Major District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Nagatacho,Convenience Store,Coffee Shop,Japanese Restaurant,Intersection,Hotel,Ramen Restaurant,Soba Restaurant,Café,Steakhouse,Supermarket
1,Nihonbashi,Café,BBQ Joint,Japanese Restaurant,Yoshoku Restaurant,Department Store,Coffee Shop,Chinese Restaurant,Bakery,Hotel,Sake Bar
2,Odaiba,Japanese Restaurant,Convenience Store,Coffee Shop,Hot Spring,Bus Stop,Bar,Creperie,Smoke Shop,Science Museum,Scenic Lookout
3,Shibuya,Café,Record Shop,Ramen Restaurant,Rock Club,Italian Restaurant,Nightclub,Japanese Restaurant,Clothing Store,BBQ Joint,Indie Movie Theater
4,Shinjuku,Bar,Sake Bar,Ramen Restaurant,BBQ Joint,Japanese Restaurant,Dessert Shop,Pub,Chinese Restaurant,Rock Club,Department Store


<h3>Cluster major districts</h3>

In [34]:
# set number of clusters
kclusters = 3

tokyo_grouped_clustering = tokyo_grouped.drop('Major District', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tokyo_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 0, 2, 0, 0], dtype=int32)

In [35]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

tokyo_merged = tokyo_df_selected

# merge tokyo_grouped with tokyo_data to add latitude/longitude for each major district
tokyo_merged = tokyo_merged.join(neighborhoods_venues_sorted.set_index('Major District'), on='Major District')

tokyo_merged.head() # check the last columns!

Unnamed: 0,Ward Name,Major District,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Chiyoda,Nagatacho,35.675618,139.743469,1,Convenience Store,Coffee Shop,Japanese Restaurant,Intersection,Hotel,Ramen Restaurant,Soba Restaurant,Café,Steakhouse,Supermarket
1,Chuo,Nihonbashi,35.684068,139.774503,0,Café,BBQ Joint,Japanese Restaurant,Yoshoku Restaurant,Department Store,Coffee Shop,Chinese Restaurant,Bakery,Hotel,Sake Bar
2,Minato,Odaiba,35.61905,139.779364,2,Japanese Restaurant,Convenience Store,Coffee Shop,Hot Spring,Bus Stop,Bar,Creperie,Smoke Shop,Science Museum,Scenic Lookout
3,Shinjuku,Shinjuku,35.693763,139.703632,0,Bar,Sake Bar,Ramen Restaurant,BBQ Joint,Japanese Restaurant,Dessert Shop,Pub,Chinese Restaurant,Rock Club,Department Store
12,Shibuya,Shibuya,35.664596,139.698711,0,Café,Record Shop,Ramen Restaurant,Rock Club,Italian Restaurant,Nightclub,Japanese Restaurant,Clothing Store,BBQ Joint,Indie Movie Theater


In [36]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tokyo_merged['Latitude'], tokyo_merged['Longitude'], tokyo_merged['Major District'], tokyo_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h4> Examine Clusters</h4>

In [37]:
#Cluster 1

tokyo_merged.loc[tokyo_merged['Cluster Labels'] == 0, tokyo_merged.columns[[1] + list(range(5, tokyo_merged.shape[1]))]]

Unnamed: 0,Major District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Nihonbashi,Café,BBQ Joint,Japanese Restaurant,Yoshoku Restaurant,Department Store,Coffee Shop,Chinese Restaurant,Bakery,Hotel,Sake Bar
3,Shinjuku,Bar,Sake Bar,Ramen Restaurant,BBQ Joint,Japanese Restaurant,Dessert Shop,Pub,Chinese Restaurant,Rock Club,Department Store
12,Shibuya,Café,Record Shop,Ramen Restaurant,Rock Club,Italian Restaurant,Nightclub,Japanese Restaurant,Clothing Store,BBQ Joint,Indie Movie Theater


In [38]:
#Cluster 2

tokyo_merged.loc[tokyo_merged['Cluster Labels'] == 1, tokyo_merged.columns[[1] + list(range(5, tokyo_merged.shape[1]))]]

Unnamed: 0,Major District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Nagatacho,Convenience Store,Coffee Shop,Japanese Restaurant,Intersection,Hotel,Ramen Restaurant,Soba Restaurant,Café,Steakhouse,Supermarket


In [39]:
#Cluster 3

tokyo_merged.loc[tokyo_merged['Cluster Labels'] == 2, tokyo_merged.columns[[1] + list(range(5, tokyo_merged.shape[1]))]]

Unnamed: 0,Major District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Odaiba,Japanese Restaurant,Convenience Store,Coffee Shop,Hot Spring,Bus Stop,Bar,Creperie,Smoke Shop,Science Museum,Scenic Lookout
