### Obtain table from webpage and transform the data into a pandas dataframe

In [65]:
import pandas as pd
df = pd.read_html('http://listkodepos.com/daftar-lengkap-kode-pos-kota-bandung/')[0]
df

Unnamed: 0,0,1,2,3,4,5,6
0,1,40611,Cigending,Ujung Berung,Kota,Bandung,Jawa Barat
1,2,40617,Pasanggrahan,Ujung Berung,Kota,Bandung,Jawa Barat
2,3,40619,Pasir Endah,Ujung Berung,Kota,Bandung,Jawa Barat
3,4,40616,Pasirjati,Ujung Berung,Kota,Bandung,Jawa Barat
4,5,40618,Pasirwangi,Ujung Berung,Kota,Bandung,Jawa Barat
5,6,40611,Ujung Berung,Ujung Berung,Kota,Bandung,Jawa Barat
6,7,40117,Babakan Ciamis,Sumur Bandung,Kota,Bandung,Jawa Barat
7,8,40111,Braga,Sumur Bandung,Kota,Bandung,Jawa Barat
8,9,40112,Kebon Pisang,Sumur Bandung,Kota,Bandung,Jawa Barat
9,10,40113,Merdeka,Sumur Bandung,Kota,Bandung,Jawa Barat


### Rename columns into English words

In [66]:
df.rename(columns={1: 'PostalCode', 2: 'Village', 3:'Subdistrict'}, inplace=True)
df

Unnamed: 0,0,PostalCode,Village,Subdistrict,4,5,6
0,1,40611,Cigending,Ujung Berung,Kota,Bandung,Jawa Barat
1,2,40617,Pasanggrahan,Ujung Berung,Kota,Bandung,Jawa Barat
2,3,40619,Pasir Endah,Ujung Berung,Kota,Bandung,Jawa Barat
3,4,40616,Pasirjati,Ujung Berung,Kota,Bandung,Jawa Barat
4,5,40618,Pasirwangi,Ujung Berung,Kota,Bandung,Jawa Barat
5,6,40611,Ujung Berung,Ujung Berung,Kota,Bandung,Jawa Barat
6,7,40117,Babakan Ciamis,Sumur Bandung,Kota,Bandung,Jawa Barat
7,8,40111,Braga,Sumur Bandung,Kota,Bandung,Jawa Barat
8,9,40112,Kebon Pisang,Sumur Bandung,Kota,Bandung,Jawa Barat
9,10,40113,Merdeka,Sumur Bandung,Kota,Bandung,Jawa Barat


### Drop unecessary rows

In [67]:
df.drop([0,4,5,6], axis=1, inplace=True)
df

Unnamed: 0,PostalCode,Village,Subdistrict
0,40611,Cigending,Ujung Berung
1,40617,Pasanggrahan,Ujung Berung
2,40619,Pasir Endah,Ujung Berung
3,40616,Pasirjati,Ujung Berung
4,40618,Pasirwangi,Ujung Berung
5,40611,Ujung Berung,Ujung Berung
6,40117,Babakan Ciamis,Sumur Bandung
7,40111,Braga,Sumur Bandung
8,40112,Kebon Pisang,Sumur Bandung
9,40113,Merdeka,Sumur Bandung


### Group dataframe based on PostalCode and Subdistrict and combine Villages for each group

In [55]:
df = df.groupby(["PostalCode", "Subdistrict"])
df = df["Village"].agg(lambda column: ", ".join(column))
df = df.reset_index(name="Village")
df

Unnamed: 0,PostalCode,Subdistrict,Village
0,40111,Sumur Bandung,Braga
1,40112,Sumur Bandung,Kebon Pisang
2,40113,Sumur Bandung,Merdeka
3,40114,Bandung Wetan,Cihapit
4,40115,Bandung Wetan,Citarum
5,40116,Bandung Wetan,Tamansari
6,40117,Sumur Bandung,Babakan Ciamis
7,40121,Cibeunying Kidul,"Cicadas, Sukamaju"
8,40122,Cibeunying Kaler,Cihaur Geulis
9,40123,Cibeunying Kaler,Sukaluyu


### Check the number of rows and column of the dataframe

In [56]:
df.shape

(105, 3)

In [57]:
df['Subdistrict'].unique()

array(['Sumur Bandung', 'Bandung Wetan', 'Cibeunying Kidul',
       'Cibeunying Kaler', 'Coblong', 'Cidadap', 'Sukasari', 'Sukajadi',
       'Cicendo', 'Andir', 'Mandalajati', 'Bandung Kulon',
       'Babakan Ciparay', 'Bojongloa Kaler', 'Bojongloa Kidul',
       'Astana Anyar', 'Regol', 'Bandung Kidul', 'Lengkong',
       'Batununggal', 'Kiaracondong', 'Buahbatu (Margacinta)',
       'Antapani (Cicadas)', 'Rancasari', 'Arcamanik', 'Cinambo',
       'Gedebage', 'Ujung Berung', 'Cibiru', 'Panyileukan'], dtype=object)

In [68]:
df.sort_values(by=['Subdistrict'], inplace=True)

In [69]:
df

Unnamed: 0,PostalCode,Village,Subdistrict
152,40184,Maleber (Maleer),Andir
147,40184,Campaka,Andir
148,40182,Ciroyom,Andir
151,40181,Kebon Jeruk,Andir
150,40184,Garuda,Andir
149,40183,Dungus Cariang,Andir
146,40291,Antapani Wetan,Antapani (Cicadas)
145,40291,Antapani Tengah,Antapani (Cicadas)
144,40291,Antapani Kulon,Antapani (Cicadas)
143,40291,Antapani Kidul,Antapani (Cicadas)


In [70]:
cols = df.columns.tolist()
cols

['PostalCode', 'Village', 'Subdistrict']

In [71]:
cols.insert(0, cols.pop(cols.index('Subdistrict')))
cols

['Subdistrict', 'PostalCode', 'Village']

In [72]:
df = df.reindex(columns= cols)
df

Unnamed: 0,Subdistrict,PostalCode,Village
152,Andir,40184,Maleber (Maleer)
147,Andir,40184,Campaka
148,Andir,40182,Ciroyom
151,Andir,40181,Kebon Jeruk
150,Andir,40184,Garuda
149,Andir,40183,Dungus Cariang
146,Antapani (Cicadas),40291,Antapani Wetan
145,Antapani (Cicadas),40291,Antapani Tengah
144,Antapani (Cicadas),40291,Antapani Kulon
143,Antapani (Cicadas),40291,Antapani Kidul


###  Load the csv file that has the geographical coordinates of each postal code

In [137]:
coord_list=pd.read_csv("http://data.bandung.go.id/dataset/0ac32316-8450-4064-b7f2-48049439ff5e/resource/6a568b69-5e41-4ea0-80ca-0487341fe9f3/download/koordinat-dan-ketinggian-kantor-kelurahan-di-kota-bandung-2014.csv")
coord_list

Unnamed: 0,Kecamatan,Kelurahan,Lintang Selatan,Bujur Timur,Ketinggian (dpl),Unnamed: 5,Unnamed: 6
0,Bandung Kulon,GEMPOL SARI,-6.92911,107.55907,696,,
1,Bandung Kulon,CIGONDEWAH KALER,-6.93411,107.56361,700,,
2,Bandung Kulon,CIGONDEWAH KIDUL,-6.94386,107.56005,686,,
3,Bandung Kulon,CIGONDEWAH RAHAYU,-6.94889,107.56314,683,,
4,Bandung Kulon,CARINGIN,-6.92727,107.57698,702,,
5,Bandung Kulon,WARUNG MUNCANG,-6.92495,107.57698,400,,
6,Bandung Kulon,CIBUNTU,-6.91819,107.57328,716,,
7,Bandung Kulon,CIJERAH,-6.92018,107.56948,713,,
8,Babakan Ciparay,MARGASUKA,-6.9527,107.5673,681,,
9,Babakan Ciparay,CIRANGRANG,-6.9593,107.585,674,,


### Translate column labels into English

In [138]:
coord_list.rename(columns={'Kecamatan': 'Subdistrict', 'Kelurahan': 'Village','Lintang Selatan':'Latitude','Bujur Timur':'Longitude'}, inplace=True)
coord_list

Unnamed: 0,Subdistrict,Village,Latitude,Longitude,Ketinggian (dpl),Unnamed: 5,Unnamed: 6
0,Bandung Kulon,GEMPOL SARI,-6.92911,107.55907,696,,
1,Bandung Kulon,CIGONDEWAH KALER,-6.93411,107.56361,700,,
2,Bandung Kulon,CIGONDEWAH KIDUL,-6.94386,107.56005,686,,
3,Bandung Kulon,CIGONDEWAH RAHAYU,-6.94889,107.56314,683,,
4,Bandung Kulon,CARINGIN,-6.92727,107.57698,702,,
5,Bandung Kulon,WARUNG MUNCANG,-6.92495,107.57698,400,,
6,Bandung Kulon,CIBUNTU,-6.91819,107.57328,716,,
7,Bandung Kulon,CIJERAH,-6.92018,107.56948,713,,
8,Babakan Ciparay,MARGASUKA,-6.9527,107.5673,681,,
9,Babakan Ciparay,CIRANGRANG,-6.9593,107.585,674,,


In [139]:
coord_list.drop(['Ketinggian (dpl)', 'Unnamed: 5', 'Unnamed: 6'], axis=1, inplace=True)
coord_list

Unnamed: 0,Subdistrict,Village,Latitude,Longitude
0,Bandung Kulon,GEMPOL SARI,-6.92911,107.55907
1,Bandung Kulon,CIGONDEWAH KALER,-6.93411,107.56361
2,Bandung Kulon,CIGONDEWAH KIDUL,-6.94386,107.56005
3,Bandung Kulon,CIGONDEWAH RAHAYU,-6.94889,107.56314
4,Bandung Kulon,CARINGIN,-6.92727,107.57698
5,Bandung Kulon,WARUNG MUNCANG,-6.92495,107.57698
6,Bandung Kulon,CIBUNTU,-6.91819,107.57328
7,Bandung Kulon,CIJERAH,-6.92018,107.56948
8,Babakan Ciparay,MARGASUKA,-6.9527,107.5673
9,Babakan Ciparay,CIRANGRANG,-6.9593,107.585


In [140]:
coord_list = coord_list.groupby(['Subdistrict','Latitude','Longitude'])
coord_list = coord_list['Village'].agg(lambda column: ", ".join(column))
coord_list = coord_list.reset_index(name='Village')
coord_list

Unnamed: 0,Subdistrict,Latitude,Longitude,Village
0,Andir,-6.91908,107.60107,KEBON JERUK
1,Andir,-6.91596,107.57656,GARUDA
2,Andir,-6.91295,107.58617,"DUNGUS CARIANG, CIROYOM"
3,Andir,-6.90734,107.57344,MALEBER
4,Andir,-6.89787,107.56314,CAMPAKA
5,Antapani,-6.91753,107.66056,ANTAPANI KIDUL
6,Antapani,-6.91474,107.66191,ANTAPANI TENGAH
7,Antapani,-6.9135,107.6658,ANTAPANI WETAN
8,Antapani,-6.9101,107.6577,ANTAPANI KULON
9,Arcamanik,-6.9332,107.6729,CISARANTEN ENDAH


In [141]:
coord_list.shape

(150, 4)

### Concatenate columns in df and coord_list dataframes

In [78]:
df = pd.concat([df, coord_list], axis=1)
df

Unnamed: 0,Subdistrict,PostalCode,Village,Subdistrict.1,Latitude,Longitude,Village.1
0,Ujung Berung,40611,Cigending,Andir,-6.91908,107.60107,KEBON JERUK
1,Ujung Berung,40617,Pasanggrahan,Andir,-6.91596,107.57656,GARUDA
2,Ujung Berung,40619,Pasir Endah,Andir,-6.91295,107.58617,"DUNGUS CARIANG, CIROYOM"
3,Ujung Berung,40616,Pasirjati,Andir,-6.90734,107.57344,MALEBER
4,Ujung Berung,40618,Pasirwangi,Andir,-6.89787,107.56314,CAMPAKA
5,Ujung Berung,40611,Ujung Berung,Antapani,-6.91753,107.66056,ANTAPANI KIDUL
6,Sumur Bandung,40117,Babakan Ciamis,Antapani,-6.91474,107.66191,ANTAPANI TENGAH
7,Sumur Bandung,40111,Braga,Antapani,-6.9135,107.6658,ANTAPANI WETAN
8,Sumur Bandung,40112,Kebon Pisang,Antapani,-6.9101,107.6577,ANTAPANI KULON
9,Sumur Bandung,40113,Merdeka,Arcamanik,-6.9332,107.6729,CISARANTEN ENDAH


## Download all dependencies we need

In [18]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.

Libraries imported.


## Explore and cluster the neighborhoods

### Slice the original dataframe and create new dataframe consists of only boroughs that contain the word 'Bandung'

In [142]:
bdg_sub = coord_list[coord_list['Subdistrict'].str.contains('Bandung')].reset_index(drop=True)
bdg_sub.head()

Unnamed: 0,Subdistrict,Latitude,Longitude,Village
0,Bandung Kidul,-6.9627,107.6474,KUJANGSARI
1,Bandung Kidul,-6.9625,107.6354,MENGGER
2,Bandung Kidul,-6.9592,107.6133,WATES
3,Bandung Kidul,-6.953,107.638,BATUNUNGGAL
4,Bandung Kulon,-6.94889,107.56314,CIGONDEWAH RAHAYU


In [143]:
bdg_sub.shape

(19, 4)

### Get the geographical coordinates of Bandung

In [144]:
address = 'Bandung, ID'

geolocator = Nominatim(user_agent="bdg_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Bandung are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Bandung are -6.9344694, 107.6049539.


### Visualize the neighborhoods of Bandung

#### The folium maps do not seem to render on GitHub natively. Youshould be able to view the map by inputting this .ipynb file link (https://github.com/rerirarara/Coursera_Capstone/blob/master/Segmenting%20and%20Clustering%20Neighborhoods3.ipynb) on github into https://nbviewer.jupyter.org/.

In [145]:
# create map of Toronto using latitude and longitude values
map_bdg = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(bdg_sub['Latitude'], bdg_sub['Longitude'], bdg_sub['Village']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bdg)  
    
map_bdg

### Define Foursquare credentials and version

In [91]:
CLIENT_ID = 'NKMINA5PCB2QT52SCUH1OYLDAEKEPGIVURSKW3YK1YA1LOSO' # your Foursquare ID
CLIENT_SECRET = 'S2LCFJPL53GGZY2RD5SSXOOICOP4UECPGGCVJ0YONKJUFR50' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: NKMINA5PCB2QT52SCUH1OYLDAEKEPGIVURSKW3YK1YA1LOSO
CLIENT_SECRET:S2LCFJPL53GGZY2RD5SSXOOICOP4UECPGGCVJ0YONKJUFR50


### Explore the first neighborhood in the dataframe

#### Get the neighborhood's name

In [146]:
bdg_sub.loc[0, 'Village']

'KUJANGSARI'

#### Get the neighborhood's latitude and longitude values

In [147]:
neighborhood_latitude = bdg_sub.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = bdg_sub.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = bdg_sub.loc[0, 'Village'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of KUJANGSARI are -6.9627, 107.6474.


### Get the top 100 venues that are in Kujangsari within a radius of 500 meters

#### Create the GET request URL

In [148]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=NKMINA5PCB2QT52SCUH1OYLDAEKEPGIVURSKW3YK1YA1LOSO&client_secret=S2LCFJPL53GGZY2RD5SSXOOICOP4UECPGGCVJ0YONKJUFR50&v=20180605&ll=-6.9627,107.6474&radius=500&limit=100'

#### Send the GET request and examine the results

In [149]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e549dcf02a172001b8dd5b4'},
  'headerLocation': 'Bandung',
  'headerFullLocation': 'Bandung',
  'headerLocationGranularity': 'city',
  'totalResults': 1,
  'suggestedBounds': {'ne': {'lat': -6.958199995499996,
    'lng': 107.65192497288835},
   'sw': {'lat': -6.967200004500004, 'lng': 107.64287502711166}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bdc2a21c79cc928216685e9',
       'name': 'MARLBORO cafe',
       'location': {'address': 'Kencana arum',
        'lat': -6.9641582483742965,
        'lng': 107.6516453028801,
        'labeledLatLngs': [{'label': 'display',
          'lat': -6.9641582483742965,
          'lng': 107.6516453028801}],
        'distance': 496,
        'cc': 'ID',
        'city': 'Bandung',
        'state':

#### Extract the category of the venue

In [150]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#### Clean the json and structure it into pandas dataframe

In [151]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,MARLBORO cafe,Café,-6.964158,107.651645


#### Print out the number of venues returned by Foursquare

In [152]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

1 venues were returned by Foursquare.


### Explore neighborhoods in Bandung

In [153]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Create a new dataframe of nearby venues on each neighborhood

In [154]:
bdg_venues = getNearbyVenues(names=bdg_sub['Village'],
                                   latitudes=bdg_sub['Latitude'],
                                   longitudes=bdg_sub['Longitude']
                                  )

KUJANGSARI
MENGGER
WATES
BATUNUNGGAL
CIGONDEWAH RAHAYU
CIGONDEWAH KIDUL
CIGONDEWAH KALER
GEMPOL SARI
CARINGIN
WARUNG MUNCANG
CIJERAH
CIBUNTU
CIHAPIT
TAMAN SARI
CITARUM
KEBON PISANG
BRAGA
BABAKAN CIAMIS
MERDEKA


#### Check the size of the new dataframe

In [155]:
print(bdg_venues.shape)
bdg_venues.head()

(387, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,KUJANGSARI,-6.9627,107.6474,MARLBORO cafe,-6.964158,107.651645,Café
1,MENGGER,-6.9625,107.6354,Alfamart,-6.965149,107.63792,Convenience Store
2,MENGGER,-6.9625,107.6354,Farél Pâtisserie Café,-6.962046,107.638743,Bakery
3,MENGGER,-6.9625,107.6354,Pizza Hut,-6.961285,107.633298,Pizza Place
4,MENGGER,-6.9625,107.6354,Angkringan Mas Jo,-6.964924,107.637304,Coffee Shop


#### Check the amount of venues were returned for each neighborhood

In [156]:
bdg_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BABAKAN CIAMIS,16,16,16,16,16,16
BATUNUNGGAL,7,7,7,7,7,7
BRAGA,43,43,43,43,43,43
CARINGIN,6,6,6,6,6,6
CIBUNTU,6,6,6,6,6,6
CIGONDEWAH KALER,4,4,4,4,4,4
CIGONDEWAH KIDUL,1,1,1,1,1,1
CIGONDEWAH RAHAYU,4,4,4,4,4,4
CIHAPIT,100,100,100,100,100,100
CIJERAH,4,4,4,4,4,4


#### Print out the number of unique categories curated from all returned venues

In [157]:
print('There are {} uniques categories.'.format(len(bdg_venues['Venue Category'].unique())))

There are 112 uniques categories.


### Analyze each neighborhood

In [158]:
# one hot encoding
bdg_onehot = pd.get_dummies(bdg_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
bdg_onehot['Neighborhood'] = bdg_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [bdg_onehot.columns[-1]] + list(bdg_onehot.columns[:-1])
bdg_onehot = bdg_onehot[fixed_columns]

bdg_onehot.head()

Unnamed: 0,Neighborhood,Acehnese Restaurant,African Restaurant,American Restaurant,Arcade,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Beach Bar,Bed & Breakfast,Beer Garden,Bistro,Board Shop,Bowling Alley,Breakfast Spot,Bubble Tea Shop,Buffet,Café,Camera Store,Chinese Restaurant,Chocolate Shop,City Hall,Clothing Store,Coffee Shop,Comfort Food Restaurant,Community Center,Convenience Store,Cosmetics Shop,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Eastern European Restaurant,Electronics Store,Fast Food Restaurant,Field,Fish & Chips Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Golf Course,Gourmet Shop,Government Building,Gym / Fitness Center,Health & Beauty Service,Hobby Shop,Hostel,Hotel,Ice Cream Shop,Indian Restaurant,Indonesian Meatball Place,Indonesian Restaurant,Japanese Restaurant,Javanese Restaurant,Jewelry Store,Karaoke Bar,Kids Store,Korean Restaurant,Lounge,Market,Martial Arts Dojo,Massage Studio,Medical Center,Medical Lab,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Music School,Music Store,Music Venue,New American Restaurant,Noodle House,Other Event,Outdoor Supply Store,Outlet Mall,Outlet Store,Padangnese Restaurant,Park,Pastry Shop,Performing Arts Venue,Pharmacy,Pizza Place,Playground,Pool,Pool Hall,Restaurant,Salad Place,Satay Restaurant,Seafood Restaurant,Shipping Store,Shopping Mall,Snack Place,Soccer Field,Soup Place,Spa,Stadium,Steakhouse,Sundanese Restaurant,Sushi Restaurant,Tennis Stadium,Thai Restaurant,Thrift / Vintage Store,Udon Restaurant,Video Store
0,KUJANGSARI,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,MENGGER,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,MENGGER,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,MENGGER,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,MENGGER,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Examine the new dataframe size

In [159]:
bdg_onehot.shape

(387, 113)

#### Create a new dataframe that group rows by neighborhood and by taking the mean of the frequency of occurence of each category

In [172]:
bdg_grouped = bdg_onehot.groupby('Neighborhood').mean().reset_index()
bdg_grouped

Unnamed: 0,Neighborhood,Acehnese Restaurant,African Restaurant,American Restaurant,Arcade,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Beach Bar,Bed & Breakfast,Beer Garden,Bistro,Board Shop,Bowling Alley,Breakfast Spot,Bubble Tea Shop,Buffet,Café,Camera Store,Chinese Restaurant,Chocolate Shop,City Hall,Clothing Store,Coffee Shop,Comfort Food Restaurant,Community Center,Convenience Store,Cosmetics Shop,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Eastern European Restaurant,Electronics Store,Fast Food Restaurant,Field,Fish & Chips Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Golf Course,Gourmet Shop,Government Building,Gym / Fitness Center,Health & Beauty Service,Hobby Shop,Hostel,Hotel,Ice Cream Shop,Indian Restaurant,Indonesian Meatball Place,Indonesian Restaurant,Japanese Restaurant,Javanese Restaurant,Jewelry Store,Karaoke Bar,Kids Store,Korean Restaurant,Lounge,Market,Martial Arts Dojo,Massage Studio,Medical Center,Medical Lab,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Music School,Music Store,Music Venue,New American Restaurant,Noodle House,Other Event,Outdoor Supply Store,Outlet Mall,Outlet Store,Padangnese Restaurant,Park,Pastry Shop,Performing Arts Venue,Pharmacy,Pizza Place,Playground,Pool,Pool Hall,Restaurant,Salad Place,Satay Restaurant,Seafood Restaurant,Shipping Store,Shopping Mall,Snack Place,Soccer Field,Soup Place,Spa,Stadium,Steakhouse,Sundanese Restaurant,Sushi Restaurant,Tennis Stadium,Thai Restaurant,Thrift / Vintage Store,Udon Restaurant,Video Store
0,BABAKAN CIAMIS,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,BATUNUNGGAL,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,BRAGA,0.0,0.0,0.0,0.0,0.0,0.069767,0.023256,0.0,0.046512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.046512,0.0,0.023256,0.0,0.0,0.0,0.116279,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.023256,0.069767,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.116279,0.0,0.0,0.0,0.023256,0.0,0.0,0.023256,0.023256,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.046512,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.023256,0.023256,0.0,0.0,0.046512,0.023256,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.046512,0.023256,0.0,0.0,0.0,0.0,0.0,0.0
3,CARINGIN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,CIBUNTU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,CIGONDEWAH KALER,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,CIGONDEWAH KIDUL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,CIGONDEWAH RAHAYU,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0
8,CIHAPIT,0.01,0.01,0.01,0.01,0.01,0.03,0.0,0.0,0.03,0.01,0.01,0.01,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.11,0.0,0.0,0.01,0.0,0.03,0.08,0.02,0.01,0.02,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.06,0.01,0.0,0.0,0.04,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.02,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.01,0.04,0.02,0.01,0.0,0.0,0.01,0.0,0.02
9,CIJERAH,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Check the new dataframe size

In [161]:
bdg_grouped.shape

(19, 113)

#### Print each neighborhood along with the top 5 most common venues

In [173]:
num_top_venues = 5

for hood in bdg_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = bdg_grouped[bdg_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----BABAKAN CIAMIS----
                   venue  freq
0  Indonesian Restaurant  0.12
1                   Park  0.06
2      Korean Restaurant  0.06
3         Shipping Store  0.06
4  Padangnese Restaurant  0.06


----BATUNUNGGAL----
                   venue  freq
0     Chinese Restaurant  0.29
1  Padangnese Restaurant  0.14
2    Javanese Restaurant  0.14
3       Asian Restaurant  0.14
4           Noodle House  0.14


----BRAGA----
               venue  freq
0              Hotel  0.12
1        Coffee Shop  0.12
2  Electronics Store  0.07
3   Asian Restaurant  0.07
4           Pharmacy  0.05


----CARINGIN----
                       venue  freq
0  Indonesian Meatball Place  0.33
1                      Diner  0.17
2             Clothing Store  0.17
3         Chinese Restaurant  0.17
4                 Restaurant  0.17


----CIBUNTU----
                  venue  freq
0     Convenience Store  0.17
1    Seafood Restaurant  0.17
2           Other Event  0.17
3  Gym / Fitness Center  0.17
4       

#### Sort the venues in descending order

In [174]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Put the above result into a new dataframe

In [175]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = bdg_grouped['Neighborhood']

for ind in np.arange(bdg_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(bdg_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,BABAKAN CIAMIS,Indonesian Restaurant,Hotel,Bakery,Breakfast Spot,Park,Bistro,Padangnese Restaurant,Shipping Store,City Hall,Korean Restaurant
1,BATUNUNGGAL,Chinese Restaurant,Noodle House,Padangnese Restaurant,Asian Restaurant,Javanese Restaurant,Café,Field,Cupcake Shop,Department Store,Dessert Shop
2,BRAGA,Hotel,Coffee Shop,Asian Restaurant,Electronics Store,Steakhouse,Miscellaneous Shop,Bakery,Café,Pharmacy,Dim Sum Restaurant
3,CARINGIN,Indonesian Meatball Place,Clothing Store,Chinese Restaurant,Diner,Restaurant,Video Store,Field,Cupcake Shop,Department Store,Dessert Shop
4,CIBUNTU,Seafood Restaurant,Pool,Convenience Store,Chinese Restaurant,Other Event,Gym / Fitness Center,Golf Course,Eastern European Restaurant,Cosmetics Shop,Cupcake Shop


### Cluster the neighborhood into 5 clusters using k-means

In [176]:
# set number of clusters
kclusters = 10

bdg_grouped_clustering = bdg_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(bdg_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 3, 0, 9, 1, 6, 0, 7], dtype=int32)

#### Create a new dataframe that includes the cluster and the top 10 venues for each neighborhood

In [177]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

bdg_merged = bdg_sub

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
bdg_merged = bdg_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Village')

bdg_merged.head() # check the last columns!

Unnamed: 0,Subdistrict,Latitude,Longitude,Village,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bandung Kidul,-6.9627,107.6474,KUJANGSARI,2,Café,Video Store,Convenience Store,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Eastern European Restaurant
1,Bandung Kidul,-6.9625,107.6354,MENGGER,8,Coffee Shop,Pizza Place,Bakery,Convenience Store,Video Store,Field,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant
2,Bandung Kidul,-6.9592,107.6133,WATES,4,Middle Eastern Restaurant,Food & Drink Shop,Video Store,Field,Cosmetics Shop,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner
3,Bandung Kidul,-6.953,107.638,BATUNUNGGAL,0,Chinese Restaurant,Noodle House,Padangnese Restaurant,Asian Restaurant,Javanese Restaurant,Café,Field,Cupcake Shop,Department Store,Dessert Shop
4,Bandung Kulon,-6.94889,107.56314,CIGONDEWAH RAHAYU,6,Clothing Store,Tennis Stadium,Arcade,Food Truck,Video Store,Fish & Chips Shop,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant


#### Visualize the resulting clusters

In [178]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bdg_merged['Latitude'], bdg_merged['Longitude'], bdg_merged['Village'], bdg_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine clusters

#### Cluster 1

In [179]:
bdg_merged.loc[bdg_merged['Cluster Labels'] == 0, bdg_merged.columns[[3] + list(range(5, bdg_merged.shape[1]))]]

Unnamed: 0,Village,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,BATUNUNGGAL,Chinese Restaurant,Noodle House,Padangnese Restaurant,Asian Restaurant,Javanese Restaurant,Café,Field,Cupcake Shop,Department Store,Dessert Shop
11,CIBUNTU,Seafood Restaurant,Pool,Convenience Store,Chinese Restaurant,Other Event,Gym / Fitness Center,Golf Course,Eastern European Restaurant,Cosmetics Shop,Cupcake Shop
12,CIHAPIT,Café,Coffee Shop,Hotel,Steakhouse,Indonesian Restaurant,Asian Restaurant,Bakery,Clothing Store,Spa,Pool Hall
13,TAMAN SARI,Hotel,Clothing Store,Japanese Restaurant,Café,Beer Garden,Dessert Shop,Electronics Store,Park,Gourmet Shop,Mobile Phone Shop
14,CITARUM,Coffee Shop,Bakery,Clothing Store,Steakhouse,Indonesian Restaurant,Pizza Place,Breakfast Spot,Hotel,Sundanese Restaurant,Café
15,KEBON PISANG,Bakery,Indonesian Restaurant,Coffee Shop,Noodle House,Hotel,Department Store,Performing Arts Venue,Café,Satay Restaurant,Outlet Mall
16,BRAGA,Hotel,Coffee Shop,Asian Restaurant,Electronics Store,Steakhouse,Miscellaneous Shop,Bakery,Café,Pharmacy,Dim Sum Restaurant
17,BABAKAN CIAMIS,Indonesian Restaurant,Hotel,Bakery,Breakfast Spot,Park,Bistro,Padangnese Restaurant,Shipping Store,City Hall,Korean Restaurant
18,MERDEKA,Coffee Shop,Asian Restaurant,Café,Spa,Hotel,Japanese Restaurant,Dessert Shop,Noodle House,Golf Course,Market


#### Cluster 2

In [180]:
bdg_merged.loc[bdg_merged['Cluster Labels'] == 1, bdg_merged.columns[[3] + list(range(5, bdg_merged.shape[1]))]]

Unnamed: 0,Village,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,CIGONDEWAH KIDUL,Music Venue,Video Store,Hostel,Cosmetics Shop,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop


#### Cluster 3

In [181]:
bdg_merged.loc[bdg_merged['Cluster Labels'] == 2, bdg_merged.columns[[3] + list(range(5, bdg_merged.shape[1]))]]

Unnamed: 0,Village,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,KUJANGSARI,Café,Video Store,Convenience Store,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Eastern European Restaurant


#### Cluster 4

In [182]:
bdg_merged.loc[bdg_merged['Cluster Labels'] == 3, bdg_merged.columns[[3] + list(range(5, bdg_merged.shape[1]))]]

Unnamed: 0,Village,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,CARINGIN,Indonesian Meatball Place,Clothing Store,Chinese Restaurant,Diner,Restaurant,Video Store,Field,Cupcake Shop,Department Store,Dessert Shop
9,WARUNG MUNCANG,Indonesian Meatball Place,Sundanese Restaurant,Diner,Restaurant,Video Store,Field,Cosmetics Shop,Cupcake Shop,Department Store,Dessert Shop


#### Cluster 5

In [183]:
bdg_merged.loc[bdg_merged['Cluster Labels'] == 4, bdg_merged.columns[[3] + list(range(5, bdg_merged.shape[1]))]]

Unnamed: 0,Village,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,WATES,Middle Eastern Restaurant,Food & Drink Shop,Video Store,Field,Cosmetics Shop,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner


In [184]:
bdg_merged.loc[bdg_merged['Cluster Labels'] == 5, bdg_merged.columns[[3] + list(range(5, bdg_merged.shape[1]))]]

Unnamed: 0,Village,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,GEMPOL SARI,Indonesian Restaurant,Café,Food Truck,Video Store,Fish & Chips Shop,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner


In [185]:
bdg_merged.loc[bdg_merged['Cluster Labels'] == 6, bdg_merged.columns[[3] + list(range(5, bdg_merged.shape[1]))]]

Unnamed: 0,Village,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,CIGONDEWAH RAHAYU,Clothing Store,Tennis Stadium,Arcade,Food Truck,Video Store,Fish & Chips Shop,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant


In [186]:
bdg_merged.loc[bdg_merged['Cluster Labels'] == 7, bdg_merged.columns[[3] + list(range(5, bdg_merged.shape[1]))]]

Unnamed: 0,Village,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,CIJERAH,Japanese Restaurant,Chinese Restaurant,Pizza Place,Market,Video Store,Field,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant


In [187]:
bdg_merged.loc[bdg_merged['Cluster Labels'] == 8, bdg_merged.columns[[3] + list(range(5, bdg_merged.shape[1]))]]

Unnamed: 0,Village,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,MENGGER,Coffee Shop,Pizza Place,Bakery,Convenience Store,Video Store,Field,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant


In [188]:
bdg_merged.loc[bdg_merged['Cluster Labels'] == 9, bdg_merged.columns[[3] + list(range(5, bdg_merged.shape[1]))]]

Unnamed: 0,Village,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,CIGONDEWAH KALER,Food,Indonesian Restaurant,Bar,Soccer Field,Video Store,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner
