<h1>Singapore vs Hong Kong - A Tale of Two Cities</h1>

<h2>Importing basis libraries and installing packages</h2>

In [1]:
import pandas as pd
import numpy as np

In [2]:
import requests # library to handle requests
import random # library for random number generation

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.1.1f             |       h516909a_0         2.1 MB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    ------------------------------------------------------------
                       

<h2>Reading csv files containing district and location data for Singapore and Hong Kong</h2>

In [3]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,District,Latitude,Longitude,Population
0,"District 01 - Raffles Place, Marina, Cecil",1.28372,103.851239,
1,"District 02 - Tanjong Pagar, Chinatown",1.276571,103.845848,
2,"District 03 - Tiong Bahru, Alexandra, Queenstown",1.286197,103.825765,
3,"District 04 - Mount Faber, Telok Blangah, Harb...",1.267745,103.822485,
4,"District 05 - Buona Vista, Pasir Panjang, Clem...",1.29246,103.787773,


<h2>Visualization of the citys' districts on a map</h2>

In [4]:
hk_address = 'Hong Kong'
hk_latitude = 22.302711
hk_longitude = 114.177216
sg_address = 'Singapore'
sg_latitude = 1.3521
sg_longitude = 103.8198

In [5]:
# create map of New York using latitude and longitude values
hk_map = folium.Map(location=[hk_latitude, hk_longitude], zoom_start=10)

# add markers to map
for lat, lng, region, district in zip(hk_districts_data['Latitude'], hk_districts_data['Longitude'], hk_districts_data['Region'], hk_districts_data['District']):
    label = '{}, {}'.format(district, region)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(hk_map)
    
hk_map

In [6]:
# create map of New York using latitude and longitude values
sg_map = folium.Map(location=[sg_latitude, sg_longitude], zoom_start=10)

for lat, lng, district in zip(sg_districts_data['Latitude'], sg_districts_data['Longitude'], sg_districts_data['District']):
    label = '{}'.format(district)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(sg_map)
    
sg_map

<h2>Extracting venue information for each district in Hong Kong</h2>

In [7]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [8]:
CLIENT_ID = 'DXOLZNK5E3HB4GQLOFBTMK1KZKZVADQFLFXVXZHRQOHBXKTK' # your Foursquare ID
CLIENT_SECRET = 'UGSJTKYNZOXNZCYPIK0XP2UTADUJZQVFU43VEJDWBALFRSF4' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

# type your answer here
radius = 3000
LIMIT = 300

def getNearbyVenues(names, latitudes, longitudes, city, radius=500):
    
    nearby_venues = pd.DataFrame(columns=['Name', 'Category', 'Latitude', 'Longitude', 'City', 'District', 'District Latitude', 'District Longitude'])
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        
        results = requests.get(url).json()
        venues = results['response']['venues']
        df = json_normalize(venues) # flatten JSON

        filtered_columns = ['name', 'categories', 'location.lat', 'location.lng']
        df = df.loc[:, filtered_columns]

        # filter the category for each row
        df['categories'] = df.apply(get_category_type, axis=1)
        df['City']=city
        df['District']=name
        df['District Latitude']=lat
        df['District Longitude']=lng
        
        # clean columns
        #df.columns = [col.split(".")[-1] for col in df.columns]
        df.rename(columns={"name": "Name", "categories": "Category","location.lat": "Latitude","location.lng": "Longitude"}, inplace=True)
        nearby_venues = nearby_venues.append(df)
          
    return(nearby_venues)

Your credentails:
CLIENT_ID: DXOLZNK5E3HB4GQLOFBTMK1KZKZVADQFLFXVXZHRQOHBXKTK
CLIENT_SECRET:UGSJTKYNZOXNZCYPIK0XP2UTADUJZQVFU43VEJDWBALFRSF4


In [10]:
hk_venues = getNearbyVenues(names=hk_districts_data['District'],
                                   latitudes=hk_districts_data['Latitude'],
                                   longitudes=hk_districts_data['Longitude'],
                                   city="Hong Kong"
                                  )
hk_venues.head()

Central and Western
Eastern
Southern
Wan Chai
Sham Shui Po
Kowloon City
Kwun Tong
Wong Tai Sin
Yau Tsim Mong
Islands
Kwai Tsing
North
Sai Kung
Sha Tin
Tai Po
Tsuen Wan
Tuen Mun
Yuen Long


Unnamed: 0,Name,Category,Latitude,Longitude,City,District,District Latitude,District Longitude
0,Immigration Department Travel Documents Issuin...,Government Building,22.286864,114.155123,Hong Kong,Central and Western,22.28666,114.15497
1,Harbour Building (海港政府大樓),Government Building,22.286592,114.154997,Hong Kong,Central and Western,22.28666,114.15497
2,Marine Department 海事處,Government Building,22.286571,114.154943,Hong Kong,Central and Western,22.28666,114.15497
3,Infinitus Plaza (無限極廣場),Shopping Mall,22.286463,114.153689,Hong Kong,Central and Western,22.28666,114.15497
4,Fitness First Platinum,Gym / Fitness Center,22.285881,114.154117,Hong Kong,Central and Western,22.28666,114.15497


In [11]:
hk_venues.size

15824

<h2>Extracting venue information for each district in Singapore</h2>

In [12]:
sg_venues = getNearbyVenues(names=sg_districts_data['District'],
                                   latitudes=sg_districts_data['Latitude'],
                                   longitudes=sg_districts_data['Longitude'],
                                   city="Singapore"
                                  )
sg_venues.head()

District 01 - Raffles Place, Marina, Cecil
District 02 - Tanjong Pagar, Chinatown
District 03 - Tiong Bahru, Alexandra, Queenstown
District 04 - Mount Faber, Telok Blangah, Harbourfront
District 05 - Buona Vista, Pasir Panjang, Clementi
District 06 - Clarke Quay, City Hall
District 07 - Bugis, Beach Road, Golden Mile
District 08 - Little India, Farrer Park
District 09 - Orchard Road, River Valley
District 10 - Bukit Timah, Holland, Balmoral
District 11 - Novena, Newton, Thomson
District 12 - Toa Payoh, Serangoon, Balestier
District 13 - Macpherson, Braddell
District 14 - Geylang, Paya Lebar, Sims
District 15 - Joo Chiat, Marina Parade, Katong
District 16 - Bedok, Upper East Coast, Siglap
District 17 - Changi, Flora, Loyang
District 18 - Tampines, Pasir Ris
District 19 - Punggol, Sengkang, Serangoon Gardens
District 20 - Ang Mo Kio, Bishan, Thomson
District 21 - Upper Bukit Timah, Ulu Pandan, Clementi Park
District 22 - Boon Lay, Jurong, Tuas
District 23 - Choa Chu Kang, Diary Farm, Hil

Unnamed: 0,Name,Category,Latitude,Longitude,City,District,District Latitude,District Longitude
0,Arcade Money Changers,Currency Exchange,1.283997,103.851197,Singapore,"District 01 - Raffles Place, Marina, Cecil",1.28372,103.851239
1,Clifford Centre,Office,1.283816,103.852219,Singapore,"District 01 - Raffles Place, Marina, Cecil",1.28372,103.851239
2,City Money Changers,Financial or Legal Service,1.283475,103.850161,Singapore,"District 01 - Raffles Place, Marina, Cecil",1.28372,103.851239
3,In Touch Physio,Medical Center,1.283503,103.851163,Singapore,"District 01 - Raffles Place, Marina, Cecil",1.28372,103.851239
4,The Arcade,Shopping Mall,1.283746,103.851718,Singapore,"District 01 - Raffles Place, Marina, Cecil",1.28372,103.851239


In [13]:
sg_venues.size

26800

<h2>Some pre-analysis of data</h2>

In [14]:
hk_venues.groupby('Category').count()
print('There are {} uniques categories of venues in Hong Kong.'.format(len(hk_venues['Category'].unique())))

There are 304 uniques categories of venues in Hong Kong.


In [15]:
sg_venues.groupby('Category').count()
print('There are {} uniques categories of venues in Singapore.'.format(len(sg_venues['Category'].unique())))

There are 386 uniques categories of venues in Singapore.


In [16]:
#Combining the two dataframes
all_venues = hk_venues.append(sg_venues)
all_venues.head()
all_venues.size

42624

In [18]:
# one hot encoding
all_onehot = pd.get_dummies(all_venues[['Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
all_onehot['District'] = all_venues['District'] 

# move neighborhood column to the first column
fixed_columns = [all_onehot.columns[-1]] + list(all_onehot.columns[:-1])
all_onehot = all_onehot[fixed_columns]

all_onehot.head()

Unnamed: 0,District,ATM,Accessories Store,Acupuncturist,Adult Education Center,Advertising Agency,African Restaurant,Airport,Airport Gate,Airport Lounge,...,Waterfront,Well,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Yunnan Restaurant,Zoo Exhibit
0,Central and Western,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Central and Western,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Central and Western,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Central and Western,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Central and Western,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
all_grouped = all_onehot.groupby('District').mean().reset_index()
all_grouped

Unnamed: 0,District,ATM,Accessories Store,Acupuncturist,Adult Education Center,Advertising Agency,African Restaurant,Airport,Airport Gate,Airport Lounge,...,Waterfront,Well,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yoga Studio,Yunnan Restaurant,Zoo Exhibit
0,Central and Western,0.0,0.0,0.0,0.011111,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.0
1,"District 01 - Raffles Place, Marina, Cecil",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.007042,0.0,0.0,0.007042,0.0,0.0,0.0
2,"District 02 - Tanjong Pagar, Chinatown",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.007519,0.0,0.0,0.0,0.0,0.0,0.0
3,"District 03 - Tiong Bahru, Alexandra, Queenstown",0.0,0.0,0.007937,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"District 04 - Mount Faber, Telok Blangah, Harb...",0.0,0.008333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.008333,0.0,0.0,0.0
5,"District 05 - Buona Vista, Pasir Panjang, Clem...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.008065,0.0,0.008065,0.0,0.0,0.0,0.0,0.0,0.0
6,"District 06 - Clarke Quay, City Hall",0.0,0.0,0.0,0.0,0.008065,0.0,0.0,0.0,0.0,...,0.008065,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"District 07 - Bugis, Beach Road, Golden Mile",0.0,0.007576,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.007576,0.015152,0.0,0.0,0.0
8,"District 08 - Little India, Farrer Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"District 09 - Orchard Road, River Valley",0.0,0.0,0.0,0.0,0.007937,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
all_grouped.shape

(46, 452)

In [21]:
num_top_venues = 5

for hood in all_grouped['District']:
    print("----"+hood+"----")
    temp = all_grouped[all_grouped['District'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central and Western----
                 venue  freq
0             Building  0.08
1  Government Building  0.06
2               Office  0.04
3      Doctor's Office  0.04
4          Coffee Shop  0.03


----District 01 - Raffles Place, Marina, Cecil----
         venue  freq
0       Office  0.07
1       Bakery  0.06
2     Building  0.04
3         Bank  0.04
4  Salad Place  0.04


----District 02 - Tanjong Pagar, Chinatown----
                 venue  freq
0  Japanese Restaurant  0.10
1               Office  0.06
2                 Café  0.05
3          Coffee Shop  0.05
4     Asian Restaurant  0.03


----District 03 - Tiong Bahru, Alexandra, Queenstown----
                venue  freq
0            Building  0.06
1   College Classroom  0.06
2         Coffee Shop  0.05
3    Asian Restaurant  0.03
4  Chinese Restaurant  0.03


----District 04 - Mount Faber, Telok Blangah, Harbourfront----
                venue  freq
0      Clothing Store  0.03
1              Office  0.03
2  Chinese Restauran

In [22]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [23]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
district_venues_sorted = pd.DataFrame(columns=columns)
district_venues_sorted['District'] = all_grouped['District']

for ind in np.arange(all_grouped.shape[0]):
    district_venues_sorted.iloc[ind, 1:] = return_most_common_venues(all_grouped.iloc[ind, :], num_top_venues)

district_venues_sorted.head()

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central and Western,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
1,"District 01 - Raffles Place, Marina, Cecil",Office,Bakery,Bank,Building,Café,Salad Place,Health & Beauty Service,Medical Center,Japanese Restaurant,Asian Restaurant
2,"District 02 - Tanjong Pagar, Chinatown",Japanese Restaurant,Office,Café,Coffee Shop,Building,Sandwich Place,Asian Restaurant,Chinese Restaurant,Cosmetics Shop,Park
3,"District 03 - Tiong Bahru, Alexandra, Queenstown",College Classroom,Building,Coffee Shop,Chinese Restaurant,Residential Building (Apartment / Condo),Asian Restaurant,Food Court,Dentist's Office,Medical Center,Nail Salon
4,"District 04 - Mount Faber, Telok Blangah, Harb...",Clothing Store,Chinese Restaurant,Office,Cosmetics Shop,Jewelry Store,Food Court,Boutique,Café,Sporting Goods Shop,Housing Development


<h2>Now we will cluster the districts from both cities</h2>

In [24]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 4

all_grouped_clustering = all_grouped.drop('District', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(all_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 3, 0, 3, 3, 2, 3, 0, 3, 2], dtype=int32)

In [25]:
# add clustering labels
district_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

all_merged = all_venues

# merge to add latitude/longitude for each neighborhood
all_merged = all_merged.join(district_venues_sorted.set_index('District'), on='District')

all_merged.head() # check the last columns!

Unnamed: 0,Name,Category,Latitude,Longitude,City,District,District Latitude,District Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Immigration Department Travel Documents Issuin...,Government Building,22.286864,114.155123,Hong Kong,Central and Western,22.28666,114.15497,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
1,Harbour Building (海港政府大樓),Government Building,22.286592,114.154997,Hong Kong,Central and Western,22.28666,114.15497,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
2,Marine Department 海事處,Government Building,22.286571,114.154943,Hong Kong,Central and Western,22.28666,114.15497,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
3,Infinitus Plaza (無限極廣場),Shopping Mall,22.286463,114.153689,Hong Kong,Central and Western,22.28666,114.15497,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
4,Fitness First Platinum,Gym / Fitness Center,22.285881,114.154117,Hong Kong,Central and Western,22.28666,114.15497,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop


<h2>Let's visualize our cluster on the map (please zoom in to see all the clusters in the two cities)</h2>

In [26]:
import matplotlib.cm as cm
import matplotlib.colors as colors
# create map
map_clusters = folium.Map(location=[hk_latitude, hk_longitude], zoom_start=2)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(all_merged['District Latitude'], all_merged['District Longitude'], all_merged['District'], all_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h2>Now let's take a look at the clusters formed and analyze them</h2>

In [51]:
c1 = all_merged.loc[all_merged['Cluster Labels'] == 0, all_merged.columns[[1] + list(range(5, all_merged.shape[1]))]]
c11 = c1.groupby('District').count()
c1

Unnamed: 0,Category,District,District Latitude,District Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Courthouse,Eastern,22.284110,114.224140,0,Chinese Restaurant,Housing Development,Hong Kong Restaurant,Café,Convenience Store,Park,Japanese Restaurant,Bakery,Coffee Shop,Thai Restaurant
1,Housing Development,Eastern,22.284110,114.224140,0,Chinese Restaurant,Housing Development,Hong Kong Restaurant,Café,Convenience Store,Park,Japanese Restaurant,Bakery,Coffee Shop,Thai Restaurant
2,Bus Station,Eastern,22.284110,114.224140,0,Chinese Restaurant,Housing Development,Hong Kong Restaurant,Café,Convenience Store,Park,Japanese Restaurant,Bakery,Coffee Shop,Thai Restaurant
3,Police Station,Eastern,22.284110,114.224140,0,Chinese Restaurant,Housing Development,Hong Kong Restaurant,Café,Convenience Store,Park,Japanese Restaurant,Bakery,Coffee Shop,Thai Restaurant
4,Park,Eastern,22.284110,114.224140,0,Chinese Restaurant,Housing Development,Hong Kong Restaurant,Café,Convenience Store,Park,Japanese Restaurant,Bakery,Coffee Shop,Thai Restaurant
5,Grocery Store,Eastern,22.284110,114.224140,0,Chinese Restaurant,Housing Development,Hong Kong Restaurant,Café,Convenience Store,Park,Japanese Restaurant,Bakery,Coffee Shop,Thai Restaurant
6,Market,Eastern,22.284110,114.224140,0,Chinese Restaurant,Housing Development,Hong Kong Restaurant,Café,Convenience Store,Park,Japanese Restaurant,Bakery,Coffee Shop,Thai Restaurant
7,Park,Eastern,22.284110,114.224140,0,Chinese Restaurant,Housing Development,Hong Kong Restaurant,Café,Convenience Store,Park,Japanese Restaurant,Bakery,Coffee Shop,Thai Restaurant
8,Housing Development,Eastern,22.284110,114.224140,0,Chinese Restaurant,Housing Development,Hong Kong Restaurant,Café,Convenience Store,Park,Japanese Restaurant,Bakery,Coffee Shop,Thai Restaurant
9,Pier,Eastern,22.284110,114.224140,0,Chinese Restaurant,Housing Development,Hong Kong Restaurant,Café,Convenience Store,Park,Japanese Restaurant,Bakery,Coffee Shop,Thai Restaurant


In [47]:
c2 = all_merged.loc[all_merged['Cluster Labels'] == 1, all_merged.columns[[1] + list(range(5, all_merged.shape[1]))]]
c22 = c2.groupby('District').count()
c22

Unnamed: 0_level_0,Category,District Latitude,District Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Kowloon City,115,121,121,121,121,121,121,121,121,121,121,121,121,121


In [48]:
c3 =all_merged.loc[all_merged['Cluster Labels'] == 2, all_merged.columns[[1] + list(range(5, all_merged.shape[1]))]]
c33 = c3.groupby('District').count()
c33

Unnamed: 0_level_0,Category,District Latitude,District Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
"District 05 - Buona Vista, Pasir Panjang, Clementi",107,124,124,124,124,124,124,124,124,124,124,124,124,124
"District 09 - Orchard Road, River Valley",112,126,126,126,126,126,126,126,126,126,126,126,126,126
Wan Chai,119,125,125,125,125,125,125,125,125,125,125,125,125,125


In [50]:
c4 = all_merged.loc[all_merged['Cluster Labels'] == 3, all_merged.columns[[1] + list(range(5, all_merged.shape[1]))]]
c44 = c4.groupby('District').count()
c4

Unnamed: 0,Category,District,District Latitude,District Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Government Building,Central and Western,22.286660,114.154970,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
1,Government Building,Central and Western,22.286660,114.154970,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
2,Government Building,Central and Western,22.286660,114.154970,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
3,Shopping Mall,Central and Western,22.286660,114.154970,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
4,Gym / Fitness Center,Central and Western,22.286660,114.154970,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
5,Travel Lounge,Central and Western,22.286660,114.154970,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
6,Boat or Ferry,Central and Western,22.286660,114.154970,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
7,Heliport,Central and Western,22.286660,114.154970,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
8,Coworking Space,Central and Western,22.286660,114.154970,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
9,Coffee Shop,Central and Western,22.286660,114.154970,3,Building,Government Building,Doctor's Office,Office,Bus Stop,Coworking Space,Lawyer,Dentist's Office,Gym / Fitness Center,Coffee Shop
