# Opening a New Shopping Mall in Kuala Lumpur, Malaysia
___

**Import Libraries**

In [1]:
import os
import time

import folium
import geocoder
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

**Web Scraping**

In [2]:
# Download the Wikipedia category page that lists Kuala Lumpur's suburbs.
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
page = requests.get("https://en.wikipedia.org/wiki/Category:Suburbs_in_Kuala_Lumpur", timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.text, 'html.parser')

In [3]:
# Holds the neighborhood names taken from the category page.
neighborhoodList = list()

In [4]:
# Take the text of every <li> inside the first "mw-category" block of the page.
# The list is rebuilt from scratch (rather than appended to), so re-running this
# cell no longer duplicates every neighborhood.
category_block = soup.find_all("div", class_="mw-category")[0]
neighborhoodList = [row.text for row in category_block.find_all("li")]

In [5]:
# Display every scraped neighborhood name.
neighborhoodList

['Alam Damai',
 'Ampang, Kuala Lumpur',
 'Bandar Menjalara',
 'Bandar Sri Permaisuri',
 'Bandar Tasik Selatan',
 'Bandar Tun Razak',
 'Bangsar',
 'Bangsar Park',
 'Bangsar South',
 'Batu 11 Cheras',
 'Batu, Kuala Lumpur',
 'Brickfields',
 'Bukit Bintang',
 'Bukit Jalil',
 'Bukit Kiara',
 'Bukit Nanas',
 'Bukit Petaling',
 'Bukit Tunku',
 'Cheras, Kuala Lumpur',
 'Chow Kit',
 'Damansara Heights',
 'Damansara Town Centre',
 'Dang Wangi',
 'Desa Petaling',
 'Federal Hill, Kuala Lumpur',
 'Happy Garden',
 'Jalan Cochrane, Kuala Lumpur',
 'Jinjang',
 'Kampung Baru, Kuala Lumpur',
 'Kampung Datuk Keramat',
 'Kampung Kasipillay',
 'Kampung Padang Balang',
 'Kepong',
 'Kepong Baru',
 'KL Eco City',
 'Kuchai Lama',
 'Lembah Pantai',
 'Maluri',
 'Medan Tuanku',
 'Miharja',
 'Mont Kiara',
 'Pantai Dalam',
 'Pudu, Kuala Lumpur',
 'Putrajaya',
 'Salak South',
 'Segambut',
 'Semarak',
 'Sentul, Kuala Lumpur',
 'Setapak',
 'Setiawangsa',
 'Shamelin',
 'Sri Hartamas',
 'Sri Petaling',
 'Sungai Besi',


In [6]:
# One-column table of neighborhood names; coordinates are added further down.
df = pd.DataFrame(dict(Neighborhood=neighborhoodList))

In [7]:
# Preview the first ten neighborhoods.
df.head(10)  

Unnamed: 0,Neighborhood
0,Alam Damai
1,"Ampang, Kuala Lumpur"
2,Bandar Menjalara
3,Bandar Sri Permaisuri
4,Bandar Tasik Selatan
5,Bandar Tun Razak
6,Bangsar
7,Bangsar Park
8,Bangsar South
9,Batu 11 Cheras


**Get Geo Coordinates**

In [8]:
# Define Function to Get Coordinates >>>

def GetLatLng(neighborhood, max_retries=5, retry_delay=1.0):
    """Geocode a Kuala Lumpur neighborhood with the ArcGIS geocoder.

    Parameters
    ----------
    neighborhood : str
        Neighborhood name; ", Kuala Lumpur, Malaysia" is appended to build the query.
    max_retries : int, optional
        Maximum number of lookups before giving up (default 5).
    retry_delay : float, optional
        Seconds to wait between two failed lookups (default 1.0).

    Returns
    -------
    list
        The ``latlng`` value of the geocoder result, i.e. ``[latitude, longitude]``.

    Raises
    ------
    RuntimeError
        If no coordinates were obtained after ``max_retries`` lookups. The
        previous implementation retried forever, which hung the notebook
        whenever a name could not be resolved or the service was unreachable.
    """
    query = '{}, Kuala Lumpur, Malaysia'.format(neighborhood)
    for attempt in range(1, max_retries + 1):
        latlngcoords = geocoder.arcgis(query).latlng
        if latlngcoords is not None:
            return latlngcoords
        # Pause before the next lookup, but not after the final one.
        if attempt < max_retries:
            time.sleep(retry_delay)
    raise RuntimeError(
        'Could not geocode {!r} after {} attempts'.format(query, max_retries))

In [9]:
# Geocode every neighborhood, in table order >>>

coords = list(map(GetLatLng, df["Neighborhood"].tolist()))
coords

[[3.0576900000000364, 101.74388000000005],
 [3.1484988508598852, 101.69672774991264],
 [3.1903500000000236, 101.62545000000006],
 [3.1039100000000417, 101.71226000000007],
 [3.072750000000042, 101.71461000000005],
 [3.0827600000000643, 101.72281000000004],
 [3.1292000000000257, 101.67844000000008],
 [3.1292000000000257, 101.67844000000008],
 [3.111020000000053, 101.66283000000004],
 [3.061870000000056, 101.74675000000008],
 [3.147890000000075, 101.69405000000006],
 [3.12916000000007, 101.68406000000004],
 [3.147770000000037, 101.70855000000006],
 [3.0578100000000745, 101.68965000000009],
 [3.1434800000000678, 101.64433000000008],
 [3.1486092228953293, 101.69985445868105],
 [3.129290000000026, 101.69896000000006],
 [3.17381000000006, 101.68276000000009],
 [3.061870000000056, 101.74675000000008],
 [3.163780000000031, 101.69814000000008],
 [3.1479700000000435, 101.66795000000008],
 [3.1364442625375997, 101.69029430612223],
 [3.1578250453611005, 101.69728006780952],
 [3.083300000000065, 10

In [10]:
# Turn the list of [lat, lng] pairs into a two-column table >>>

df_coords = pd.DataFrame(data=coords, columns=['Latitude', 'Longitude'])
df_coords.iloc[:8]

Unnamed: 0,Latitude,Longitude
0,3.05769,101.74388
1,3.148499,101.696728
2,3.19035,101.62545
3,3.10391,101.71226
4,3.07275,101.71461
5,3.08276,101.72281
6,3.1292,101.67844
7,3.1292,101.67844


In [11]:
# Copy both coordinate columns onto the neighborhood table (aligned on the row index) >>>

for coord_column in ('Latitude', 'Longitude'):
    df[coord_column] = df_coords[coord_column]

df

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Alam Damai,3.057690,101.743880
1,"Ampang, Kuala Lumpur",3.148499,101.696728
2,Bandar Menjalara,3.190350,101.625450
3,Bandar Sri Permaisuri,3.103910,101.712260
4,Bandar Tasik Selatan,3.072750,101.714610
...,...,...,...
66,Taman Tun Dr Ismail,3.152830,101.622710
67,Taman U-Thant,3.157700,101.724520
68,Taman Wahyu,3.222400,101.671730
69,Titiwangsa,3.180730,101.703210


In [12]:
# Save the neighborhood/coordinate table to a CSV file, without the row index.
df.to_csv('Neighborhood Coordinates.csv', index=False)  

**Creating BaseMap of Kuala Lumpur**

In [33]:
# Base map centred on Kuala Lumpur >>>

latitude, longitude = 3.1390, 101.6869
BaseMap = folium.Map(location=[latitude, longitude], zoom_start=11)

BaseMap

In [14]:
# Drop one circle marker per neighborhood onto the base map >>>

for lat, lng, label in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    marker = folium.features.CircleMarker(
        location=[lat, lng],
        radius=5,
        color='#822645',
        fill=True,
        popup=label,  # the neighborhood name is shown when the marker is clicked
        fill_color='#EE0B7C',
        fill_opacity=0.9,
    )
    marker.add_to(BaseMap)

BaseMap

**Foursquare API to Explore the Neighborhood**

In [15]:
# DEFINE FOURSQUARE CREDENTIALS >>>

# Credentials are read from the environment so that they are never stored in the
# notebook. Export FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel. Any key pair that was previously committed in this cell
# should be treated as leaked and regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20180605'  # value sent as the API's "v" query parameter

if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'the Foursquare requests below cannot authenticate.')

In [16]:
# GET TOP 100 VENUES WITHIN 2000 METERS >>>

radius = 2000
LIMIT = 100

# Rebuilt on every run of this cell; one tuple per venue.
venues = []

for lat, long, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):

    # MAKE THE GET REQUEST
    # requests builds and URL-encodes the query string; the timeout and the
    # status check turn a stalled or rejected call into an immediate, explicit error.
    response = requests.get(
        "https://api.foursquare.com/v2/venues/explore",
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, long),
            'radius': radius,
            'limit': LIMIT,
        },
        timeout=30,
    )
    response.raise_for_status()

    # A reply without any group simply contributes no venues for this neighborhood.
    groups = response.json().get('response', {}).get('groups', [])
    results = groups[0]['items'] if groups else []

    # return only relevant information for each nearby venue
    for venue in results:
        details = venue['venue']
        categories = details.get('categories') or []
        if not categories:
            # A venue without a category cannot take part in the category
            # frequency analysis, so it is skipped instead of raising IndexError.
            continue
        venues.append((
            neighborhood,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            categories[0]['name']))

In [17]:
# CONVERT VENUE LIST INTO A DATAFRAME

df_venues = pd.DataFrame(
    data=venues,
    columns=[
        'Neighborhood', 'Latitude', 'Longitude',              # the neighborhood queried
        'VenueName', 'VenueLatitude', 'VenueLongitude',       # the venue returned
        'VenueCategory',
    ],
)
df_venues

Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Alam Damai,3.05769,101.74388,Jc Deli 皆喜食坊,3.058397,101.748560,Food & Drink Shop
1,Alam Damai,3.05769,101.74388,Pengedar Shaklee Kuala Lumpur,3.061235,101.740696,Supplement Shop
2,Alam Damai,3.05769,101.74388,Machi Noodle 妈子面,3.057695,101.746635,Noodle House
3,Alam Damai,3.05769,101.74388,Darma Motor,3.054160,101.744328,Motorcycle Shop
4,Alam Damai,3.05769,101.74388,628火焰鑫茶室,3.058442,101.747947,Chinese Restaurant
...,...,...,...,...,...,...,...
7077,Wangsa Maju,3.20387,101.73715,Rampai Bazaria Malay Town,3.199526,101.729178,Malay Restaurant
7078,Wangsa Maju,3.20387,101.73715,The Coffee Bean & Tea Leaf,3.204624,101.720668,Coffee Shop
7079,Wangsa Maju,3.20387,101.73715,Arai Helmet Specialist,3.204192,101.722866,Motorcycle Shop
7080,Wangsa Maju,3.20387,101.73715,ARATE LIFESTYLE,3.204108,101.721322,Café


In [18]:
# Number of venue rows collected for each neighborhood.
df_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alam Damai,100,100,100,100,100,100
"Ampang, Kuala Lumpur",100,100,100,100,100,100
Bandar Menjalara,100,100,100,100,100,100
Bandar Sri Permaisuri,100,100,100,100,100,100
Bandar Tasik Selatan,100,100,100,100,100,100
...,...,...,...,...,...,...
Taman Tun Dr Ismail,100,100,100,100,100,100
Taman U-Thant,100,100,100,100,100,100
Taman Wahyu,100,100,100,100,100,100
Titiwangsa,100,100,100,100,100,100


**One Hot Encoding**

In [19]:
# One indicator column per venue category, one row per venue.
onehot = pd.get_dummies(df_venues[['VenueCategory']], prefix="", prefix_sep="")

# Put the neighborhood name in the first column. insert() places it there directly:
# the earlier "append, then move the last column to the front" approach moved the
# wrong column whenever a venue category was itself called 'Neighborhoods', whereas
# insert() raises ValueError on such a name clash instead of corrupting the table.
onehot.insert(0, 'Neighborhoods', df_venues['Neighborhood'])
onehot

Unnamed: 0,Neighborhoods,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Antique Shop,Aquarium,Arcade,Art Gallery,Art Museum,...,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yakitori Restaurant,Yoga Studio,Zoo
0,Alam Damai,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Alam Damai,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Alam Damai,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Alam Damai,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Alam Damai,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7077,Wangsa Maju,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7078,Wangsa Maju,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7079,Wangsa Maju,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7080,Wangsa Maju,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# Average the indicator columns per neighborhood: each value becomes the share of
# that neighborhood's venue rows falling in the category.
grouped = onehot.groupby("Neighborhoods", as_index=False).mean()
grouped

Unnamed: 0,Neighborhoods,Accessories Store,Adult Boutique,African Restaurant,American Restaurant,Antique Shop,Aquarium,Arcade,Art Gallery,Art Museum,...,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Yakitori Restaurant,Yoga Studio,Zoo
0,Alam Damai,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.00,0.00,...,0.00,0.0,0.00,0.0,0.01,0.00,0.00,0.00,0.00,0.0
1,"Ampang, Kuala Lumpur",0.00,0.01,0.00,0.0,0.0,0.0,0.00,0.00,0.00,...,0.00,0.0,0.00,0.0,0.00,0.00,0.00,0.01,0.00,0.0
2,Bandar Menjalara,0.00,0.00,0.01,0.0,0.0,0.0,0.00,0.00,0.00,...,0.00,0.0,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.0
3,Bandar Sri Permaisuri,0.01,0.00,0.00,0.0,0.0,0.0,0.00,0.01,0.00,...,0.01,0.0,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.0
4,Bandar Tasik Selatan,0.01,0.00,0.00,0.0,0.0,0.0,0.00,0.00,0.00,...,0.00,0.0,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66,Taman Tun Dr Ismail,0.01,0.00,0.00,0.0,0.0,0.0,0.00,0.00,0.00,...,0.00,0.0,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.0
67,Taman U-Thant,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.02,0.00,...,0.00,0.0,0.01,0.0,0.00,0.00,0.01,0.00,0.00,0.0
68,Taman Wahyu,0.00,0.00,0.00,0.0,0.0,0.0,0.00,0.00,0.00,...,0.00,0.0,0.00,0.0,0.00,0.00,0.00,0.00,0.01,0.0
69,Titiwangsa,0.00,0.00,0.00,0.0,0.0,0.0,0.01,0.01,0.00,...,0.01,0.0,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.0


In [21]:
# Keep only the neighborhood name and its shopping-mall share.
mall = grouped.loc[:, ["Neighborhoods", "Shopping Mall"]]
mall

Unnamed: 0,Neighborhoods,Shopping Mall
0,Alam Damai,0.00
1,"Ampang, Kuala Lumpur",0.01
2,Bandar Menjalara,0.01
3,Bandar Sri Permaisuri,0.00
4,Bandar Tasik Selatan,0.01
...,...,...
66,Taman Tun Dr Ismail,0.03
67,Taman U-Thant,0.03
68,Taman Wahyu,0.00
69,Titiwangsa,0.01


**Clustering Neighborhoods**

In [22]:
kclusters = 3

# Cluster on the shopping-mall share alone. `columns=` replaces the positional
# axis argument (`drop([...], 1)`), which pandas 2.x no longer accepts.
clustering = mall.drop(columns=['Neighborhoods'])

# n_init is pinned explicitly: scikit-learn changed its default, and 10 is the
# historical value, so the fitted labels do not depend on the installed version.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(clustering)

# Peek at the cluster label assigned to the first ten neighborhoods.
kmeans.labels_[0:10]

array([2, 0, 0, 2, 0, 2, 1, 1, 0, 2])

In [23]:
# New table: mall share per neighborhood plus the k-means label of each row,
# with the name column renamed to 'Neighborhood'.
merged = (
    mall
    .rename(columns={'Neighborhoods': 'Neighborhood'})
    .assign(**{'Cluster Labels': kmeans.labels_})
)
merged

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels
0,Alam Damai,0.00,2
1,"Ampang, Kuala Lumpur",0.01,0
2,Bandar Menjalara,0.01,0
3,Bandar Sri Permaisuri,0.00,2
4,Bandar Tasik Selatan,0.01,0
...,...,...,...
66,Taman Tun Dr Ismail,0.03,1
67,Taman U-Thant,0.03,1
68,Taman Wahyu,0.00,2
69,Titiwangsa,0.01,0


In [24]:
# Attach the remaining columns of df (Latitude/Longitude) to each row by looking
# the neighborhood name up in df.
# NOTE(review): this rebinds `merged` to the joined frame, so running the cell a
# second time would join onto a frame that already has these columns. Confirm it
# is only executed once per kernel session.
merged = merged.join(df.set_index('Neighborhood'), on='Neighborhood')
merged

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Alam Damai,0.00,2,3.057690,101.743880
1,"Ampang, Kuala Lumpur",0.01,0,3.148499,101.696728
2,Bandar Menjalara,0.01,0,3.190350,101.625450
3,Bandar Sri Permaisuri,0.00,2,3.103910,101.712260
4,Bandar Tasik Selatan,0.01,0,3.072750,101.714610
...,...,...,...,...,...
66,Taman Tun Dr Ismail,0.03,1,3.152830,101.622710
67,Taman U-Thant,0.03,1,3.157700,101.724520
68,Taman Wahyu,0.00,2,3.222400,101.671730
69,Titiwangsa,0.01,0,3.180730,101.703210


In [25]:
# Order the rows by cluster label so each cluster's neighborhoods sit together.
merged = merged.sort_values(by='Cluster Labels')
merged

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
70,Wangsa Maju,0.01,0,3.203870,101.737150
21,Damansara Town Centre,0.01,0,3.136444,101.690294
22,Dang Wangi,0.02,0,3.157825,101.697280
50,Shamelin,0.02,0,3.124570,101.735970
60,Taman Melati,0.01,0,3.223570,101.723990
...,...,...,...,...,...
39,Miharja,0.00,2,3.147890,101.694050
41,Pantai Dalam,0.00,2,3.094760,101.667470
44,Salak South,0.00,2,3.081540,101.696890
48,Setapak,0.00,2,3.188160,101.704150


**Map Visualization Neighborhood**

In [26]:
# One colour per cluster label (0, 1, 2).
cluster_palette = ['#CA2C61', '#4B40CD', '#E7B81C']
color_by_cluster = dict(enumerate(cluster_palette))

# Explicit label -> colour lookup. The earlier pd.cut(..., bins=3) split the numeric
# range of the labels into three intervals, which only matched the labels by
# coincidence while they were exactly 0, 1 and 2. The ordered categorical dtype
# that pd.cut produced is kept.
merged['Marker Color'] = pd.Categorical(
    merged['Cluster Labels'].map(color_by_cluster),
    categories=cluster_palette,
    ordered=True,
)
merged

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude,Marker Color
70,Wangsa Maju,0.01,0,3.203870,101.737150,#CA2C61
21,Damansara Town Centre,0.01,0,3.136444,101.690294,#CA2C61
22,Dang Wangi,0.02,0,3.157825,101.697280,#CA2C61
50,Shamelin,0.02,0,3.124570,101.735970,#CA2C61
60,Taman Melati,0.01,0,3.223570,101.723990,#CA2C61
...,...,...,...,...,...,...
39,Miharja,0.00,2,3.147890,101.694050,#E7B81C
41,Pantai Dalam,0.00,2,3.094760,101.667470,#E7B81C
44,Salak South,0.00,2,3.081540,101.696890,#E7B81C
48,Setapak,0.00,2,3.188160,101.704150,#E7B81C


In [27]:
# Fresh map centred on Kuala Lumpur for the cluster view.
latitude, longitude = 3.1390, 101.6869
Map = folium.Map(location=[latitude, longitude], zoom_start=11)

In [28]:
# Draw one marker per neighborhood, coloured by its cluster. The loop variables are
# named for what they hold (previously `cluster` carried the colour and `label`
# the cluster id), and the popup shows the neighborhood name together with its
# cluster instead of a bare cluster number.
for lat, lng, color, cluster, neighborhood in zip(
        merged['Latitude'], merged['Longitude'], merged['Marker Color'],
        merged['Cluster Labels'], merged['Neighborhood']):
    folium.features.CircleMarker(
        [lat, lng],
        radius=7,
        color=color,
        popup='{} (Cluster {})'.format(neighborhood, cluster),
        fill=True,
        fill_color=color,
        fill_opacity=0.5
    ).add_to(Map)

# Add Layer Control
folium.TileLayer('openstreetmap').add_to(Map)
folium.TileLayer('cartodbpositron').add_to(Map)
folium.LayerControl().add_to(Map)

Map

In [29]:
# Write the cluster map to a standalone HTML file.
Map.save('Map.html') 

**Examine Clusters**

Cluster_0

In [34]:
# Rows whose cluster label is 0.
merged[merged['Cluster Labels'].eq(0)]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude,Marker Color
70,Wangsa Maju,0.01,0,3.20387,101.73715,#CA2C61
21,Damansara Town Centre,0.01,0,3.136444,101.690294,#CA2C61
22,Dang Wangi,0.02,0,3.157825,101.69728,#CA2C61
50,Shamelin,0.02,0,3.12457,101.73597,#CA2C61
60,Taman Melati,0.01,0,3.22357,101.72399,#CA2C61
30,Kampung Datuk Keramat,0.01,0,3.1664,101.73046,#CA2C61
31,Kampung Kasipillay,0.02,0,3.17776,101.6824,#CA2C61
57,Taman Duta,0.02,0,3.15562,101.67184,#CA2C61
56,Taman Desa,0.01,0,3.10297,101.68471,#CA2C61
69,Titiwangsa,0.01,0,3.18073,101.70321,#CA2C61


Cluster_1

In [35]:
# Rows whose cluster label is 1.
merged[merged['Cluster Labels'].eq(1)]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude,Marker Color
42,"Pudu, Kuala Lumpur",0.04,1,3.13354,101.71307,#4B40CD
67,Taman U-Thant,0.03,1,3.1577,101.72452,#4B40CD
40,Mont Kiara,0.03,1,3.16529,101.65242,#4B40CD
6,Bangsar,0.05,1,3.1292,101.67844,#4B40CD
36,Lembah Pantai,0.04,1,3.121189,101.663889,#4B40CD
7,Bangsar Park,0.05,1,3.1292,101.67844,#4B40CD
12,Bukit Bintang,0.03,1,3.14777,101.70855,#4B40CD
66,Taman Tun Dr Ismail,0.03,1,3.15283,101.62271,#4B40CD
11,Brickfields,0.04,1,3.12916,101.68406,#4B40CD
29,"Kampung Baru, Kuala Lumpur",0.03,1,3.16546,101.71028,#4B40CD


Cluster_2

In [36]:
# Rows whose cluster label is 2.
merged[merged['Cluster Labels'].eq(2)]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude,Marker Color
58,Taman Ibukota,0.0,2,3.21231,101.71525,#E7B81C
61,Taman Midah,0.0,2,3.09359,101.72837,#E7B81C
65,Taman Taynton View,0.0,2,3.08707,101.73681,#E7B81C
54,Taman Bukit Maluri,0.0,2,3.20066,101.63337,#E7B81C
63,Taman P. Ramlee,0.0,2,3.19394,101.70573,#E7B81C
52,Sri Petaling,0.0,2,3.0726,101.68252,#E7B81C
68,Taman Wahyu,0.0,2,3.2224,101.67173,#E7B81C
59,Taman Len Seng,0.0,2,3.06908,101.74287,#E7B81C
62,Taman OUG,0.0,2,3.210051,101.634508,#E7B81C
55,Taman Connaught,0.0,2,3.08269,101.73689,#E7B81C


As noted from the map in the Results section, most of the shopping malls are concentrated in the central area of Kuala Lumpur, with the highest concentration in cluster 1 and a moderate concentration in cluster 0. Cluster 2, on the other hand, has no shopping malls in its neighborhoods. These neighborhoods represent a great opportunity and high-potential areas for opening new shopping malls, as there is very little to no competition from existing malls. Meanwhile, shopping malls in cluster 1 are likely suffering from intense competition due to oversupply and a high concentration of shopping malls. From another perspective, the results also show that the oversupply of shopping malls mostly occurs in the central area of the city, while the suburban areas still have very few shopping malls. Therefore, this project recommends that property developers capitalize on these findings by opening new shopping malls in the cluster 2 neighborhoods, where there is no competition. Property developers with unique selling propositions that let them stand out from the competition can also open new shopping malls in the cluster 0 neighborhoods, where competition is low. Lastly, property developers are advised to avoid the cluster 1 neighborhoods, which already have a high concentration of shopping malls and are suffering from intense competition.

