### 1. Import libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

!pip install folium
!pip install geocoder
import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 5.7 MB/s  eta 0:00:01
[?25hCollecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1
Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 10.2 MB/s eta 0:00:01
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6
Libraries imported.


### 2. Scrap data from Wikipedia page into a DataFrame 

In [2]:
# send the GET request
data = requests.get("https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Lucknow").text

In [3]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

In [4]:
soup

<!DOCTYPE html>

<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>Category:Neighbourhoods in Lucknow - Wikipedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"YBvCVwpAIC0AAEXfsREAAADO","wgCSPNonce":!1,"wgCanonicalNamespace":"Category","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":14,"wgPageName":"Category:Neighbourhoods_in_Lucknow","wgTitle":"Neighbourhoods in Lucknow","wgCurRevisionId":796836602,"wgRevisionId":796836602,"wgArticleId":18717639,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Lucknow","Neighbourhoods in Uttar Pradesh"],"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevant

In [5]:
# create a list to store neighborhood data
neighborhoodList = []

In [6]:
# append the data into the list
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)

In [7]:
# create a new DataFrame from the list
lkw_df = pd.DataFrame({"Neighborhood": neighborhoodList})

#lkw_df = kl_df.iloc[1:]
lkw_df

Unnamed: 0,Neighborhood
0,Aishbagh
1,Alambagh
2,"Aminabad, Lucknow"
3,"Ashiyana, Lucknow"
4,Badshah Nagar
5,Cis-Gomti area
6,Dilkusha
7,Dugawan
8,Gola Ganj
9,Gomti Nagar


In [8]:
lkw_df.shape

(21, 1)

### 3.Get the geographical coordinates

In [57]:
address = 'Lucknow, India'

geolocator = Nominatim(user_agent="LKW_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Lucknow are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Lucknow are 26.8381, 80.9346001.


In [58]:
# define a function to get coordinates
def get_latlng(neighborhood):
    #print("neighboutrhood", neighborhood)
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Lucknow, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [None]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in lkw_df["Neighborhood"].tolist() ]

In [None]:
coords

In [None]:
len(coords)

In [None]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [None]:
# # merge the coordinates into the original dataframe
lkw_df['Latitude'] = df_coords['Latitude']
lkw_df['Longitude'] = df_coords['Longitude']

# check the neighborhoods and the coordinates
print(lkw_df.shape)
lkw_df

In [16]:
# save the DataFrame as CSV file
lkw_df.to_csv("lkw_df.csv", index=False)

# Create a map of Lucknow with neighborhoods superimposed on top¶

In [17]:
# get the coordinates of Kuala Lumpur
address = 'Lucknow, India'

geolocator = Nominatim(user_agent="LKW_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Lucknow, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Lucknow, India 26.8381, 80.9346001.


In [18]:
# create map of Toronto using latitude and longitude values
map_lkw = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(lkw_df['Latitude'], lkw_df['Longitude'], lkw_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_lkw)  
    
map_lkw

In [19]:
# save the map as HTML file
map_lkw.save('map_lkw.html')

### Use the Foursquare API to explore the neighborhoods

In [54]:
# define Foursquare Credentials and Version
CLIENT_ID = '3I05EYEV3HVMFUAUUEMAHF2T5A21QDJMVWKX4TITMQBJ4XKR' # your Foursquare ID
CLIENT_SECRET = 'CPU3W4GZF5MSNNT2FKXXWZJPE5L4D55D4YHAG4CWTCN3NJ4I' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3I05EYEV3HVMFUAUUEMAHF2T5A21QDJMVWKX4TITMQBJ4XKR
CLIENT_SECRET:CPU3W4GZF5MSNNT2FKXXWZJPE5L4D55D4YHAG4CWTCN3NJ4I


#### Now, let's get the top 100 venues that are within a radius of 2000 meters.

In [55]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(lkw_df['Latitude'], lkw_df['Longitude'], lkw_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [22]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(226, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Aishbagh,26.84018,80.90053,Naka Hindola,26.837176,80.920334,Market
1,Aishbagh,26.84018,80.90053,Axis Bank ATM,26.831772,80.888654,ATM
2,Aishbagh,26.84018,80.90053,Axis Bank ATM,26.849,80.88696,ATM
3,Aishbagh,26.84018,80.90053,Lucknow Junction Railway Station | लखनऊ जंक्शन...,26.831779,80.918341,Train Station
4,Alambagh,26.81462,80.90332,Phoenix United Mall,26.798718,80.897028,Shopping Mall


#### Let's check how many venues were returned for each neighorhood

In [23]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aishbagh,4,4,4,4,4,4
Alambagh,7,7,7,7,7,7
"Aminabad, Lucknow",26,26,26,26,26,26
"Ashiyana, Lucknow",12,12,12,12,12,12
Badshah Nagar,14,14,14,14,14,14
Dilkusha,7,7,7,7,7,7
Dugawan,10,10,10,10,10,10
Gola Ganj,19,19,19,19,19,19
Gomti Nagar,11,11,11,11,11,11
Hata -e- Sheikhan,11,11,11,11,11,11


#### Let's find out how many unique categories can be curated from all the returned venues

In [24]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 51 uniques categories.


In [25]:
# print out the list of categories
venues_df['VenueCategory'].unique()[:60]

array(['Market', 'ATM', 'Train Station', 'Shopping Mall',
       'Fast Food Restaurant', 'Pizza Place', 'Multiplex', 'Bus Station',
       'Electronics Store', 'Department Store', 'Indian Restaurant',
       'Ice Cream Shop', 'Tea Room', 'Neighborhood', 'Café', 'Hotel',
       'Coffee Shop', 'Food Court', 'American Restaurant', 'Restaurant',
       'Snack Place', 'Lounge', 'Bakery', 'Pharmacy', 'Business Service',
       "Women's Store", 'Outdoors & Recreation', 'Sporting Goods Shop',
       'Sports Bar', 'Asian Restaurant', 'Chinese Restaurant',
       'Music Venue', 'Vegetarian / Vegan Restaurant', 'Clothing Store',
       'Golf Course', 'Food Truck', 'Hookah Bar', 'Plaza', 'Park',
       'History Museum', 'Breakfast Spot', 'Antique Shop',
       'Sandwich Place', 'Bed & Breakfast', 'Sculpture Garden',
       'Fried Chicken Joint', 'Flea Market', 'Building', 'Gym',
       'Mobile Phone Shop', 'Dessert Shop'], dtype=object)

In [26]:
# check if the results contain "Shopping Mall"
"Neighborhood" in venues_df['VenueCategory'].unique()


True

#### Analyze Each Neighborhood

In [27]:
# one hot encoding
kl_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
kl_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [kl_onehot.columns[-1]] + list(kl_onehot.columns[:-1])
kl_onehot = kl_onehot[fixed_columns]

print(kl_onehot.shape)
kl_onehot.head()

(226, 52)


Unnamed: 0,Neighborhoods,ATM,American Restaurant,Antique Shop,Asian Restaurant,Bakery,Bed & Breakfast,Breakfast Spot,Building,Bus Station,Business Service,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Department Store,Dessert Shop,Electronics Store,Fast Food Restaurant,Flea Market,Food Court,Food Truck,Fried Chicken Joint,Golf Course,Gym,History Museum,Hookah Bar,Hotel,Ice Cream Shop,Indian Restaurant,Lounge,Market,Mobile Phone Shop,Multiplex,Music Venue,Neighborhood,Outdoors & Recreation,Park,Pharmacy,Pizza Place,Plaza,Restaurant,Sandwich Place,Sculpture Garden,Shopping Mall,Snack Place,Sporting Goods Shop,Sports Bar,Tea Room,Train Station,Vegetarian / Vegan Restaurant,Women's Store
0,Aishbagh,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Aishbagh,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Aishbagh,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Aishbagh,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,Alambagh,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0


In [28]:
#Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
kl_grouped = kl_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(kl_grouped.shape)
kl_grouped

(20, 52)


Unnamed: 0,Neighborhoods,ATM,American Restaurant,Antique Shop,Asian Restaurant,Bakery,Bed & Breakfast,Breakfast Spot,Building,Bus Station,Business Service,Café,Chinese Restaurant,Clothing Store,Coffee Shop,Department Store,Dessert Shop,Electronics Store,Fast Food Restaurant,Flea Market,Food Court,Food Truck,Fried Chicken Joint,Golf Course,Gym,History Museum,Hookah Bar,Hotel,Ice Cream Shop,Indian Restaurant,Lounge,Market,Mobile Phone Shop,Multiplex,Music Venue,Neighborhood,Outdoors & Recreation,Park,Pharmacy,Pizza Place,Plaza,Restaurant,Sandwich Place,Sculpture Garden,Shopping Mall,Snack Place,Sporting Goods Shop,Sports Bar,Tea Room,Train Station,Vegetarian / Vegan Restaurant,Women's Store
0,Aishbagh,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0
1,Alambagh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Aminabad, Lucknow",0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.115385,0.0,0.0,0.038462,0.0,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.115385,0.038462,0.192308,0.0,0.038462,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.076923,0.038462,0.0,0.0,0.038462,0.076923,0.0,0.0
3,"Ashiyana, Lucknow",0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,0.0,0.0,0.083333,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.083333,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,0.0,0.083333
4,Badshah Nagar,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.071429,0.071429,0.071429,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.071429,0.0
5,Dilkusha,0.428571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857
6,Dugawan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.2,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.2,0.0,0.0
7,Gola Ganj,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.157895,0.052632,0.263158,0.0,0.052632,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.052632,0.0,0.0,0.0
8,Gomti Nagar,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.090909,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.181818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.090909,0.0,0.0,0.0,0.090909,0.090909,0.0,0.0,0.0,0.0,0.0,0.0
9,Hata -e- Sheikhan,0.0,0.0,0.090909,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.090909,0.090909,0.272727,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
len(kl_grouped[kl_grouped["Shopping Mall"] > 0])

11

#### Create a new DataFrame for Shopping Mall data only

In [30]:
kl_mall = kl_grouped[["Neighborhoods","Shopping Mall"]]

In [31]:
kl_mall.head()

Unnamed: 0,Neighborhoods,Shopping Mall
0,Aishbagh,0.0
1,Alambagh,0.142857
2,"Aminabad, Lucknow",0.076923
3,"Ashiyana, Lucknow",0.0
4,Badshah Nagar,0.071429


#### Run k-means to cluster the neighborhoods in Lucknow into 3 clusters. 

In [42]:
kclusters = 3

kl_clustering = kl_mall.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 0, 2, 1, 2, 1, 2, 2, 2, 1], dtype=int32)

In [43]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
kl_merged = kl_mall.copy()

# add clustering labels
kl_merged["Cluster Labels"] = kmeans.labels_

In [44]:
kl_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
kl_merged.head()

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels
0,Aishbagh,0.0,1
1,Alambagh,0.142857,0
2,"Aminabad, Lucknow",0.076923,2
3,"Ashiyana, Lucknow",0.0,1
4,Badshah Nagar,0.071429,2


In [46]:
kl_merged = kl_merged.join(lkw_df.set_index("Neighborhood"), on="Neighborhood")

print(kl_merged.shape)
kl_merged.head() # check the last columns!

(20, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Aishbagh,0.0,1,26.84018,80.90053
1,Alambagh,0.142857,0,26.81462,80.90332
2,"Aminabad, Lucknow",0.076923,2,26.84542,80.92722
3,"Ashiyana, Lucknow",0.0,1,26.78856,80.92003
4,Badshah Nagar,0.071429,2,26.86949,80.96114


In [47]:
# sort the results by Cluster Labels
print(kl_merged.shape)
kl_merged.sort_values(["Cluster Labels"], inplace=True)
kl_merged

(20, 5)


Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
1,Alambagh,0.142857,0,26.81462,80.90332
15,"Rajendra Nagar, Lucknow",0.142857,0,26.84034,80.91418
12,"Krishna Nagar, Lucknow",0.142857,0,26.79657,80.88426
0,Aishbagh,0.0,1,26.84018,80.90053
17,Shivpuri colony,0.0,1,26.9004,80.98706
16,Sarvodaya Nagar,0.0,1,26.88196,80.97083
14,"Mehndiganj, Uttar Pradesh, Lucknow",0.0,1,26.85027,80.89346
10,Hazratganj,0.033333,1,26.84838,80.95307
18,Trans-Gomti area,0.0,1,26.85471,80.92135
9,Hata -e- Sheikhan,0.0,1,26.85471,80.92135


In [48]:
kl_merged = kl_merged[kl_merged['Latitude'].notna()]

In [49]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kl_merged['Latitude'], kl_merged['Longitude'], kl_merged['Neighborhood'], kl_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [50]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

### 8. Examine Clusters

#### Cluster 0

In [51]:
kl_merged.loc[kl_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
1,Alambagh,0.142857,0,26.81462,80.90332
15,"Rajendra Nagar, Lucknow",0.142857,0,26.84034,80.91418
12,"Krishna Nagar, Lucknow",0.142857,0,26.79657,80.88426


#### Cluster 1

In [52]:
kl_merged.loc[kl_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
0,Aishbagh,0.0,1,26.84018,80.90053
17,Shivpuri colony,0.0,1,26.9004,80.98706
16,Sarvodaya Nagar,0.0,1,26.88196,80.97083
14,"Mehndiganj, Uttar Pradesh, Lucknow",0.0,1,26.85027,80.89346
10,Hazratganj,0.033333,1,26.84838,80.95307
18,Trans-Gomti area,0.0,1,26.85471,80.92135
9,Hata -e- Sheikhan,0.0,1,26.85471,80.92135
5,Dilkusha,0.0,1,26.79407,80.93798
3,"Ashiyana, Lucknow",0.0,1,26.78856,80.92003
19,"Usman Enclave, Lucknow",0.0,1,26.89989,80.95233


#### Cluster 2

In [53]:
kl_merged.loc[kl_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Shopping Mall,Cluster Labels,Latitude,Longitude
7,Gola Ganj,0.105263,2,26.85045,80.92582
11,"Indira Nagar, Lucknow",0.090909,2,26.88668,80.99333
6,Dugawan,0.1,2,26.84525,80.92002
13,Maulviganj,0.076923,2,26.84761,80.92226
4,Badshah Nagar,0.071429,2,26.86949,80.96114
2,"Aminabad, Lucknow",0.076923,2,26.84542,80.92722
8,Gomti Nagar,0.090909,2,26.84922,80.99726


#### Observations:
Most of the shopping malls are concentrated in the central area of Lucknow city, with the highest number in cluster 2 and moderate number in cluster 1. On the other hand, cluster 1 has very low number to totally no shopping mall in the neighborhoods. This represents a great opportunity and high potential areas to open new shopping malls as there is very little to no competition from existing malls. Meanwhile, shopping malls in cluster 2 are likely suffering from intense competition due to oversupply and high concentration of shopping malls. From another perspective, this also shows that the oversupply of shopping malls mostly happened in the central area of the city, with the suburb area still have very few shopping malls. Therefore, this project recommends property developers to capitalize on these findings to open new shopping malls in neighborhoods in cluster 1 with little to no competition. Property developers with unique selling propositions to stand out from the competition can also open new shopping malls in neighborhoods in cluster 0 with moderate competition. Lastly, property developers are advised to avoid neighborhoods in cluster 2 which already have high concentration of shopping malls and suffering from intense competition.