# IBM Applied Data Science Capstone Course by Coursera
### Week 5 Final Report
**_Opening Juice Bars in Dhaka, Bangladesh_**

## Importing Libraries

In [14]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
import os # read API credentials from environment variables

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from bs4 import BeautifulSoup

print('Libraries imported.')

Libraries imported.


## Download and Scrape Dhaka City Neighborhoods from Wikipedia

In [3]:
# Wikipedia category page that lists the neighbourhoods of Dhaka
url = 'https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Dhaka'

# bound the wait and fail loudly on an HTTP error instead of parsing an error page
response = requests.get(url, timeout=30)
response.raise_for_status()
html_data = response.text

#### Parse the HTML data with BeautifulSoup

In [6]:
soup = BeautifulSoup(html_data, 'html.parser')

# the first "mw-category" <div> holds the category listing
category_listing = soup.find_all("div", class_="mw-category")[0]

# collect the text of every list item in that listing
neighborhoodList = [entry.text for entry in category_listing.find_all("li")]

## Create a pandas DataFrame

In [17]:
# build a one-column table from the scraped neighborhood names
df_dhk = pd.DataFrame(neighborhoodList, columns=["Neighborhood"])

print(df_dhk.shape)
df_dhk.head()

(42, 1)


Unnamed: 0,Neighborhood
0,Agargaon
1,Armanitola
2,"Azimpur, Dhaka"
3,"Bailey Road, Dhaka"
4,Banani DOHS


## Getting geographical Data

In [18]:
# define a function to get coordinates
def get_latlng(neighborhood, max_retries=10):
    """Look up the coordinates of a Dhaka neighborhood via ``geocoder.arcgis``.

    Parameters
    ----------
    neighborhood : str
        Neighborhood name; ', Dhaka, Bangladesh' is appended to build the query.
    max_retries : int, default 10
        Maximum number of lookups attempted before giving up.

    Returns
    -------
    The ``latlng`` attribute of the first lookup result that is not ``None``.

    Raises
    ------
    RuntimeError
        If every attempt returns no coordinates. This replaces the previous
        unbounded retry loop, which never terminated in that situation.
    """
    query = '{}, Dhaka, Bangladesh'.format(neighborhood)
    for _ in range(max_retries):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords
    raise RuntimeError(
        'No coordinates found for {!r} after {} attempts'.format(query, max_retries))

In [19]:
# geocode every neighborhood; the table is named `df_dhk` (the former `dhk_df` was never defined)
coords = [get_latlng(neighborhood) for neighborhood in df_dhk["Neighborhood"].tolist()]
coords

[[23.777310000000057, 90.37273000000005],
 [23.738950000000045, 90.38594000000006],
 [23.726120000000037, 90.38296000000008],
 [23.741339702576497, 90.40411000858829],
 [23.79388000000006, 90.39656000000008],
 [23.75714000000005, 90.42397000000005],
 [23.723690000000033, 90.40549000000004],
 [23.811990000000037, 90.07630000000006],
 [23.8094415, 90.41287050000003],
 [23.81179000000003, 90.42119000000008],
 [24.800000000000068, 90.30000000000007],
 [23.83275414186033, 90.46328453268227],
 [23.706925568439097, 90.4300710240764],
 [23.76962000000003, 90.39830000000006],
 [23.70428870904175, 90.41912798662186],
 [23.713230000000067, 90.39957000000004],
 [23.698620000000062, 90.44745000000006],
 [23.783300000000054, 90.35760000000005],
 [23.61791000000005, 90.50227000000007],
 [23.717390000000023, 90.38787000000008],
 [23.756490000000042, 90.36814000000004],
 [23.650000000000034, 90.58333000000005],
 [23.746800000000064, 90.41449000000006],
 [23.834460000000036, 90.36942000000005],
 [23.783

#### Update dataframe

In [24]:
# turn the list of [lat, lng] pairs into a two-column table
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

# copy both coordinate columns onto the neighborhood table (aligned on the row index)
for coord_column in ('Latitude', 'Longitude'):
    df_dhk[coord_column] = df_coords[coord_column]

print(df_dhk.shape)
df_dhk.head()

(42, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Agargaon,23.77731,90.37273
1,Armanitola,23.73895,90.38594
2,"Azimpur, Dhaka",23.72612,90.38296
3,"Bailey Road, Dhaka",23.74134,90.40411
4,Banani DOHS,23.79388,90.39656


### Create a map of Dhaka City with neighborhoods superimposed on top

In [25]:
address = 'Dhaka, Bangladesh'

# look up the city centre; it is used as the centre of the maps
geolocator = Nominatim(user_agent="ahnirab-agent")
location = geolocator.geocode(address)
if location is None:
    # fail with a clear message instead of an AttributeError on the next line
    raise ValueError('Nominatim returned no result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Dhaka, Bangladesh {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Dhaka, Bangladesh 23.7593572, 90.3788136.


In [27]:
# base map centred on the city coordinates
map_dhk = folium.Map(location=[latitude, longitude], zoom_start=12)

# shared appearance of every neighborhood marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7)

# add one circle marker per neighborhood, with its name as the popup text
neighborhood_points = zip(df_dhk['Latitude'], df_dhk['Longitude'], df_dhk['Neighborhood'])
for lat, lng, neighborhood in neighborhood_points:
    label = folium.Popup('{}'.format(neighborhood), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_dhk)
map_dhk

In [28]:
# save the neighborhood map as an HTML file (path is relative to the working directory)
map_dhk.save('map_dhk.html')

### Explore the neighborhoods using Foursquare API

In [29]:
# Foursquare credentials are read from environment variables so they are never
# stored in the notebook or echoed into its output. Keys that were previously
# committed in this cell should be regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605'

# report only whether the credentials are available, never their values
if CLIENT_ID and CLIENT_SECRET:
    print('Foursquare credentials loaded from the environment.')
else:
    print('WARNING: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before calling the Foursquare API.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


**Now, let's get the top 100 venues that are within a radius of 2000 meters.**

In [30]:
radius = 2000
LIMIT = 100

venues = []

# Foursquare "explore" endpoint; the query string is passed via `params`
# so that requests takes care of the URL encoding
url = "https://api.foursquare.com/v2/venues/explore"

for lat, lng, neighborhood in zip(df_dhk['Latitude'], df_dhk['Longitude'], df_dhk['Neighborhood']):

    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, lng),
        'radius': radius,
        'limit': LIMIT,
    }

    # make the GET request; bound the wait and stop on an HTTP error
    # instead of failing later with a KeyError on the error payload
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()

    # a response without any group is treated as "no venues nearby"
    groups = response.json()["response"].get('groups') or []
    results = groups[0]['items'] if groups else []

    # keep only the relevant information for each nearby venue
    for item in results:
        venue = item['venue']
        categories = venue.get('categories') or []
        if not categories:
            # a venue without a category cannot be used in the category analysis
            continue
        venues.append((
            neighborhood,
            lat,
            lng,
            venue['name'],
            venue['location']['lat'],
            venue['location']['lng'],
            categories[0]['name']))

In [31]:
# convert the list of venue tuples into a DataFrame; passing the column names
# to the constructor also works when no venue was returned at all
venue_columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
df_venues = pd.DataFrame(venues, columns=venue_columns)

print(df_venues.shape)
df_venues.head()

(1598, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Agargaon,23.77731,90.37273,BCS Computer City,23.778736,90.379514,Electronics Store
1,Agargaon,23.77731,90.37273,Mostakim's Chap,23.766818,90.364711,Asian Restaurant
2,Agargaon,23.77731,90.37273,Shishu Mela,23.77309,90.36791,Theme Park
3,Agargaon,23.77731,90.37273,Captains World,23.775472,90.390767,Fast Food Restaurant
4,Agargaon,23.77731,90.37273,Shyamoli Square,23.774221,90.364986,Shopping Mall


**Let's check how many venues were returned for each neighborhood**

In [32]:
# number of non-null entries per column for every neighborhood
df_venues.groupby("Neighborhood").count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agargaon,11,11,11,11,11,11
Armanitola,92,92,92,92,92,92
"Azimpur, Dhaka",49,49,49,49,49,49
"Bailey Road, Dhaka",67,67,67,67,67,67
Banani DOHS,84,84,84,84,84,84
Banasree,10,10,10,10,10,10
Bangla Bazar,31,31,31,31,31,31
Baridhara DOHS,81,81,81,81,81,81
Bashundhara Residential Area,58,58,58,58,58,58
Chowk Bazaar,7,7,7,7,7,7


In [40]:
# distinct venue categories, in order of first appearance
venue_categories = df_venues['VenueCategory'].unique()
print('There are {} uniques categories.'.format(len(venue_categories)))
# show at most the first 100 category names
venue_categories[:100]

There are 115 uniques categories.


array(['Electronics Store', 'Asian Restaurant', 'Theme Park',
       'Fast Food Restaurant', 'Shopping Mall', 'Steakhouse',
       'Indian Restaurant', 'Market', 'Department Store', 'Bus Line',
       'Bookstore', 'Shawarma Place', 'Bakery', 'Restaurant', 'Nightclub',
       'Art Gallery', 'Street Food Gathering', 'Bike Shop', 'Plaza',
       'Multiplex', "Dentist's Office", 'Convenience Store', 'BBQ Joint',
       'Café', 'History Museum', 'Fried Chicken Joint', 'Clothing Store',
       'Park', 'Ice Cream Shop', 'Italian Restaurant', 'Pharmacy',
       'Pizza Place', 'Food', 'Burger Joint', 'Thai Restaurant',
       'Scenic Lookout', 'Chinese Restaurant', 'North Indian Restaurant',
       'Bistro', 'Hotel', 'Diner', 'Boutique', 'Recreation Center',
       'Food Court', 'Soccer Field', 'Arts & Crafts Store', 'Shoe Store',
       'Historic Site', 'Coffee Shop', 'Bus Station',
       'Other Great Outdoors', 'Performing Arts Venue',
       'American Restaurant', 'Flea Market', 'Fish Marke

### Analyze Each Neighborhood

In [37]:
# one indicator column per venue category, named after the category itself
df_dhk_onehot = pd.get_dummies(df_venues[['VenueCategory']], prefix="", prefix_sep="")

# append the neighborhood of each venue as the last column
df_dhk_onehot['Neighborhoods'] = df_venues['Neighborhood']

# reorder so that the last column comes first, followed by all the others
*leading_columns, last_column = df_dhk_onehot.columns
df_dhk_onehot = df_dhk_onehot[[last_column, *leading_columns]]

print(df_dhk_onehot.shape)
df_dhk_onehot.head()


(1598, 116)


Unnamed: 0,Neighborhoods,Airport,Airport Service,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Beer Garden,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Burger Joint,Bus Line,Bus Station,Bus Stop,Café,Castle,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Convention Center,Cosmetics Shop,Dentist's Office,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,English Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food,Food Court,Food Truck,Fried Chicken Joint,Furniture / Home Store,Golf Course,Grocery Store,Gym / Fitness Center,Harbor / Marina,Historic Site,History Museum,Hobby Shop,Hockey Arena,Hookah Bar,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Lake,Light Rail Station,Lounge,Market,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Multiplex,Museum,Nightclub,North Indian Restaurant,Office,Other Great Outdoors,Outlet Store,Park,Performing Arts Venue,Pharmacy,Pizza Place,Platform,Plaza,Portuguese Restaurant,Recreation Center,Restaurant,River,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shawarma Place,Shoe Store,Shopping Mall,Snack Place,Soccer Field,Social Club,Spa,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Theme Restaurant,Train Station,Turkish Restaurant,Women's Store
0,Agargaon,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Agargaon,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Agargaon,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,Agargaon,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Agargaon,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


**Now, let's group the rows by neighborhood and take the mean of the frequency of occurrence of each category**

In [38]:
# mean of each indicator column per neighborhood, i.e. the share of venues in each category
category_share = df_dhk_onehot.groupby("Neighborhoods").mean()

# turn the neighborhood index back into a regular column
df_dhk_grouped = category_share.reset_index()

print(df_dhk_grouped.shape)
df_dhk_grouped

(39, 116)


Unnamed: 0,Neighborhoods,Airport,Airport Service,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Beer Garden,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Burger Joint,Bus Line,Bus Station,Bus Stop,Café,Castle,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Convention Center,Cosmetics Shop,Dentist's Office,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,English Restaurant,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Food,Food Court,Food Truck,Fried Chicken Joint,Furniture / Home Store,Golf Course,Grocery Store,Gym / Fitness Center,Harbor / Marina,Historic Site,History Museum,Hobby Shop,Hockey Arena,Hookah Bar,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Juice Bar,Korean Restaurant,Lake,Light Rail Station,Lounge,Market,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Multiplex,Museum,Nightclub,North Indian Restaurant,Office,Other Great Outdoors,Outlet Store,Park,Performing Arts Venue,Pharmacy,Pizza Place,Platform,Plaza,Portuguese Restaurant,Recreation Center,Restaurant,River,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shawarma Place,Shoe Store,Shopping Mall,Snack Place,Soccer Field,Social Club,Spa,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Theme Restaurant,Train Station,Turkish Restaurant,Women's Store
0,Agargaon,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.090909,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0
1,Armanitola,0.0,0.0,0.0,0.021739,0.0,0.01087,0.0,0.076087,0.0,0.01087,0.043478,0.0,0.0,0.01087,0.01087,0.0,0.01087,0.01087,0.0,0.0,0.01087,0.0,0.0,0.0,0.119565,0.0,0.01087,0.01087,0.0,0.01087,0.0,0.0,0.01087,0.01087,0.0,0.01087,0.0,0.021739,0.0,0.0,0.086957,0.0,0.0,0.0,0.01087,0.01087,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.01087,0.0,0.0,0.0,0.021739,0.0,0.032609,0.01087,0.0,0.01087,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01087,0.0,0.0,0.0,0.0,0.01087,0.0,0.01087,0.01087,0.0,0.0,0.0,0.021739,0.0,0.01087,0.021739,0.0,0.032609,0.0,0.01087,0.076087,0.0,0.0,0.01087,0.0,0.01087,0.01087,0.076087,0.0,0.01087,0.0,0.0,0.0,0.01087,0.0,0.0,0.0,0.01087,0.0,0.0,0.0,0.0,0.0,0.0
2,"Azimpur, Dhaka",0.0,0.0,0.0,0.040816,0.0,0.020408,0.0,0.061224,0.0,0.020408,0.040816,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.040816,0.0,0.020408,0.0,0.081633,0.0,0.0,0.020408,0.020408,0.040816,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.122449,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.020408,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.040816,0.0,0.040816,0.0,0.020408,0.081633,0.0,0.0,0.020408,0.0,0.020408,0.0,0.061224,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0
3,"Bailey Road, Dhaka",0.0,0.0,0.014925,0.029851,0.0,0.0,0.0,0.029851,0.0,0.029851,0.014925,0.0,0.0,0.0,0.0,0.0,0.014925,0.014925,0.0,0.0,0.014925,0.0,0.044776,0.0,0.044776,0.0,0.0,0.044776,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.014925,0.0,0.044776,0.0,0.0,0.059701,0.0,0.014925,0.029851,0.014925,0.014925,0.0,0.029851,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.059701,0.0,0.0,0.029851,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.014925,0.0,0.014925,0.0,0.0,0.014925,0.0,0.014925,0.014925,0.0,0.014925,0.0,0.059701,0.0,0.0,0.074627,0.0,0.0,0.014925,0.0,0.0,0.0,0.089552,0.0,0.014925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Banani DOHS,0.0,0.0,0.011905,0.0,0.0,0.0,0.011905,0.071429,0.0,0.0,0.011905,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.02381,0.0,0.0,0.0,0.107143,0.0,0.0,0.035714,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.011905,0.047619,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.011905,0.011905,0.0,0.0,0.0,0.011905,0.0,0.0,0.083333,0.0,0.02381,0.047619,0.0,0.035714,0.02381,0.011905,0.02381,0.02381,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.035714,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.02381,0.0,0.0,0.011905,0.011905,0.011905,0.0,0.011905,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0
5,Banasree,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1
6,Bangla Bazar,0.032258,0.0,0.0,0.032258,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.032258,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.032258,0.064516,0.0,0.0,0.0,0.064516,0.0,0.0,0.064516,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.064516,0.032258,0.0,0.032258,0.0,0.096774,0.0,0.0,0.129032,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Baridhara DOHS,0.0,0.0,0.012346,0.0,0.0,0.0,0.012346,0.024691,0.0,0.0,0.012346,0.024691,0.012346,0.0,0.024691,0.0,0.0,0.0,0.0,0.0,0.024691,0.0,0.0,0.0,0.135802,0.0,0.0,0.024691,0.049383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346,0.061728,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,0.0,0.012346,0.024691,0.037037,0.0,0.0,0.0,0.012346,0.012346,0.0,0.061728,0.012346,0.012346,0.024691,0.0,0.037037,0.0,0.012346,0.012346,0.012346,0.0,0.012346,0.012346,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.049383,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346,0.0,0.037037,0.0,0.0,0.0,0.024691,0.0,0.0,0.037037,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346,0.012346,0.0,0.012346,0.0,0.0,0.0,0.0,0.0,0.0
8,Bashundhara Residential Area,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.017241,0.034483,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.137931,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.017241,0.0,0.017241,0.0,0.0,0.017241,0.017241,0.051724,0.0,0.0,0.0,0.0,0.017241,0.017241,0.051724,0.017241,0.0,0.017241,0.0,0.034483,0.0,0.017241,0.017241,0.034483,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.051724,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.017241,0.0,0.051724,0.0,0.0,0.0,0.017241,0.0,0.0,0.034483,0.0,0.017241,0.0,0.017241,0.0,0.0,0.017241,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0
9,Chowk Bazaar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0


In [43]:
# number of neighborhoods whose Juice Bar share is above zero
int((df_dhk_grouped["Juice Bar"] > 0).sum())

14

**Create a new DataFrame for Juice Bar data only**

In [45]:
# keep only the neighborhood name and its Juice Bar share
juice_bar_columns = ["Neighborhoods", "Juice Bar"]
df_juice_bar = df_dhk_grouped.loc[:, juice_bar_columns]
df_juice_bar.head()

Unnamed: 0,Neighborhoods,Juice Bar
0,Agargaon,0.0
1,Armanitola,0.0
2,"Azimpur, Dhaka",0.0
3,"Bailey Road, Dhaka",0.0
4,Banani DOHS,0.02381


### Clustering Neighborhoods
Run k-means to cluster the neighborhoods in Dhaka into 6 clusters.

In [46]:
# set number of clusters
kclusters = 6

# the only feature is the Juice Bar share; the neighborhood name is a label, not a feature.
# `columns=` replaces the positional axis argument, which pandas 2.x no longer accepts.
df_jb_clustering = df_juice_bar.drop(columns=["Neighborhoods"])

# run k-means clustering; n_init is pinned to 10 (the long-standing scikit-learn default)
# so that the result does not change with the newer 'auto' default
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(df_jb_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 2, 0, 2, 4, 4, 0])

In [48]:
# new dataframe holding each neighborhood's Juice Bar share together with its
# cluster label; the neighborhood column is renamed to "Neighborhood"
df_dhk_merged = (
    df_juice_bar
    .assign(**{"Cluster Labels": kmeans.labels_})
    .rename(columns={"Neighborhoods": "Neighborhood"})
)
df_dhk_merged.head()

Unnamed: 0,Neighborhood,Juice Bar,Cluster Labels
0,Agargaon,0.0,0
1,Armanitola,0.0,0
2,"Azimpur, Dhaka",0.0,0
3,"Bailey Road, Dhaka",0.0,0
4,Banani DOHS,0.02381,2


In [51]:
# attach the coordinates of each neighborhood. Dropping any coordinate columns left
# by a previous run first keeps this cell re-runnable (a second join would otherwise
# fail on the overlapping column names).
coordinate_columns = ["Latitude", "Longitude"]
df_dhk_merged = (
    df_dhk_merged
    .drop(columns=coordinate_columns, errors="ignore")
    .join(df_dhk.set_index("Neighborhood")[coordinate_columns], on="Neighborhood")
)

print(df_dhk_merged.shape)
df_dhk_merged.head()

(39, 5)


Unnamed: 0,Neighborhood,Juice Bar,Cluster Labels,Latitude,Longitude
0,Agargaon,0.0,0,23.77731,90.37273
1,Armanitola,0.0,0,23.73895,90.38594
2,"Azimpur, Dhaka",0.0,0,23.72612,90.38296
3,"Bailey Road, Dhaka",0.0,0,23.74134,90.40411
4,Banani DOHS,0.02381,2,23.79388,90.39656


In [52]:
# order the rows by cluster label so that the members of each cluster are listed together
print(df_dhk_merged.shape)
df_dhk_merged = df_dhk_merged.sort_values(by="Cluster Labels")
df_dhk_merged

(39, 5)


Unnamed: 0,Neighborhood,Juice Bar,Cluster Labels,Latitude,Longitude
0,Agargaon,0.0,0,23.77731,90.37273
35,Tejturi Bazar,0.0,0,23.7542,90.39487
33,"Shantinagar, Dhaka",0.0,0,23.7383,90.41358
32,Shankhari Bazaar,0.0,0,23.55,90.5
31,Shahjadpur,0.0,0,23.79048,90.42754
30,Shahbag,0.0,0,23.73545,90.40036
29,Rajarbagh,0.0,0,23.74073,90.41596
28,"Pilkhana, Bangladesh",0.0,0,23.73397,90.37411
26,Nilkhet,0.0,0,23.733215,90.393145
24,"New Market, Dhaka",0.0,0,23.73417,90.38448


**Finally, let's visualize the resulting clusters**

In [53]:
# create map
map_jb_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# one evenly spaced rainbow colour per cluster label (index i is the colour of cluster i)
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add one marker per neighborhood, coloured by its cluster
for lat, lon, poi, cluster in zip(df_dhk_merged['Latitude'], df_dhk_merged['Longitude'], df_dhk_merged['Neighborhood'], df_dhk_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    # index with the label itself: `cluster-1` made cluster 0 wrap around to the last colour
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_jb_clusters)

map_jb_clusters

In [54]:
# save the cluster map as an HTML file (path is relative to the working directory)
map_jb_clusters.save('map_jb_clusters.html')

### Examine Clusters


#### Cluster 0

In [55]:
# neighborhoods assigned to cluster 0
in_cluster = df_dhk_merged['Cluster Labels'].eq(0)
df_dhk_merged[in_cluster]

Unnamed: 0,Neighborhood,Juice Bar,Cluster Labels,Latitude,Longitude
0,Agargaon,0.0,0,23.77731,90.37273
35,Tejturi Bazar,0.0,0,23.7542,90.39487
33,"Shantinagar, Dhaka",0.0,0,23.7383,90.41358
32,Shankhari Bazaar,0.0,0,23.55,90.5
31,Shahjadpur,0.0,0,23.79048,90.42754
30,Shahbag,0.0,0,23.73545,90.40036
29,Rajarbagh,0.0,0,23.74073,90.41596
28,"Pilkhana, Bangladesh",0.0,0,23.73397,90.37411
26,Nilkhet,0.0,0,23.733215,90.393145
24,"New Market, Dhaka",0.0,0,23.73417,90.38448


#### Cluster 1

In [56]:
# neighborhoods assigned to cluster 1
in_cluster = df_dhk_merged['Cluster Labels'].eq(1)
df_dhk_merged[in_cluster]

Unnamed: 0,Neighborhood,Juice Bar,Cluster Labels,Latitude,Longitude
13,Hatirpool,0.083333,1,23.71323,90.39957
34,Solmaid,0.083333,1,23.71323,90.39957
25,New Paltan,0.083333,1,23.71323,90.39957


#### Cluster 2

In [57]:
# neighborhoods assigned to cluster 2
in_cluster = df_dhk_merged['Cluster Labels'].eq(2)
df_dhk_merged[in_cluster]

Unnamed: 0,Neighborhood,Juice Bar,Cluster Labels,Latitude,Longitude
6,Bangla Bazar,0.032258,2,23.72369,90.40549
4,Banani DOHS,0.02381,2,23.79388,90.39656


#### Cluster 3

In [58]:
# neighborhoods assigned to cluster 3
in_cluster = df_dhk_merged['Cluster Labels'].eq(3)
df_dhk_merged[in_cluster]

Unnamed: 0,Neighborhood,Juice Bar,Cluster Labels,Latitude,Longitude
36,Tikatuli,0.05,3,23.72048,90.42101
23,Narinda,0.05,3,23.71335,90.41659


#### Cluster 4

In [59]:
# neighborhoods assigned to cluster 4
in_cluster = df_dhk_merged['Cluster Labels'].eq(4)
df_dhk_merged[in_cluster]

Unnamed: 0,Neighborhood,Juice Bar,Cluster Labels,Latitude,Longitude
27,Panthapath,0.01,4,23.752739,90.382826
8,Bashundhara Residential Area,0.017241,4,23.81179,90.42119
22,Mohakhali DOHS,0.015385,4,23.783504,90.396142
18,Lalmatia,0.014493,4,23.75649,90.36814
7,Baridhara DOHS,0.012346,4,23.809441,90.412871
38,West Rajabazar,0.01,4,23.75,90.38333


#### Cluster 5

In [60]:
# neighborhoods assigned to cluster 5
in_cluster = df_dhk_merged['Cluster Labels'].eq(5)
df_dhk_merged[in_cluster]

Unnamed: 0,Neighborhood,Juice Bar,Cluster Labels,Latitude,Longitude
12,Farashganj,0.1,5,23.704289,90.419128


### Results and Findings

#### Cluster 0: It is the biggest cluster, but its neighborhoods have zero concentration of Juice Bars.
#### Cluster 1: This cluster has a high concentration of Juice Bars in its neighborhoods (about 8% of the venues found).
#### Cluster 2 and Cluster 3: Moderate concentration (roughly 2% to 5% of the venues found).
#### Cluster 4 and Cluster 5: Cluster 4 has a low concentration of Juice Bars (between 1% and 2% of the venues found), whereas Cluster 5 consists of a single neighborhood (Farashganj) with the highest concentration of all (10%).