   # IBM Applied Data Science Capstone Project
   
## Opening a New Ice Cream Shop in Bangalore, India

   * Build a dataframe of neighborhoods in Bangalore, India by web scraping the data from Wikipedia page
   * Get the geographical coordinates of the neighborhoods
   * Obtain the venue data for the neighborhoods from Foursquare API
   * Explore and cluster the neighborhoods
   * Select the best cluster to open a new icecream shop
    

### 1. Importing all libraries

In [3]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

#!conda install -c conda-forge geocoder --yes
import geocoder

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

print("Libraries imported.")

Libraries imported.


### 2. Scrape data from the Wikipedia page into a DataFrame

In [40]:
# Parse every HTML table on the Wikipedia page into a list of DataFrames.
data = pd.read_html('https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Bangalore')

# Stack the first eight tables (the same tables 0-7 the notebook has always used)
# into one frame with a fresh 0..n-1 index. DataFrame.append was removed in
# pandas 2.0, so pd.concat is used instead of chaining eight append() calls.
# NOTE(review): the table count depends on the current page layout - confirm if the page changes.
blr = pd.concat(data[:8], ignore_index=True)

# Keep only the neighbourhood name. The axis is passed by keyword because the
# positional form drop(labels, 1) is rejected by pandas >= 2.0.
blr = blr.drop(columns=['Image', 'Summary']).rename(columns={"Name": "Neighborhood"})
print(blr.shape)
blr.head()


(65, 1)


Unnamed: 0,Neighborhood
0,Cantonment area
1,Domlur
2,Indiranagar
3,Jeevanbheemanagar
4,Malleswaram


###  Load the coordinates for neighborhoods

In [48]:

def get_latlng(neighborhood, max_attempts=5, geocode=None):
    """Return the ``[latitude, longitude]`` pair for a Bangalore neighborhood.

    The lookup is retried because the geocoding service can come back
    without coordinates; unlike an unbounded ``while`` loop, the retries are
    capped so a name the service cannot resolve does not hang the notebook.

    Args:
        neighborhood: Neighborhood name; ``', Bangalore , India'`` is appended
            to it to build the query.
        max_attempts: Maximum number of lookups before giving up.
        geocode: Callable taking the query string and returning an object
            with a ``latlng`` attribute. Defaults to ``geocoder.arcgis``.

    Returns:
        Whatever the geocoder reports in ``latlng`` on the first attempt that
        yields a non-``None`` value.

    Raises:
        RuntimeError: If no attempt produced coordinates.
    """
    if geocode is None:
        geocode = geocoder.arcgis

    query = '{}, Bangalore , India'.format(neighborhood)
    for _ in range(max_attempts):
        lat_lng_coords = geocode(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords

    raise RuntimeError(
        'No coordinates found for {!r} after {} attempt(s)'.format(query, max_attempts)
    )

In [50]:
# Geocode the neighborhood names in row order, so entry i of the result
# belongs to row i of blr.
latlong = list(map(get_latlng, blr["Neighborhood"].tolist()))
latlong

[[28.651000000000067, 77.17535000000004],
 [12.943290000000047, 77.65602000000007],
 [13.030060000000049, 77.49526000000003],
 [12.96601000000004, 77.65767000000005],
 [13.006322454623541, 77.56841583882036],
 [12.966180000000065, 77.58690000000007],
 [13.014830000000075, 77.57771000000008],
 [12.993550000000027, 77.57988000000006],
 [12.98720000000003, 77.60401000000007],
 [12.989080000000058, 77.62795000000006],
 [12.990730000000042, 77.58861000000007],
 [12.927350000000047, 77.67185000000006],
 [12.979018801404825, 77.65613753365203],
 [12.99198000000007, 77.71506000000005],
 [13.000390000000039, 77.68368000000004],
 [12.994090000000028, 77.66633000000007],
 [12.954660000000047, 77.70752000000005],
 [12.943480000000022, 77.74703000000005],
 [12.975230000000067, 77.75238000000007],
 [13.019643510687336, 77.65469211693214],
 [13.02642000000003, 77.62437000000006],
 [13.038700000000063, 77.66192000000007],
 [12.968020000000024, 77.52114000000006],
 [13.014300000000048, 77.6368500000000

In [52]:
# Turn the list of [lat, lng] pairs into a two-column frame, then copy each
# column onto blr (the two frames are aligned on their index).
coord_columns = ['Latitude', 'Longitude']
blr_coord = pd.DataFrame(latlong, columns=coord_columns)
for column in coord_columns:
    blr[column] = blr_coord[column]

In [53]:
# Sanity check: the frame should now carry the name plus both coordinates.
print(blr.shape)
blr.head(5)

(65, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Cantonment area,28.651,77.17535
1,Domlur,12.94329,77.65602
2,Indiranagar,13.03006,77.49526
3,Jeevanbheemanagar,12.96601,77.65767
4,Malleswaram,13.006322,77.568416


###  Use geopy library to get the latitude and longitude values of Bangalore

In [54]:
#!conda install -c conda-forge geopy --yes
address = 'Bangalore'

# Look up the city centre; it is used later as the centre of the folium maps.
geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)

# geocode() yields None when the service finds no match; fail with a clear
# message instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Bangalore are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Bangalore are 12.9791198, 77.5912997.


###  Create a map of Bangalore with neighborhoods superimposed on top

In [57]:
# Base map centred on the city coordinates obtained from the geocoder.
map_blr = folium.Map(location=[latitude, longitude], zoom_start=10)

# Shared appearance for every neighborhood marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
)

# One circle per neighborhood, with the neighborhood name as its popup text.
for neighborhood, lat, lng in zip(blr['Neighborhood'], blr['Latitude'], blr['Longitude']):
    marker = folium.CircleMarker(
        [lat, lng],
        popup=folium.Popup(str(neighborhood), parse_html=True),
        **marker_style
    )
    marker.add_to(map_blr)

map_blr

###  Using the Foursquare API to explore the neighborhoods in Bangalore

In [58]:
# The code was removed by Watson Studio for sharing.

### Getting the top 300 venues that are within a radius of 5000 meters.

In [147]:
radius = 5000  # search radius around each neighborhood, in meters
# NOTE(review): the per-neighborhood counts printed further down never exceed
# 100, so the API appears to cap the limit below 300 - confirm against the docs.
LIMIT = 300
REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the notebook

EXPLORE_URL = "https://api.foursquare.com/v2/venues/explore"

# One tuple per venue:
# (neighborhood, neighborhood lat, neighborhood lng, venue name, venue lat, venue lng, category)
venues = []

# CLIENT_ID, CLIENT_SECRET and VERSION are presumably defined in the
# credentials cell that was stripped before sharing.
for lat, long, neighborhood in zip(blr['Latitude'], blr['Longitude'], blr['Neighborhood']):
    # Let requests build and escape the query string instead of formatting it by hand.
    params = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "v": VERSION,
        "ll": "{},{}".format(lat, long),
        "radius": radius,
        "limit": LIMIT,
    }
    response = requests.get(EXPLORE_URL, params=params, timeout=REQUEST_TIMEOUT)
    # Surface HTTP errors (bad credentials, rate limiting) right here rather
    # than as a KeyError while digging through the JSON below.
    response.raise_for_status()

    groups = response.json().get("response", {}).get("groups", [])
    results = groups[0]["items"] if groups else []

    for item in results:
        venue = item['venue']
        categories = venue.get('categories') or []
        if not categories:
            # Without a category the venue cannot take part in the one-hot
            # analysis; skipping it also avoids an IndexError.
            continue
        venues.append((
            neighborhood,
            lat,
            long,
            venue['name'],
            venue['location']['lat'],
            venue['location']['lng'],
            categories[0]['name']))

In [149]:
# Build the venue table straight from the list of tuples. Naming the columns
# in the constructor (rather than assigning .columns afterwards) also works
# when the list is empty, where the later assignment raised a length-mismatch
# ValueError.
venues_df = pd.DataFrame(
    venues,
    columns=['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory'],
)

print(venues_df.shape)
venues_df.head()

(5724, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Cantonment area,28.651,77.17535,Lantern's,28.643183,77.177746,Bar
1,Cantonment area,28.651,77.17535,Jaypee Siddharth,28.642483,77.175543,Hotel
2,Cantonment area,28.651,77.17535,Roshan Di Kulfi | रोशन दी क़ुल्फ़ी,28.650428,77.192391,Snack Place
3,Cantonment area,28.651,77.17535,Dunkin',28.645402,77.17258,Donut Shop
4,Cantonment area,28.651,77.17535,Raviraj Ki Kulfi,28.649359,77.190215,Dessert Shop


### Checking how many venues were returned for each Neighborhood


In [197]:
# Non-null entries per column for each neighborhood, i.e. venues returned per neighborhood.
venues_df.groupby("Neighborhood").count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Anjanapura,46,46,46,46,46,46
Arekere,100,100,100,100,100,100
BTM Layout,100,100,100,100,100,100
Banashankari,100,100,100,100,100,100
Banaswadi,78,78,78,78,78,78
Basavanagudi,100,100,100,100,100,100
Basaveshwaranagar,100,100,100,100,100,100
Begur,100,100,100,100,100,100
Bellandur,100,100,100,100,100,100
Bommanahalli,100,100,100,100,100,100


#### Checking number of unique venue categories in this data

In [151]:
# dropna=False counts a missing category as its own value, matching len(unique()).
category_count = venues_df['VenueCategory'].nunique(dropna=False)
print('There are {} uniques categories.'.format(category_count))

There are 183 uniques categories.


In [196]:
# First 20 distinct categories, in order of first appearance.
pd.unique(venues_df['VenueCategory'])[:20]

array(['Bar', 'Hotel', 'Snack Place', 'Donut Shop', 'Dessert Shop',
       'Food & Drink Shop', 'Fast Food Restaurant', 'Sandwich Place',
       'Smoke Shop', 'Indian Restaurant', 'Ice Cream Shop', 'Plaza',
       'Bakery', 'South Indian Restaurant', 'BBQ Joint',
       'Spiritual Center', 'Tibetan Restaurant', 'Coffee Shop',
       'Food Truck', 'Playground'], dtype=object)

###  Now analyzing all areas

In [153]:
# One indicator column per venue category; the empty prefix keeps the bare
# category names as column headers.
blr_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# Put the neighborhood name in front of the indicator columns. The column is
# called 'Neighborhoods' (plural), presumably because 'Neighborhood' is itself
# one of the venue categories and would clash.
blr_onehot.insert(0, 'Neighborhoods', venues_df['Neighborhood'])

print(blr_onehot.shape)
blr_onehot.head()

(5724, 184)


Unnamed: 0,Neighborhoods,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,BBQ Joint,Badminton Court,Bakery,Bar,Bed & Breakfast,Beer Garden,Bengali Restaurant,Big Box Store,Bistro,Boarding House,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Bus Station,Butcher,Cafeteria,Café,Candy Store,Chaat Place,Chettinad Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Creperie,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dive Bar,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Financial or Legal Service,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,General Entertainment,German Restaurant,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Halal Restaurant,History Museum,Hotel,Hotel Bar,Hyderabadi Restaurant,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karnataka Restaurant,Kebab Restaurant,Kerala Restaurant,Korean Restaurant,Lake,Light Rail Station,Lighthouse,Liquor Store,Lounge,Maharashtrian Restaurant,Market,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Molecular Gastronomy Restaurant,Monument / Landmark,Motorcycle Shop,Movie Theater,Multicuisine Indian Restaurant,Multiplex,Music Venue,Neighborhood,Nightclub,North Indian Restaurant,Office,Outlet Store,Pakistani Restaurant,Paper / Office Supplies 
Store,Park,Performing Arts Venue,Pharmacy,Pizza Place,Playground,Plaza,Portuguese Restaurant,Pub,Punjabi Restaurant,Racetrack,Rajasthani Restaurant,Recreation Center,Resort,Restaurant,Road,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,South Indian Restaurant,Spa,Spiritual Center,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Szechuan Restaurant,Tea Room,Tech Startup,Tex-Mex Restaurant,Thai Restaurant,Theater,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Udupi Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Cantonment area,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Cantonment area,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Cantonment area,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Cantonment area,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Cantonment area,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Grouping data and taking their mean

In [154]:
# Average the indicator columns per neighborhood: each value becomes the share
# of that neighborhood's venues that fall in the category.
blr_grouped = blr_onehot.groupby('Neighborhoods', as_index=False).mean()
print(blr_grouped.shape)
blr_grouped.head()

(65, 184)


Unnamed: 0,Neighborhoods,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,BBQ Joint,Badminton Court,Bakery,Bar,Bed & Breakfast,Beer Garden,Bengali Restaurant,Big Box Store,Bistro,Boarding House,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bubble Tea Shop,Building,Burger Joint,Bus Station,Butcher,Cafeteria,Café,Candy Store,Chaat Place,Chettinad Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Concert Hall,Convenience Store,Creperie,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Dive Bar,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Financial or Legal Service,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gas Station,General Entertainment,German Restaurant,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Halal Restaurant,History Museum,Hotel,Hotel Bar,Hyderabadi Restaurant,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karnataka Restaurant,Kebab Restaurant,Kerala Restaurant,Korean Restaurant,Lake,Light Rail Station,Lighthouse,Liquor Store,Lounge,Maharashtrian Restaurant,Market,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Molecular Gastronomy Restaurant,Monument / Landmark,Motorcycle Shop,Movie Theater,Multicuisine Indian Restaurant,Multiplex,Music Venue,Neighborhood,Nightclub,North Indian Restaurant,Office,Outlet Store,Pakistani Restaurant,Paper / Office Supplies 
Store,Park,Performing Arts Venue,Pharmacy,Pizza Place,Playground,Plaza,Portuguese Restaurant,Pub,Punjabi Restaurant,Racetrack,Rajasthani Restaurant,Recreation Center,Resort,Restaurant,Road,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,South Indian Restaurant,Spa,Spiritual Center,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Szechuan Restaurant,Tea Room,Tech Startup,Tex-Mex Restaurant,Thai Restaurant,Theater,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Udupi Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Anjanapura,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.021739,0.021739,0.021739,0.0,0.0,0.0,0.021739,0.0,0.0,0.086957,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.065217,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.021739,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.130435,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0
1,Arekere,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.03,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.09,0.0,0.13,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.01,0.0,0.03,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.03,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0
2,BTM Layout,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.03,0.03,0.0,0.0,0.02,0.0,0.0,0.0,0.05,0.01,0.0,0.0,0.01,0.0,0.02,0.0,0.05,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.1,0.0,0.08,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01
3,Banashankari,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.06,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.06,0.0,0.18,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.04,0.01,0.01,0.02,0.0,0.03,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0
4,Banaswadi,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.012821,0.0,0.012821,0.025641,0.0,0.0,0.012821,0.012821,0.012821,0.0,0.064103,0.0,0.0,0.0,0.025641,0.0,0.0,0.012821,0.038462,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.025641,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.012821,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.064103,0.0,0.089744,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.012821,0.0,0.0,0.038462,0.0,0.012821,0.0,0.0,0.025641,0.0,0.0,0.012821,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.064103,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0


### Separating only neighborhoods and ice cream shops

In [155]:
# How many neighborhoods have a non-zero ice-cream-shop share?
has_icecream = blr_grouped["Ice Cream Shop"] > 0
print(int(has_icecream.sum()))

# Keep just the neighborhood name and its ice-cream-shop share.
blr_icecream = blr_grouped.loc[:, ["Neighborhoods", "Ice Cream Shop"]]
blr_icecream.head()

63


Unnamed: 0,Neighborhoods,Ice Cream Shop
0,Anjanapura,0.021739
1,Arekere,0.09
2,BTM Layout,0.1
3,Banashankari,0.06
4,Banaswadi,0.064103


In [156]:
# Select the ice cream shop rows with a boolean mask instead of looping over
# the rows by hand. The result always carries the three named columns, even
# when no venue matches.
is_icecream = venues_df['VenueCategory'] == "Ice Cream Shop"
icecream = (
    venues_df.loc[is_icecream, ['Neighborhood', 'VenueName', 'VenueCategory']]
    .reset_index(drop=True)  # renumber from 0 like a freshly built frame
)
print(icecream.shape)
icecream.head()

(380, 3)


Unnamed: 0,Neighborhood,VenueName,VenueCategory
0,Cantonment area,Naturals Ice Cream,Ice Cream Shop
1,Domlur,Natural's,Ice Cream Shop
2,Domlur,Corner House,Ice Cream Shop
3,Domlur,Corner House,Ice Cream Shop
4,Domlur,cream stone,Ice Cream Shop


### Observation so far:
#### Of the 65 neighborhoods in Bangalore, 63 have ice cream shops nearby, for a total of 380 ice cream shop listings

###  Clustering Neighborhoods

In [182]:
# set number of clusters
kclusters = 9

# Cluster on the ice-cream-shop share alone; the name column is not a feature.
# The axis is given by keyword because drop(labels, 1) is rejected by pandas >= 2.0.
blr_icecream_clustering = blr_icecream.drop(columns=["Neighborhoods"])

# run k-means clustering
# n_init is pinned so the number of restarts does not depend on the installed
# scikit-learn version's default.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(blr_icecream_clustering)

# cluster label assigned to each row of blr_icecream, in row order
kmeans.labels_

array([1, 3, 7, 5, 5, 6, 5, 0, 6, 7, 1, 6, 4, 7, 1, 6, 8, 5, 3, 7, 5, 8,
       2, 4, 0, 8, 5, 0, 0, 6, 3, 5, 3, 2, 5, 3, 3, 3, 5, 0, 6, 2, 6, 6,
       0, 8, 2, 6, 5, 0, 5, 2, 8, 0, 6, 5, 3, 1, 8, 2, 6, 4, 2, 0, 0],
      dtype=int32)

In [183]:
# Pair each neighborhood's ice-cream-shop share with its k-means cluster label.
# assign() returns a new frame, so blr_icecream itself is left unchanged.
blr_merged = blr_icecream.assign(**{"Cluster Labels": kmeans.labels_})

print(blr_merged.shape)
blr_merged.head()

(65, 3)


Unnamed: 0,Neighborhoods,Ice Cream Shop,Cluster Labels
0,Anjanapura,0.021739,1
1,Arekere,0.09,3
2,BTM Layout,0.1,7
3,Banashankari,0.06,5
4,Banaswadi,0.064103,5


In [184]:
# Order the rows by cluster and switch to the singular column name used by blr.
print(blr_merged.shape)
blr_merged = (
    blr_merged
    .sort_values("Cluster Labels")
    .rename(columns={"Neighborhoods": "Neighborhood"})
)
blr_merged.head()

(65, 3)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels
64,Yeshwanthpur,0.08,0
53,Sadashivanagar,0.08,0
49,R. T. Nagar,0.08,0
44,Nandini Layout,0.08,0
39,Mahalakshmi Layout,0.08,0


In [185]:
# Attach Latitude/Longitude to every clustered neighborhood by looking its
# name up in blr; the row index of blr_merged is preserved.
coordinates_by_name = blr.set_index("Neighborhood")
blr_merged = blr_merged.join(coordinates_by_name, on="Neighborhood")

### mapping final result

In [186]:

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One evenly spaced rainbow colour per cluster, as hex strings.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(blr_merged['Latitude'], blr_merged['Longitude'], blr_merged['Neighborhood'], blr_merged['Cluster Labels']):
    # Labels run 0..kclusters-1, so they index the palette directly; the
    # earlier cluster-1 lookup only worked for label 0 because -1 wraps
    # around to the last colour.
    cluster_color = rainbow[int(cluster)]
    label = folium.Popup('{} - Cluster {}'.format(poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Examine Areas

In [187]:
# Cluster 1: rows whose k-means label is 0 (labels are zero-based)
cluster_rows = blr_merged[blr_merged['Cluster Labels'] == 0]
print(cluster_rows.shape)

cluster_rows


(10, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
64,Yeshwanthpur,0.08,0,13.02954,77.54022
53,Sadashivanagar,0.08,0,13.01483,77.57771
49,R. T. Nagar,0.08,0,13.02445,77.5959
44,Nandini Layout,0.08,0,13.01481,77.53891
39,Mahalakshmi Layout,0.08,0,13.01635,77.54481
63,Yelahanka,0.083333,0,13.09931,77.59259
7,Begur,0.08,0,12.88245,77.62475
28,Kalyan Nagar,0.083333,0,12.96802,77.52114
27,Jeevanbheemanagar,0.08,0,12.96601,77.65767
24,J. P. Nagar,0.08,0,12.90831,77.59024


In [188]:
# Cluster 2: rows whose k-means label is 1
cluster_rows = blr_merged[blr_merged['Cluster Labels'] == 1]
print(cluster_rows.shape)
cluster_rows

(4, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
57,Uttarahalli,0.023256,1,12.89757,77.5283
14,Electronic City,0.019608,1,12.84547,77.6643
0,Anjanapura,0.021739,1,12.85811,77.55909
10,Bommasandra,0.019608,1,12.81753,77.67879


In [189]:
# Cluster 3: rows whose k-means label is 2
cluster_rows = blr_merged[blr_merged['Cluster Labels'] == 2]
print(cluster_rows.shape)
cluster_rows

(7, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
62,Whitefield,0.05,2,12.97523,77.75238
59,Vasanth Nagar,0.05,2,12.99073,77.58861
33,Kothnur,0.044444,2,13.06434,77.64855
41,Marathahalli,0.05,2,12.95466,77.70752
46,Padmanabhanagar,0.05,2,12.91814,77.5594
51,Rajarajeshwari Nagar,0.05,2,12.93178,77.52668
22,Hulimavu,0.052083,2,12.88064,77.60147


In [190]:
# Cluster 4: rows whose k-means label is 3
cluster_rows = blr_merged[blr_merged['Cluster Labels'] == 3]
print(cluster_rows.shape)
cluster_rows

(8, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
37,Madiwala,0.09,3,12.92052,77.6209
30,Kammanahalli,0.09,3,13.0143,77.63685
35,Kumaraswamy Layout,0.09,3,12.89819,77.55927
32,Koramangala,0.09,3,12.92004,77.62546
56,Ulsoor,0.09,3,12.98908,77.62795
18,HSR Layout,0.09,3,12.91216,77.6449
1,Arekere,0.09,3,12.88568,77.59668
36,Lingarajapuram,0.09,3,13.00548,77.62597


In [191]:
# Cluster 5: rows whose k-means label is 4
cluster_rows = blr_merged[blr_merged['Cluster Labels'] == 4]
print(cluster_rows.shape)
cluster_rows

(3, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
12,Cantonment area,0.01,4,28.651,77.17535
61,Vijayanagar,0.0,4,13.076,77.65238
23,Indiranagar,0.0,4,13.03006,77.49526


In [192]:
# Cluster 6: rows whose k-means label is 5
cluster_rows = blr_merged[blr_merged['Cluster Labels'] == 5]
print(cluster_rows.shape)
cluster_rows

(12, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
50,Rajajinagar,0.06,5,13.00544,77.55693
3,Banashankari,0.06,5,12.92231,77.56988
48,Pete area,0.06,5,12.96618,77.5869
55,Shivajinagar,0.06,5,12.9872,77.60401
4,Banaswadi,0.064103,5,13.019644,77.654692
38,Mahadevapura,0.06,5,12.99409,77.66633
6,Basaveshwaranagar,0.06,5,12.99222,77.53444
17,HBR Layout,0.06,5,13.02642,77.62437
34,Krishnarajapuram,0.06,5,13.00039,77.68368
26,Jayanagar,0.06,5,12.92872,77.58281


In [193]:
# Cluster 7: rows whose k-means label is 6
cluster_rows = blr_merged[blr_merged['Cluster Labels'] == 6]
print(cluster_rows.shape)
cluster_rows

(11, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
43,Nagarbhavi,0.074074,6,12.95624,77.50936
29,Kamakshipalya,0.07,6,12.98699,77.52484
11,CV Raman Nagar,0.07,6,12.979019,77.656138
54,Seshadripuram,0.07,6,12.99355,77.57988
47,Peenya,0.07,6,13.03185,77.52679
15,Girinagar,0.07,6,12.94279,77.54122
60,Vidyaranyapura,0.072727,6,13.07927,77.55914
40,Malleswaram,0.07,6,13.006322,77.568416
5,Basavanagudi,0.07,6,12.93898,77.57137
42,Mathikere,0.07,6,13.03235,77.55866


In [194]:
# Cluster 8: rows whose k-means label is 7
cluster_rows = blr_merged[blr_merged['Cluster Labels'] == 7]
print(cluster_rows.shape)
cluster_rows

(4, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
19,Hebbal,0.1,7,13.04981,77.58903
13,Domlur,0.1,7,12.94329,77.65602
9,Bommanahalli,0.11,7,12.90308,77.62444
2,BTM Layout,0.1,7,12.91488,77.61004


In [195]:
# Cluster 9: rows whose k-means label is 8
cluster_rows = blr_merged[blr_merged['Cluster Labels'] == 8]
print(cluster_rows.shape)
cluster_rows

(6, 5)


Unnamed: 0,Neighborhood,Ice Cream Shop,Cluster Labels,Latitude,Longitude
52,Ramamurthy Nagar,0.04,8,13.02382,77.67785
58,Varthur,0.03,8,12.94348,77.74703
16,Gottigere,0.040541,8,12.85568,77.58557
25,Jalahalli,0.033333,8,13.0545,77.52658
45,Nayandahalli,0.033708,8,12.94205,77.521
21,Horamavu,0.041096,8,13.0387,77.66192


### Observations:
#### Clusters 2 and 5 are the areas with the fewest ice cream shops,
#### Clusters 3, 6 and 9 have a moderate number of ice cream shops, and
#### Clusters 1, 4, 7 and 8 have the most ice cream shops.
#### So a new ice cream shop should most likely be opened in one of the 7 neighborhoods that make up clusters 2 and 5.

# Thank You