# BATTLE OF NEIGHBORHOODS

## INTRODUCTION

Opening a Indian restaurant in Bangalore, India

In [1]:
# Installing all libraries
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

#!conda install -c conda-forge geopy --yes 
#!conda install -c conda-forge geocoder --yes
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
import geocoder

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library
import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents
import urllib.request #For Connecting to wikipedia page 

print('Folium installed')
print('Libraries imported.')


Folium installed
Libraries imported.


## 1. Getting the Data
The data for all the neighborhoods of Bangalore is scrapped from wikipedia page and created into DataFrame along with its geogrphical location from geopy librariy

In [2]:
url='https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Bangalore'
page=urllib.request.urlopen(url)
page

<http.client.HTTPResponse at 0x2b8378c89a0>

In [3]:
# Parsing the html page and appending data into list
soup=BeautifulSoup(page,'html.parser')
neighborhoodList=[]
for i in soup.find_all("div", class_="mw-category")[0].findAll("li"):
  neighborhoodList.append(i.text)


#Creating a new dataFrame
neighbor_df=pd.DataFrame({'Neighborhood':neighborhoodList})
neighbor_df.head()

Unnamed: 0,Neighborhood
0,List of areas in Bangalore Cantonment
1,List of areas in Bengaluru Pete
2,List of neighbourhoods in Bangalore
3,Adugodi
4,"Agara, Bangalore"


In [4]:
# Droping The extra Three Rows // or more precisely cleaning data
neighbor_df=neighbor_df.drop(neighbor_df.index[[0,1,2]])
neighbor_df.reset_index(drop=True,inplace=True)
neighbor_df.head()

Unnamed: 0,Neighborhood
0,Adugodi
1,"Agara, Bangalore"
2,Ananthnagar
3,Anjanapura
4,Arekere


### 1.1 Getting the coordinates of the neighborhoods from geopy and gecoder

In [5]:

# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Bangalore, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [6]:
coords = [ get_latlng(neighborhood) for neighborhood in neighbor_df["Neighborhood"].tolist() ]

In [7]:
coords

[[12.944020000000023, 77.60800000000006],
 [12.842830000000049, 77.48759000000007],
 [12.954080000000033, 77.54135000000008],
 [12.858110000000067, 77.55909000000008],
 [12.885680000000036, 77.59668000000005],
 [12.963480000000061, 77.61297000000008],
 [13.02753000000007, 77.65049000000005],
 [13.07728933897749, 77.65760586439058],
 [13.044710000000066, 77.55008000000004],
 [12.922310000000039, 77.56988000000007],
 [13.019646729517714, 77.65469401219607],
 [12.938980000000072, 77.57137000000006],
 [12.992220000000032, 77.53444000000007],
 [12.927350000000047, 77.67185000000006],
 [12.960530000000063, 77.64385000000004],
 [13.083010000000058, 77.54779000000008],
 [12.900090000000034, 77.60433000000006],
 [12.903080000000045, 77.62444000000005],
 [12.817530000000033, 77.67879000000005],
 [12.993330000000071, 77.66123000000005],
 [12.91488000000004, 77.61004000000008],
 [12.951940000000036, 77.54445000000004],
 [12.913110000000074, 77.71565000000004],
 [12.959697329772126, 77.571311610841

In [8]:
#Creating the dataframe from the coordinates
coordinate_df=pd.DataFrame(coords,columns=['Latitude','Longitude'])
coordinate_df.head()

Unnamed: 0,Latitude,Longitude
0,12.94402,77.608
1,12.84283,77.48759
2,12.95408,77.54135
3,12.85811,77.55909
4,12.88568,77.59668


In [9]:
#Merging the coordinates to the neighborhood dataset
neighbor_df['Latitude']=coordinate_df['Latitude']
neighbor_df['Longitude']=coordinate_df['Longitude']
print(neighbor_df.shape)
neighbor_df.head()

(136, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Adugodi,12.94402,77.608
1,"Agara, Bangalore",12.84283,77.48759
2,Ananthnagar,12.95408,77.54135
3,Anjanapura,12.85811,77.55909
4,Arekere,12.88568,77.59668


In [10]:
# Getting the coordinates of Bangalore
geolocator=Nominatim(user_agent='new_app')
location=geolocator.geocode('Bangalore, India')
latitude=location.latitude
longitude=location.longitude
print("The geographical Coordinate of Bangalore is {},{}:".format(latitude,longitude))

The geographical Coordinate of Bangalore is 12.9791198,77.5912997:


### 1.3 Creating a Map of all the neighborhood in Bangalore,India

In [11]:
# Creating the map and visualizing the neighboorhoods on the folium map
bnglr_map=folium.Map(location=[latitude,longitude],zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(neighbor_df['Latitude'], neighbor_df['Longitude'], neighbor_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(bnglr_map)  
bnglr_map

## 2. Exploring the Neighborhoods from Foursquare api

In [12]:
CLIENT_ID='ISK0UL44IN2TZQRWOBIFTRVOL24ANJEWFF4WRLEZQLDSW031'
CLIENT_SECRET= 'DUIIKTSFKOINBQZKODLTCVMXVPUP4LO5K0MKRL0L2N5K1JFM'
VERSION='20180605'
print("Your Credentials:")
print("CLIENT_ID="+CLIENT_ID)
print("CLIENT_SECRET="+CLIENT_SECRET)

Your Credentials:
CLIENT_ID=ISK0UL44IN2TZQRWOBIFTRVOL24ANJEWFF4WRLEZQLDSW031
CLIENT_SECRET=DUIIKTSFKOINBQZKODLTCVMXVPUP4LO5K0MKRL0L2N5K1JFM


In [13]:
radius = 2000
LIMIT = 100

venues = []

for lat, lng, neighborhood in zip(neighbor_df['Latitude'], neighbor_df['Longitude'], neighbor_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        lng,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            lng, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

### 2.1 Creating the venues DataFrame

In [14]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head(10)

(7056, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Adugodi,12.94402,77.608,PVR IMAX,12.934595,77.611321,Movie Theater
1,Adugodi,12.94402,77.608,Tommy Hilfiger,12.934552,77.611347,Clothing Store
2,Adugodi,12.94402,77.608,Lot Like Crêpes,12.936421,77.613284,Creperie
3,Adugodi,12.94402,77.608,Koramangala Social,12.935518,77.614097,Lounge
4,Adugodi,12.94402,77.608,Dyu Art Cafe,12.937289,77.617591,Financial or Legal Service
5,Adugodi,12.94402,77.608,Truffles Ice & Spice,12.933443,77.614265,Burger Joint
6,Adugodi,12.94402,77.608,Zingron - Naga Kitchen,12.936271,77.615051,Indian Restaurant
7,Adugodi,12.94402,77.608,XOOX Brewmill,12.935507,77.614982,Brewery
8,Adugodi,12.94402,77.608,PVR Cinemas,12.934389,77.611184,Multiplex
9,Adugodi,12.94402,77.608,Mango Tree,12.933639,77.610984,Indian Restaurant


In [15]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adugodi,100,100,100,100,100,100
Ananthnagar,24,24,24,24,24,24
Anjanapura,6,6,6,6,6,6
Arekere,79,79,79,79,79,79
Austin Town,100,100,100,100,100,100
...,...,...,...,...,...,...
"Whitefield, Bangalore",69,69,69,69,69,69
Wilson Garden,71,71,71,71,71,71
Yelachenahalli,48,48,48,48,48,48
Yelahanka,27,27,27,27,27,27


### 2.2 checking the total number of unique categories

In [16]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 234 uniques categories.


In [17]:
venues_df['VenueCategory'].unique()[:50]

array(['Movie Theater', 'Clothing Store', 'Creperie', 'Lounge',
       'Financial or Legal Service', 'Burger Joint', 'Indian Restaurant',
       'Brewery', 'Multiplex', 'Breakfast Spot', 'Café', 'Tea Room',
       'Bar', 'Mobile Phone Shop', 'Dessert Shop', 'Donut Shop', 'Gym',
       'Shopping Mall', 'Coffee Shop', 'Juice Bar', 'Ice Cream Shop',
       'Chinese Restaurant', 'Bakery', 'Gaming Cafe', 'Yoga Studio',
       'Smoke Shop', "Men's Store", 'Italian Restaurant', 'Arcade',
       'Indian Sweet Shop', 'Bookstore', 'Kerala Restaurant',
       'Snack Place', 'Andhra Restaurant', 'Pizza Place',
       'Mexican Restaurant', 'Fast Food Restaurant', 'Cosmetics Shop',
       'Punjabi Restaurant', 'Bowling Alley', 'Comfort Food Restaurant',
       'Tibetan Restaurant', 'Eastern European Restaurant',
       'Persian Restaurant', 'Thai Restaurant',
       'Paper / Office Supplies Store', 'Seafood Restaurant',
       'Restaurant', 'Sandwich Place', 'Liquor Store'], dtype=object)

## 3. Analyzing each neighborhoods

In [18]:

# one hot encoding
bangalore_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
bangalore_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [bangalore_onehot.columns[-1]] + list(bangalore_onehot.columns[:-1])
bangalore_onehot = bangalore_onehot[fixed_columns]

print(bangalore_onehot.shape)
bangalore_onehot.head(20)

(7056, 235)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Art Museum,...,Travel & Transport,Turkish Restaurant,Udupi Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Adugodi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Adugodi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Adugodi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Adugodi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Adugodi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Adugodi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Adugodi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Adugodi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Adugodi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Adugodi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
bangalore_grouped = bangalore_onehot.groupby(["Neighborhoods"]).sum().reset_index()
#df.group/by("state")["last_name"].count()

#kl_onehot.head()
print(bangalore_grouped.shape)
bangalore_grouped

(135, 235)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Andhra Restaurant,Arcade,Art Gallery,Art Museum,...,Travel & Transport,Turkish Restaurant,Udupi Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Adugodi,0,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,1
1,Ananthnagar,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Anjanapura,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Arekere,0,0,0,0,1,1,0,0,0,...,0,0,0,1,0,0,0,1,0,0
4,Austin Town,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,"Whitefield, Bangalore",0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
131,Wilson Garden,0,0,0,0,0,0,1,0,0,...,0,0,0,2,0,0,1,0,0,0
132,Yelachenahalli,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
133,Yelahanka,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### 3.1 Creating only Indian restaurant DataFrame

In [20]:
len(bangalore_grouped[bangalore_grouped["Indian Restaurant"]>0])

118

In [21]:
restaurant_df=bangalore_grouped[['Neighborhoods','Indian Restaurant']]
print(restaurant_df.shape)
restaurant_df

(135, 2)


Unnamed: 0,Neighborhoods,Indian Restaurant
0,Adugodi,17
1,Ananthnagar,1
2,Anjanapura,0
3,Arekere,9
4,Austin Town,9
...,...,...
130,"Whitefield, Bangalore",7
131,Wilson Garden,9
132,Yelachenahalli,8
133,Yelahanka,4


## 4. Clustering the neighborhoods

Using KMeans clustering to cluster the Bangalore data into 4 clusters

In [22]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 4

bnglr_clustering = restaurant_df.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(bnglr_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 2, 2, 0, 0, 1, 0, 2, 2, 1])

In [23]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
bnglr_merged = restaurant_df.copy()

# add clustering labels
bnglr_merged["Cluster Labels"] = kmeans.labels_

In [24]:
bnglr_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
bnglr_merged.head(10)

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels
0,Adugodi,17,3
1,Ananthnagar,1,2
2,Anjanapura,0,2
3,Arekere,9,0
4,Austin Town,9,0
5,BTM Layout,21,1
6,Babusapalya,7,0
7,"Bagalur, Bangalore Urban",0,2
8,Bahubalinagar,4,2
9,Banashankari,21,1


In [25]:
bnglr_merged['Latitude']=coordinate_df['Latitude']
bnglr_merged['Longitude']=coordinate_df['Longitude']
bnglr_merged.head()

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,Adugodi,17,3,12.94402,77.608
1,Ananthnagar,1,2,12.84283,77.48759
2,Anjanapura,0,2,12.95408,77.54135
3,Arekere,9,0,12.85811,77.55909
4,Austin Town,9,0,12.88568,77.59668


In [26]:
print(bnglr_merged.shape)
bnglr_merged.sort_values(["Cluster Labels"], inplace=True)
bnglr_merged

(135, 5)


Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
118,Tannery Road,10,0,12.88813,77.72807
19,Brookefield,6,0,12.99333,77.66123
78,Kundalahalli,11,0,12.89819,77.55927
98,R. T. Nagar,9,0,12.89434,77.58507
99,Rajajinagar,9,0,13.02445,77.59590
...,...,...,...,...,...
33,Ejipura,16,3,12.94329,77.65602
108,Sanjaynagar,13,3,13.06271,77.58550
31,Dollars Colony,13,3,12.88833,77.61764
55,J. P. Nagar,16,3,12.92344,77.54284


## 5. Visualizing the clustering into Folium map 

In [27]:
import matplotlib.cm as cm
import matplotlib.colors as colors


In [28]:
cluster_map=folium.Map(location=[latitude,longitude],zoom_start=11)
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bnglr_merged['Latitude'], bnglr_merged['Longitude'], bnglr_merged['Neighborhood'], bnglr_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(cluster_map)
       
cluster_map

## 6. Analyzing each cluster

### 6.1 Cluster 0

In [29]:
bnglr_merged.loc[bnglr_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
118,Tannery Road,10,0,12.88813,77.72807
19,Brookefield,6,0,12.99333,77.66123
78,Kundalahalli,11,0,12.89819,77.55927
98,R. T. Nagar,9,0,12.89434,77.58507
99,Rajajinagar,9,0,13.02445,77.5959
45,HSR Layout,11,0,12.94372,77.5612
81,"Mahadevapura, Bangalore",7,0,12.92052,77.6209
83,Malleswaram,9,0,13.01635,77.54481
44,HBR Layout,10,0,12.90968,77.55675
54,Ittamadu,7,0,13.03006,77.49526


In [30]:
print(len(bnglr_merged.loc[bnglr_merged['Cluster Labels'] == 0]))

34


### 6.2 Cluster 1

In [31]:
bnglr_merged.loc[bnglr_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
59,"Jayanagar, Bangalore",23,1,13.0545,77.52658
95,Palace Guttahalli,20,1,12.91814,77.5594
87,Milk Colony,21,1,13.03235,77.55866
36,Gandhi Bazaar,24,1,12.99894,77.61276
35,"Fraser Town, Bangalore",21,1,12.84547,77.6643
5,BTM Layout,21,1,12.96348,77.61297
9,Banashankari,21,1,12.92231,77.56988
11,Basavanagudi,21,1,12.93898,77.57137
125,Vasanth Nagar,19,1,12.94348,77.74703
21,CV Raman Nagar,18,1,12.95194,77.54445


In [32]:
len(bnglr_merged.loc[bnglr_merged['Cluster Labels'] == 1])

17

### 6.3 Cluster 2

In [33]:
bnglr_merged.loc[bnglr_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
77,Kumaraswamy Layout,3,2,13.00039,77.68368
82,Mahalakshmi Layout,2,2,12.99409,77.66633
76,Krishnarajapuram,2,2,12.92004,77.62546
85,Mariyannapalya,2,2,12.95466,77.70752
91,Nagarbhavi,1,2,13.04103,77.55148
67,Kalyan Nagar,4,2,13.03403,77.67511
93,Nayandahalli,0,2,13.01481,77.53891
96,Peenya,1,2,12.995508,77.574772
100,"Rajarajeshwari Nagar, Bangalore",0,2,13.00544,77.55693
102,Ramagondanahalli,0,2,13.06124,77.55112


In [34]:
len(bnglr_merged.loc[bnglr_merged['Cluster Labels'] == 2])

56

 ### 6.4 Cluster 3

In [35]:
bnglr_merged.loc[bnglr_merged['Cluster Labels'] == 3]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
17,Bommanahalli,16,3,12.90308,77.62444
122,Ulsoor,14,3,12.93392,77.56818
14,Bengaluru Pete,14,3,12.96053,77.64385
27,Cooke Town,14,3,13.00363,77.62283
28,"Cox Town, Bangalore",14,3,12.99594,77.62644
116,Suddaguntepalya,14,3,12.956417,77.63839
16,Bilekahalli,14,3,12.90009,77.60433
115,"Statue of Queen Victoria, Bangalore",12,3,12.96618,77.5869
72,"Kodihalli, Bangalore",12,3,13.06724,77.56992
111,"Shivajinagar, Bangalore",15,3,12.8326,77.65685


In [36]:
len(bnglr_merged.loc[bnglr_merged['Cluster Labels'] == 3])

28

# Final Observation

### By analyzing each cluster it gives the output that cluster 0 have 34 Indian restaurant across the city, and cluster2 being the most in number i.e 54 Indian restaurants,So This projects states that to get out the maximum profit and getting the best result t open a new Indian restaurant someone should consider oepning it in cluster1 area because it has the lowest numbers of rstaurant int that area.