# Import Libraries

In [1]:
import numpy as np 
import pandas as pd 
import json 

from geopy.geocoders import Nominatim 
import folium
import geocoder
import requests 
from pandas.io.json import json_normalize

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

# Read Data

In [2]:
blr_df = pd.read_csv('blr_df_new.csv')

In [3]:
#This holds a dataset of Areas of Bangalore along with their respective coordinates
blr_df.head()

Unnamed: 0,Area,Latitude,Longitude
0,A F Station Yelahanka,13.12682,77.61066
1,Agram,12.9984,77.57169
2,Amruthahalli,13.06684,77.5951
3,Anandnagar,12.96348,77.70202
4,Arabic College,13.03344,77.619345


# Create a map of Bangalore

In [4]:
#Find coordinates of Bangalore 
address = 'Bangalore, India'

geolocator = Nominatim(user_agent="coursera")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Bangalore, India is {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Bangalore, India is 12.9791198, 77.5912997.


In [5]:
#Create a map instance using folium
map_blr = folium.Map(location=[latitude, longitude], zoom_start=11)

#Add details into the map from the dataframe
for lat, lng, Area in zip(blr_df['Latitude'], blr_df['Longitude'], blr_df['Area']):
    label = '{}'.format(Area)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7).add_to(map_blr)  
    
map_blr
#This shows all the neighborhoods/areas in Bangalore

# Using Foursquare API find venues

In [6]:
#Define Foursquare Credentials and Version
CLIENT_ID = 'ZK40VO3EPO1ZY5C04PNYSMQH2ZFZ3U1TPL0QME3WEY553DMH' # your Foursquare ID
CLIENT_SECRET = 'RQBRNE2T3OTMYY4BBJ3YWLS0P2FK413O22MQV1QHBHAD04WG' # your Foursquare Secret
VERSION = '20190612' # Foursquare API version

In [7]:
#Radius = 3500 meters
radius = 4000
#Number of venues limited to 100 per area
LIMIT = 100

venues = []

for lat, long, Area in zip(blr_df['Latitude'], blr_df['Longitude'], blr_df['Area']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            Area,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [8]:
#Convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)
venues_df.columns = ['Area', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
venues_df.head()

Unnamed: 0,Area,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,A F Station Yelahanka,13.12682,77.61066,Cafe Potenza,13.121925,77.623036,Café
1,A F Station Yelahanka,13.12682,77.61066,cafe coffee day,13.145366,77.617906,Coffee Shop
2,A F Station Yelahanka,13.12682,77.61066,Cafe Coffee Day,13.094997,77.597301,Café
3,A F Station Yelahanka,13.12682,77.61066,Cafe Coffee Day,13.09939,77.588282,Café
4,A F Station Yelahanka,13.12682,77.61066,A2B restaurant,13.152166,77.620648,Indian Restaurant


In [9]:
venues_df.shape

(6911, 7)

In [10]:
print('There are {} uniques categories.'.format(venues_df['VenueCategory'].nunique()))

There are 204 uniques categories.


In [11]:
#List of all the Categories
venues_df['VenueCategory'].unique()[:-1]

array(['Café', 'Coffee Shop', 'Indian Restaurant', 'Clothing Store',
       'American Restaurant', 'Train Station', 'Smoke Shop', 'Food Truck',
       'Vegetarian / Vegan Restaurant', 'Fast Food Restaurant',
       'South Indian Restaurant', 'Golf Course', 'Ice Cream Shop',
       'Department Store', 'Resort', 'Snack Place', 'Art Gallery', 'Gym',
       'Bakery', 'Hotel', 'Karnataka Restaurant', 'Shopping Mall',
       'Food & Drink Shop', 'Multiplex', 'Gym / Fitness Center',
       'Steakhouse', 'Bowling Alley', 'Racetrack', 'Tea Room', 'Lounge',
       'Movie Theater', 'Pub', 'Seafood Restaurant',
       'Monument / Landmark', 'Donut Shop', 'Park', 'Italian Restaurant',
       'Electronics Store', 'Burger Joint', 'Cricket Ground',
       'French Restaurant', 'Bar', 'Mexican Restaurant',
       'Japanese Restaurant', 'Motorcycle Shop', 'Asian Restaurant',
       'Cupcake Shop', 'Gas Station', 'Boutique', 'Bistro',
       'Chinese Restaurant', 'Thai Restaurant', 'Brewery',
       'Sand

# Clean and expand data

### Create onehot encoded dataframe on VenueCategory

In [12]:
#One hot encoding
blr_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

#Copy Area column
blr_onehot['Area'] = venues_df['Area'] 

#Bring last column to first and rearrange the rest afterwards
fixed_columns = [blr_onehot.columns[-1]] + list(blr_onehot.columns[:-1])
blr_onehot = blr_onehot[fixed_columns]

print(blr_onehot.shape)
blr_onehot.head()

(6911, 205)


Unnamed: 0,Area,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Service,Airport Terminal,American Restaurant,Andhra Restaurant,Arcade,...,Trail,Train Station,Udupi Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo
0,A F Station Yelahanka,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,A F Station Yelahanka,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,A F Station Yelahanka,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,A F Station Yelahanka,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,A F Station Yelahanka,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Convert the onehot DF by replaced values grouped-mean

In [13]:
blr_grouped = blr_onehot.groupby(["Area"]).mean().reset_index()

print(blr_grouped.shape)
blr_grouped.head()

(101, 205)


Unnamed: 0,Area,ATM,Accessories Store,Afghan Restaurant,Airport,Airport Service,Airport Terminal,American Restaurant,Andhra Restaurant,Arcade,...,Trail,Train Station,Udupi Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo
0,A F Station Yelahanka,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,...,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0
1,Adugodi,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0
2,Agara,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,...,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0
3,Agram,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0
4,Amruthahalli,0.0,0.0,0.0,0.018182,0.0,0.0,0.018182,0.0,0.0,...,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Select particular columns based on scenario

In [14]:
blr_night = blr_grouped[["Area",'Brewery','Lounge',"Nightclub", "Pub", "Sports Bar", "Gastropub", "Bistro", "Beer Bar", "Cocktail Bar", "Bar", "Beer Garden"]]

In [15]:
blr_night.head()

Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden
0,A F Station Yelahanka,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Adugodi,0.04,0.06,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.01,0.0
2,Agara,0.01,0.03,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.01,0.0
3,Agram,0.01,0.05,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.0
4,Amruthahalli,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# set number of clusters
kclusters = 4

blr_clustering = blr_night.drop(["Area"], axis = 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(blr_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 2, 0, 0, 1, 0, 1, 3, 0, 1])

In [17]:
#Create a new dataframe that includes the cluster Labels
blr_merged = blr_night.copy()

# add cluster labels
blr_merged["Cluster Labels"] = kmeans.labels_

In [18]:
blr_merged.head()

Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden,Cluster Labels
0,A F Station Yelahanka,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,Adugodi,0.04,0.06,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.01,0.0,2
2,Agara,0.01,0.03,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0
3,Agram,0.01,0.05,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0
4,Amruthahalli,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [19]:
#Number of clusters
blr_merged['Cluster Labels'].unique()

array([1, 2, 0, 3])

In [20]:
# merge blr_merged with blr_df to get coordinates
blr_merged = blr_merged.join(blr_df.set_index("Area"), on="Area")

print(blr_merged.shape)
blr_merged.head() # check the last columns!

(101, 15)


Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden,Cluster Labels,Latitude,Longitude
0,A F Station Yelahanka,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,13.12682,77.61066
1,Adugodi,0.04,0.06,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.01,0.0,2,12.94402,77.608
2,Agara,0.01,0.03,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0,12.9231,77.6465
3,Agram,0.01,0.05,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0,12.9984,77.57169
4,Amruthahalli,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,13.06684,77.5951


In [21]:
#Sort the results by Cluster Labels
blr_merged.sort_values(["Cluster Labels"], inplace=True)

In [22]:
blr_merged.reset_index(inplace = True, drop =True)

In [23]:
blr_merged.head()

Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden,Cluster Labels,Latitude,Longitude
0,Yeshwanthpur Bazar,0.01,0.03,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0,13.02578,77.55783
1,Basaveshwaranagar,0.01,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0,12.99222,77.53444
2,Laggere,0.014286,0.042857,0.0,0.014286,0.0,0.0,0.0,0.0,0.0,0.014286,0.0,0,13.00763,77.52165
3,Dommasandra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0,12.88492,77.74721
4,Bhattarahalli,0.014925,0.029851,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.014925,0.0,0,13.02002,77.70978


# View the clusters

In [24]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(blr_merged['Latitude'], blr_merged['Longitude'], blr_merged['Area'], blr_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
      
map_clusters

In [25]:
map_clusters.save("map_c14.html")

# Examine Clusters

#### Cluster 0

In [26]:
cluster_0 = blr_merged.loc[blr_merged['Cluster Labels'] == 0]

In [27]:
cluster_0['Score'] = cluster_0['Brewery']*0.5 + cluster_0['Lounge']*0.5 + cluster_0['Pub']*0.5 + cluster_0['Nightclub']*0.5 + cluster_0['Sports Bar']*0.5+ cluster_0['Gastropub']*0.5+ cluster_0['Bistro']*0.5+ cluster_0['Beer Bar']*0.5+cluster_0['Cocktail Bar']*0.5+cluster_0['Bar']*0.5+cluster_0['Beer Garden']*0.5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [28]:
cluster_0

Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden,Cluster Labels,Latitude,Longitude,Score
0,Yeshwanthpur Bazar,0.01,0.03,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0,13.02578,77.55783,0.045
1,Basaveshwaranagar,0.01,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0,12.99222,77.53444,0.03
2,Laggere,0.014286,0.042857,0.0,0.014286,0.0,0.0,0.0,0.0,0.0,0.014286,0.0,0,13.00763,77.52165,0.042857
3,Dommasandra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0,12.88492,77.74721,0.045455
4,Bhattarahalli,0.014925,0.029851,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.014925,0.0,0,13.02002,77.70978,0.037313
5,Jayangar III Block,0.04,0.02,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0,12.93429,77.58277,0.045
6,Jayanagar,0.04,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0,12.92872,77.58281,0.04
7,C.V.Raman Nagar,0.03,0.04,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0,12.98713,77.66386,0.07
8,Carmelram,0.014493,0.028986,0.0,0.014493,0.014493,0.014493,0.0,0.0,0.0,0.028986,0.0,0,12.909638,77.704379,0.057971
9,Chamrajpet,0.01,0.04,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0,12.959697,77.571312,0.045


#### Cluster 1

In [29]:
cluster_1 = blr_merged.loc[blr_merged['Cluster Labels'] == 1]

In [30]:
cluster_1['Score'] = cluster_1['Brewery']*0.5 + cluster_1['Lounge']*0.5 + cluster_1['Pub']*0.5 + cluster_1['Nightclub']*0.5 + cluster_1['Sports Bar']*0.5+ cluster_1['Gastropub']*0.5+ cluster_1['Bistro']*0.5+ cluster_1['Beer Bar']*0.5+cluster_1['Cocktail Bar']*0.5+cluster_1['Bar']*0.5+cluster_1['Beer Garden']*0.5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [31]:
cluster_1

Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden,Cluster Labels,Latitude,Longitude,Score
34,Gaviopuram Extension,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,1,12.94528,77.56646,0.005
35,Ullalu Upanagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,12.95619,77.48006,0.0
36,Tarabanahalli,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,13.10124,77.48206,0.0
37,HSR Layout,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.01,1,12.91216,77.6449,0.025
38,Haragadde,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,12.76287,77.65688,0.0
39,G.K.V.K.,0.016667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,13.07252,77.59254,0.008333
40,Jalahalli East,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,13.06824,77.54105,0.0
41,Rv Niketan,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,1,12.91969,77.50083,0.030303
42,Peenya Dasarahalli,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,13.06,77.61034,0.010638
43,Jalahalli West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,13.06173,77.53105,0.0


In [32]:
cluster_1.shape

(39, 16)

#### Cluster 2

In [33]:
cluster_2 = blr_merged.loc[blr_merged['Cluster Labels'] == 2]

In [34]:
cluster_2['Score'] = cluster_2['Brewery']*0.5 + cluster_2['Lounge']*0.5 + cluster_2['Pub']*0.5 + cluster_2['Nightclub']*0.5 + cluster_2['Sports Bar']*0.5+ cluster_2['Gastropub']*0.5+ cluster_2['Bistro']*0.5+ cluster_2['Beer Bar']*0.5+cluster_2['Cocktail Bar']*0.5+cluster_2['Bar']*0.5+cluster_2['Beer Garden']*0.5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [35]:
cluster_2

Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden,Cluster Labels,Latitude,Longitude,Score
73,H.K.P. Road,0.02,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.0,2,12.988672,77.602683,0.055
74,Austin Town,0.03,0.05,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.02,0.0,2,12.96348,77.61297,0.08
75,Dharmaram College,0.04,0.02,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.01,0.0,2,12.93675,77.60264,0.06
76,Doddagubbi,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,13.06996,77.67397,0.038462
77,Chickpet,0.02,0.04,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.0,0.0,2,12.97015,77.57589,0.055
78,Science Institute,0.03,0.04,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.01,0.0,2,12.94385,77.59711,0.065
79,Domlur,0.01,0.02,0.0,0.05,0.0,0.0,0.0,0.0,0.01,0.02,0.0,2,12.94329,77.65602,0.055
80,Sivan Chetty Gardens,0.03,0.04,0.0,0.05,0.0,0.0,0.0,0.0,0.01,0.0,0.0,2,12.98556,77.61518,0.065
81,Fraser Town,0.03,0.04,0.0,0.04,0.0,0.0,0.01,0.0,0.01,0.0,0.0,2,12.99894,77.61276,0.065
82,Adugodi,0.04,0.06,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.01,0.0,2,12.94402,77.608,0.08


In [36]:
cluster_2.shape

(26, 16)

#### Cluster 3

In [37]:
cluster_3 = blr_merged.loc[blr_merged['Cluster Labels'] == 3]

In [38]:
cluster_3['Score'] = cluster_3['Brewery']*0.5 + cluster_3['Lounge']*0.5 + cluster_3['Pub']*0.5 + cluster_3['Nightclub']*0.5 + cluster_3['Sports Bar']*0.5+ cluster_3['Gastropub']*0.5+ cluster_3['Bistro']*0.5+ cluster_3['Beer Bar']*0.5+cluster_3['Cocktail Bar']*0.5+cluster_3['Bar']*0.5+cluster_3['Beer Garden']*0.5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [39]:
cluster_3

Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden,Cluster Labels,Latitude,Longitude,Score
99,Anjanapura,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,12.85811,77.55909,0.05
100,Thalaghattapura,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,12.8648,77.53289,0.055556


In [40]:
cluster_3.shape

(2, 16)

#### Observations:

In [41]:
print('The score of cluster_0 ranges from {} to {}'.format(min(cluster_0['Score']),max(cluster_0['Score'])))
print('The score of cluster_1 ranges from {} to {}'.format(min(cluster_1['Score']),max(cluster_1['Score'])))
print('The score of cluster_2 ranges from {} to {}'.format(min(cluster_2['Score']),max(cluster_2['Score'])))
print('The score of cluster_3 ranges from {} to {}'.format(min(cluster_3['Score']),max(cluster_3['Score'])))

The score of cluster_0 ranges from 0.030000000000000002 to 0.07
The score of cluster_1 ranges from 0.0 to 0.03125
The score of cluster_2 ranges from 0.038461538461538464 to 0.08000000000000002
The score of cluster_3 ranges from 0.05 to 0.05555555555555555


As we can see `cluster_2` has the maximum value when it comes to 'Score' and `cluster_1` has the minimum

#### SAMPLE CLUSTER_2

In [72]:
#List of all areas in cluster_2
cluster_2['Area']

73                    H.K.P. Road
74                    Austin Town
75              Dharmaram College
76                     Doddagubbi
77                       Chickpet
78              Science Institute
79                         Domlur
80           Sivan Chetty Gardens
81                    Fraser Town
82                        Adugodi
83               Doddakallasandra
84               Sampangiramnagar
85                     Hampinagar
86              Maruthi Sevanagar
87    Bangalore Dist Offices Bldg
88     Bangalore Sub Foreign Post
89                 Lingarajapuram
90                           EPIP
91                    Benson Town
92                    Koramangala
93               Jeevanbhimanagar
94        Bnagalore Viswavidalaya
95                         Bolare
96                    Indiranagar
97                            NAL
98                  Doorvaninagar
Name: Area, dtype: object

### Cluster 2 contains places like Adugodi, Koramangala and Indiranagar which are famous for their large number of pubs and clubs

In [69]:
#Picking up a random area from cluster 2
c2_koramangala = venues_df[(venues_df['Area']=='Koramangala') & ((venues_df['VenueCategory']=='Lounge')|(venues_df['VenueCategory']=='Brewery')|(venues_df['VenueCategory']=='Nightclub')|(venues_df['VenueCategory']=='Pub')|(venues_df['VenueCategory']=='Sports Bar')|(venues_df['VenueCategory']=='Gastropub')|(venues_df['VenueCategory']=='Bistro')|(venues_df['VenueCategory']=='Beer Bar')|(venues_df['VenueCategory']=='Cocktail Bar')|(venues_df['VenueCategory']=='Bar')|(venues_df['VenueCategory']=='Beer Garden'))]

In [76]:
len(c2_koramangala)

14

In [70]:
#Picking up the area with highest score in cluster 2
c2_indiranagar = venues_df[(venues_df['Area']=='Indiranagar') & ((venues_df['VenueCategory']=='Lounge')|(venues_df['VenueCategory']=='Brewery')|(venues_df['VenueCategory']=='Nightclub')|(venues_df['VenueCategory']=='Pub')|(venues_df['VenueCategory']=='Sports Bar')|(venues_df['VenueCategory']=='Gastropub')|(venues_df['VenueCategory']=='Bistro')|(venues_df['VenueCategory']=='Beer Bar')|(venues_df['VenueCategory']=='Cocktail Bar')|(venues_df['VenueCategory']=='Bar')|(venues_df['VenueCategory']=='Beer Garden'))]

In [77]:
len(c2_indiranagar)

16

#### SAMPLE  CLUSTER_0

In [71]:
#List of areas in cluster_0
cluster_0['Area']

0                  Yeshwanthpur Bazar
1                   Basaveshwaranagar
2                             Laggere
3                         Dommasandra
4                       Bhattarahalli
5                  Jayangar III Block
6                           Jayanagar
7                     C.V.Raman Nagar
8                           Carmelram
9                          Chamrajpet
10                          J.C.Nagar
11                          J P Nagar
12            ISRO Anthariksha Bhavan
13                              Hoodi
14                         Devasandra
15                     H.A.L II Stage
16                      Gayathrinagar
17                      Doddanekkundi
18                          Bapagrama
19            Mahalakshmipuram Layout
20                          Bellandur
21    Bangalore International Airport
22                     Arabic College
23                         Anandnagar
24                        Malleswaram
25           P&T Col. Kavalbyrasandra
26          

In [81]:
cluster_0[cluster_0['Score']==0.07]

Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden,Cluster Labels,Latitude,Longitude,Score
7,C.V.Raman Nagar,0.03,0.04,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0,12.98713,77.66386,0.07


In [82]:
#Picking up area with highest score in cluster 0
c0_cvraman = venues_df[(venues_df['Area']=='C.V.Raman Nagar') & ((venues_df['VenueCategory']=='Lounge')|(venues_df['VenueCategory']=='Brewery')|(venues_df['VenueCategory']=='Nightclub')|(venues_df['VenueCategory']=='Pub')|(venues_df['VenueCategory']=='Sports Bar')|(venues_df['VenueCategory']=='Gastropub')|(venues_df['VenueCategory']=='Bistro')|(venues_df['VenueCategory']=='Beer Bar')|(venues_df['VenueCategory']=='Cocktail Bar')|(venues_df['VenueCategory']=='Bar')|(venues_df['VenueCategory']=='Beer Garden'))]

In [83]:
len(c0_cvraman)

14

In [84]:
#Picking up random area in cluster 0
c0_malleswaram =  venues_df[(venues_df['Area']=='Malleswaram') & ((venues_df['VenueCategory']=='Lounge')|(venues_df['VenueCategory']=='Brewery')|(venues_df['VenueCategory']=='Nightclub')|(venues_df['VenueCategory']=='Pub')|(venues_df['VenueCategory']=='Sports Bar')|(venues_df['VenueCategory']=='Gastropub')|(venues_df['VenueCategory']=='Bistro')|(venues_df['VenueCategory']=='Beer Bar')|(venues_df['VenueCategory']=='Cocktail Bar')|(venues_df['VenueCategory']=='Bar')|(venues_df['VenueCategory']=='Beer Garden'))]

In [85]:
len(c0_malleswaram)

8

#### SAMPLE CLUSTER_1

In [73]:
#List of areas in cluster_1
cluster_1['Area']

34             Gaviopuram Extension
35                  Ullalu Upanagar
36                    Tarabanahalli
37                       HSR Layout
38                        Haragadde
39                         G.K.V.K.
40                   Jalahalli East
41                       Rv Niketan
42               Peenya Dasarahalli
43                   Jalahalli West
44                     Nayandahalli
45                       Nagarbhavi
46                      Kodigehalli
47                      Kumbalagodu
48                      Magadi Road
49                        Jalahalli
50                 Electronics City
51            A F Station Yelahanka
52                   Vidyaranyapura
53                     Amruthahalli
54                           Anekal
55                       Ashoknagar
56                         Attibele
57                            Attur
58                       Bagalgunte
59                          Bagalur
60           Banashankari III Stage
61                     Banne

In [87]:
cluster_1[cluster_1['Score']==0.03125]

Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden,Cluster Labels,Latitude,Longitude,Score
45,Nagarbhavi,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,1,12.95624,77.50936,0.03125


In [88]:
#Picking up area with highest score in cluster 1
c1_nagarbhavi = venues_df[(venues_df['Area']=='Nagarbhavi') & ((venues_df['VenueCategory']=='Lounge')|(venues_df['VenueCategory']=='Brewery')|(venues_df['VenueCategory']=='Nightclub')|(venues_df['VenueCategory']=='Pub')|(venues_df['VenueCategory']=='Sports Bar')|(venues_df['VenueCategory']=='Gastropub')|(venues_df['VenueCategory']=='Bistro')|(venues_df['VenueCategory']=='Beer Bar')|(venues_df['VenueCategory']=='Cocktail Bar')|(venues_df['VenueCategory']=='Bar')|(venues_df['VenueCategory']=='Beer Garden'))]

In [89]:
len(c1_nagarbhavi)

3

In [94]:
cluster_1[cluster_1['Area']=='Begur']

Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden,Cluster Labels,Latitude,Longitude,Score
66,Begur,0.011494,0.0,0.0,0.011494,0.0,0.0,0.0,0.0,0.0,0.011494,0.011494,1,12.88245,77.62475,0.022989


In [90]:
#Picking up random area in cluster 1
c1_begur = c0_cvraman = venues_df[(venues_df['Area']=='Begur') & ((venues_df['VenueCategory']=='Lounge')|(venues_df['VenueCategory']=='Brewery')|(venues_df['VenueCategory']=='Nightclub')|(venues_df['VenueCategory']=='Pub')|(venues_df['VenueCategory']=='Sports Bar')|(venues_df['VenueCategory']=='Gastropub')|(venues_df['VenueCategory']=='Bistro')|(venues_df['VenueCategory']=='Beer Bar')|(venues_df['VenueCategory']=='Cocktail Bar')|(venues_df['VenueCategory']=='Bar')|(venues_df['VenueCategory']=='Beer Garden'))]

In [91]:
len(c1_begur)

4

#### SAMPLE CLUSTER_3

In [74]:
#List of areas in cluster_3
cluster_3['Area']

99          Anjanapura
100    Thalaghattapura
Name: Area, dtype: object

In [96]:
cluster_3[cluster_3['Score']==0.05555555555555555]

Unnamed: 0,Area,Brewery,Lounge,Nightclub,Pub,Sports Bar,Gastropub,Bistro,Beer Bar,Cocktail Bar,Bar,Beer Garden,Cluster Labels,Latitude,Longitude,Score
100,Thalaghattapura,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,12.8648,77.53289,0.055556


In [97]:
c3_thalaghattapura = venues_df[(venues_df['Area']=='Thalaghattapura') & ((venues_df['VenueCategory']=='Lounge')|(venues_df['VenueCategory']=='Brewery')|(venues_df['VenueCategory']=='Nightclub')|(venues_df['VenueCategory']=='Pub')|(venues_df['VenueCategory']=='Sports Bar')|(venues_df['VenueCategory']=='Gastropub')|(venues_df['VenueCategory']=='Bistro')|(venues_df['VenueCategory']=='Beer Bar')|(venues_df['VenueCategory']=='Cocktail Bar')|(venues_df['VenueCategory']=='Bar')|(venues_df['VenueCategory']=='Beer Garden'))]

In [98]:
len(c3_thalaghattapura)

1

In [99]:
c3_anjanapura = venues_df[(venues_df['Area']=='Anjanapura') & ((venues_df['VenueCategory']=='Lounge')|(venues_df['VenueCategory']=='Brewery')|(venues_df['VenueCategory']=='Nightclub')|(venues_df['VenueCategory']=='Pub')|(venues_df['VenueCategory']=='Sports Bar')|(venues_df['VenueCategory']=='Gastropub')|(venues_df['VenueCategory']=='Bistro')|(venues_df['VenueCategory']=='Beer Bar')|(venues_df['VenueCategory']=='Cocktail Bar')|(venues_df['VenueCategory']=='Bar')|(venues_df['VenueCategory']=='Beer Garden'))]

In [100]:
len(c3_anjanapura)

1