## Capstone Project
## IBM Data Science Professional
# **```Opening of a new Indian food restaurant in Pune city```**
### ```Heramb Lonkar```
***

### 1. Import libraries

In [1]:
import numpy as np 
import pandas as pd 
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json 

from geopy.geocoders import Nominatim
from geopy.geocoders import Here #Using OpenStreetMap for co-ordinates
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import seaborn as sns #Using Seaborn for graphs instead of MatPlotLib

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Good to Go!")

Good to Go!


### 2. Scrape data from Wikipedia page into a DataFrame

In [2]:
# send the GET request
data = requests.get("https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Pune").text

In [3]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

In [4]:
# create a list to store neighborhood data
neighborhoodList = []

In [5]:
#Let's check where is the neighbourhoods list
wiki_list0 = soup.find_all("ul")[0]
wiki_list0

<ul>
<li class="toclevel-1 tocsection-1"><a href="#Old_city"><span class="tocnumber">1</span> <span class="toctext">Old city</span></a></li>
<li class="toclevel-1 tocsection-2"><a href="#Other_areas_under_Pune_Municipal_Corporation"><span class="tocnumber">2</span> <span class="toctext">Other areas under Pune Municipal Corporation</span></a></li>
<li class="toclevel-1 tocsection-3"><a href="#Areas_under_Pimpri-Chinchwad_Municipal_Corporation"><span class="tocnumber">3</span> <span class="toctext">Areas under Pimpri-Chinchwad Municipal Corporation</span></a></li>
<li class="toclevel-1 tocsection-4"><a href="#Areas_around_Vadgaon"><span class="tocnumber">4</span> <span class="toctext">Areas around Vadgaon</span></a></li>
<li class="toclevel-1 tocsection-5"><a href="#Cantonment_areas"><span class="tocnumber">5</span> <span class="toctext">Cantonment areas</span></a></li>
<li class="toclevel-1 tocsection-6"><a href="#References"><span class="tocnumber">6</span> <span class="toctext">Refere

### Above list is not what we want. Let's check again with next list item i.e. [1]

In [6]:
wiki_list1 = soup.find_all("ul")[1]
wiki_list1

<ul><li>Ambegaon</li>
<li><a href="/wiki/Aundh,_Pune" title="Aundh, Pune">Aundh</a></li>
<li><a class="mw-redirect" href="/wiki/Baner,_Pune" title="Baner, Pune">Baner</a></li>
<li><a href="/wiki/Bavdhan" title="Bavdhan">Bavdhan</a></li>
<li><a class="mw-redirect" href="/wiki/Bhamburde" title="Bhamburde">Bhamburde</a> (now called <a href="/wiki/Shivajinagar,_Pune" title="Shivajinagar, Pune">Shivajinagar</a>)</li>
<li>Bibvewadi</li>
<li><a href="/wiki/Balewadi" title="Balewadi">Balewadi</a></li>
<li><a href="/wiki/Dhankawadi" title="Dhankawadi">Dhankawadi</a></li>
<li>Dhanori</li>
<li><a href="/wiki/Dhayari" title="Dhayari">Dhayari</a></li>
<li><a href="/wiki/Erandwane" title="Erandwane">Erandwane</a></li>
<li>Fursungi</li>
<li>Ganesh khind</li>
<li>Ghorpadi</li>
<li><a href="/wiki/Hadapsar" title="Hadapsar">Hadapsar</a></li>
<li>Kalas</li>
<li><a href="/wiki/Katraj" title="Katraj">Katraj</a></li>
<li><a href="/wiki/Khadki" title="Khadki">Khadki</a></li>
<li><a href="/wiki/Kharadi" title

### That is what we want. This is for a 'Pune City'. We will get it for 'Pimpri-Chinchwad' also

In [7]:
wiki_list2 = soup.find_all("ul")[2]
wiki_list2

<ul><li><a class="new" href="/w/index.php?title=Akurdi&amp;action=edit&amp;redlink=1" title="Akurdi (page does not exist)">Akurdi</a></li>
<li><a href="/wiki/Bhosari" title="Bhosari">Bhosari</a></li>
<li><a class="mw-redirect" href="/wiki/Chakan,_Maharashtra" title="Chakan, Maharashtra">Chakan</a></li>
<li><a href="/wiki/Charholi_Budruk" title="Charholi Budruk">Charholi Budruk</a></li>
<li><a href="/wiki/Chikhli,_Maharashtra" title="Chikhli, Maharashtra">Chikhli</a></li>
<li><a href="/wiki/Chinchwad" title="Chinchwad">Chinchwad</a></li>
<li><a href="/wiki/Dapodi" title="Dapodi">Dapodi</a></li>
<li><a href="/wiki/Dehu_Road" title="Dehu Road">Dehu Road</a></li>
<li>Dighi</li>
<li>Dudulgaon</li>
<li><a class="new" href="/w/index.php?title=Hinjwadi&amp;action=edit&amp;redlink=1" title="Hinjwadi (page does not exist)">Hinjwadi</a></li>
<li><a href="/wiki/Kalewadi" title="Kalewadi">Kalewadi</a></li>
<li>Kasarwadi</li>
<li><a class="mw-redirect" href="/wiki/Moshi,_Maharashtra" title="Moshi, M

### We got both. wiki_list1 and 2 are the results we want. Let's scrape a list from this web data

In [8]:
# append the data into the list
# For Pune city
for row in wiki_list1.findAll("li"):
    neighborhoodList.append(row.text) 
#For Pimpri Chinchwad
for row in wiki_list2.findAll("li"):
    neighborhoodList.append(row.text)

In [9]:
#Let's check the list
print(neighborhoodList)
print(len(neighborhoodList))

['Ambegaon', 'Aundh', 'Baner', 'Bavdhan', 'Bhamburde (now called Shivajinagar)', 'Bibvewadi', 'Balewadi', 'Dhankawadi', 'Dhanori', 'Dhayari', 'Erandwane', 'Fursungi', 'Ganesh khind', 'Ghorpadi', 'Hadapsar', 'Kalas', 'Katraj', 'Khadki', 'Kharadi', 'Kondhwa', 'Koregaon Park', 'Kothrud', 'Manjri', 'Markal', 'Mohammedwadi', 'Mundhwa', 'Parvati', 'Panmala', 'Pashan', 'Pirangut', 'Saswad', 'Undri', 'Vishrantwadi', 'Vitthalwadi', 'Vadgaon Budruk', 'Wadgaon Sheri', 'Wagholi', 'Wanwadi', 'Warje', 'Yerwada', 'Akurdi', 'Bhosari', 'Chakan', 'Charholi Budruk', 'Chikhli', 'Chinchwad', 'Dapodi', 'Dehu Road', 'Dighi', 'Dudulgaon', 'Hinjwadi', 'Kalewadi', 'Kasarwadi', 'Moshi', 'Phugewadi', 'Pimple Gurav', 'Pimple Nilakh', 'Pimple Saudagar', 'Pimpri', 'Ravet', 'Rahatani', 'Sangvi', 'Talawade', 'Tathawade', 'Thergaon', 'Wakad']
66


### Removing some unnecessary neighbourhoods

In [10]:
# Need to change names and remove some neighbourhoods
neighborhoodList[4] = 'Shivajinagar'

drp = ['Ambegaon', 'Fursungi','Panmala', 'Kalas', 'Manjri', 'Markal', 'Pirangut', 'Wagholi', 'Vitthalwadi','Saswad', 'Akurdi', 'Talawade', 'Chikhli',
      'Dudulgaon', 'Wanwadi', 'Sangvi', 'Dapodi', 'Pimpri', 'Kalewadi']
for i in drp:
    neighborhoodList.remove(i)
neighborhoodList

['Aundh',
 'Baner',
 'Bavdhan',
 'Shivajinagar',
 'Bibvewadi',
 'Balewadi',
 'Dhankawadi',
 'Dhanori',
 'Dhayari',
 'Erandwane',
 'Ganesh khind',
 'Ghorpadi',
 'Hadapsar',
 'Katraj',
 'Khadki',
 'Kharadi',
 'Kondhwa',
 'Koregaon Park',
 'Kothrud',
 'Mohammedwadi',
 'Mundhwa',
 'Parvati',
 'Pashan',
 'Undri',
 'Vishrantwadi',
 'Vadgaon Budruk',
 'Wadgaon Sheri',
 'Warje',
 'Yerwada',
 'Bhosari',
 'Chakan',
 'Charholi Budruk',
 'Chinchwad',
 'Dehu Road',
 'Dighi',
 'Hinjwadi',
 'Kasarwadi',
 'Moshi',
 'Phugewadi',
 'Pimple Gurav',
 'Pimple Nilakh',
 'Pimple Saudagar',
 'Ravet',
 'Rahatani',
 'Tathawade',
 'Thergaon',
 'Wakad']

In [11]:
# create a new DataFrame from the list
df = pd.DataFrame({"Neighborhood": neighborhoodList})
df.head()

Unnamed: 0,Neighborhood
0,Aundh
1,Baner
2,Bavdhan
3,Shivajinagar
4,Bibvewadi


In [12]:
# print the number of rows of the dataframe
df.shape

(47, 1)

### 3. Get the geographical coordinates
###    I will be using HERE maps API instead of OpenStreetMaps

In [13]:
# define a function to get coordinates
# Get you HERE maps API app id and code from HERE Devlopers website.
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        #g = geocoder.arcgis('{}, Pune, India'.format(neighborhood)) This is for ArcGis Geocoder
        g = geocoder.here('{}, Pune, India'.format(neighborhood), app_id ='YOUR_APP_ID', app_code = 'YOUR_APP_CODE')
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [14]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in df["Neighborhood"].tolist() ]

In [15]:
coords

[[18.56325, 73.81229],
 [18.5482, 73.77317],
 [18.51106, 73.77741],
 [18.53681, 73.83818],
 [18.47187, 73.86336],
 [18.57602, 73.77983],
 [18.46623, 73.85328],
 [18.57856, 73.89264],
 [18.44702, 73.80757],
 [18.50938, 73.83151],
 [18.54075, 73.82925],
 [18.52232, 73.89712],
 [18.50253, 73.92707],
 [18.44733, 73.86406],
 [19.00235, 73.98641],
 [18.54463, 73.93922],
 [18.47106, 73.89156],
 [18.53533, 73.89382],
 [18.50482, 73.80239],
 [18.47867, 73.91594],
 [18.53029, 73.92109],
 [18.48696, 73.85006],
 [18.53686, 73.7932],
 [18.45427, 73.91788],
 [18.57471, 73.87725],
 [18.46726, 73.82476],
 [18.53789, 73.93267],
 [18.47212, 73.80214],
 [18.57086, 73.88003],
 [18.63873, 73.83748],
 [18.73416, 73.85856],
 [18.64073, 73.90397],
 [18.64744, 73.80003],
 [18.67916, 73.73255],
 [18.61522, 73.87241],
 [18.57888, 73.75727],
 [18.60263, 73.82435],
 [18.65376, 73.86195],
 [18.59017, 73.83033],
 [18.58901, 73.81815],
 [18.57908, 73.78654],
 [18.59854, 73.80025],
 [18.64513, 73.73638],
 [18.59925, 7

In [16]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
print(df_coords)
df_coords.shape

    Latitude  Longitude
0   18.56325   73.81229
1   18.54820   73.77317
2   18.51106   73.77741
3   18.53681   73.83818
4   18.47187   73.86336
5   18.57602   73.77983
6   18.46623   73.85328
7   18.57856   73.89264
8   18.44702   73.80757
9   18.50938   73.83151
10  18.54075   73.82925
11  18.52232   73.89712
12  18.50253   73.92707
13  18.44733   73.86406
14  19.00235   73.98641
15  18.54463   73.93922
16  18.47106   73.89156
17  18.53533   73.89382
18  18.50482   73.80239
19  18.47867   73.91594
20  18.53029   73.92109
21  18.48696   73.85006
22  18.53686   73.79320
23  18.45427   73.91788
24  18.57471   73.87725
25  18.46726   73.82476
26  18.53789   73.93267
27  18.47212   73.80214
28  18.57086   73.88003
29  18.63873   73.83748
30  18.73416   73.85856
31  18.64073   73.90397
32  18.64744   73.80003
33  18.67916   73.73255
34  18.61522   73.87241
35  18.57888   73.75727
36  18.60263   73.82435
37  18.65376   73.86195
38  18.59017   73.83033
39  18.58901   73.81815
40  18.57908   7

(47, 2)

In [17]:
# merge the coordinates into the original dataframe
df['Latitude'] = df_coords['Latitude']
df['Longitude'] = df_coords['Longitude']
df.shape

(47, 3)

In [18]:
# check the neighborhoods and the coordinates
df

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Aundh,18.56325,73.81229
1,Baner,18.5482,73.77317
2,Bavdhan,18.51106,73.77741
3,Shivajinagar,18.53681,73.83818
4,Bibvewadi,18.47187,73.86336
5,Balewadi,18.57602,73.77983
6,Dhankawadi,18.46623,73.85328
7,Dhanori,18.57856,73.89264
8,Dhayari,18.44702,73.80757
9,Erandwane,18.50938,73.83151


In [19]:
# save the DataFrame as CSV file
df.to_csv("df.csv", index=False)

### 4. Create a map of Pune with neighborhoods superimposed on top

In [20]:
# get the coordinates of Pune
# get your HERE maps API app_id and app_code from HERE Developers website
address = 'Pune'

#geolocator = Nominatim(user_agent="my-application")
geolocator = Here(app_id ='YOUR_APP_ID', app_code = 'YOUR_APP_CODE')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Pune, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Pune, India 18.50422, 73.85302.


In [22]:
# create map of Pune using latitude and longitude values
map_pune = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_pune)  
    
map_pune

In [23]:
# save the map as HTML file
map_pune.save('map_pune.html')

### 5. Use the Foursquare API to explore the neighborhoods

In [24]:
# define Foursquare Credentials and Version
# get your Foursquare Client ID and Client Secret from Foursquare developer website
CLIENT_ID = 'YOUR_CLIEND_ID' # your Foursquare ID
CLIENT_SECRET = 'YOUR_CLIENT_SECRET' # your Foursquare Secret
VERSION = '20192611' # Foursquare API version 20180605

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RTXXXYVR11A2JIWBZEXSLMZRWJYATY4VK0ZSASJDU0RELMET
CLIENT_SECRET:TEH2WJ1O1VHWJ2COZ0G0QQDRVY0A4QCDWOH0N2EFYRHHNRVF


**Now, let's get the top 100 venues that are within a radius of 2000 meters.**

In [25]:
radius = 4000
LIMIT = 300

venues = []

for lat, long, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [26]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
venues_df.to_csv('venues_df.csv', index = False)
print(venues_df.shape)
venues_df.head()

(3132, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Aundh,18.56325,73.81229,Westend mall,18.561814,73.80722,Shopping Mall
1,Aundh,18.56325,73.81229,Crosswords,18.556177,73.809131,Bookstore
2,Aundh,18.56325,73.81229,Yolkshire,18.553576,73.806888,English Restaurant
3,Aundh,18.56325,73.81229,Mithas,18.554635,73.809369,Dessert Shop
4,Aundh,18.56325,73.81229,Starbucks,18.556595,73.809153,Coffee Shop


**Let's check how many venues were returned for each neighorhood**

In [27]:
#venues_df.groupby(["Neighborhood"]).count()
#venues_df.groupby(["VenueName"]).count()
venues_df.groupby(["VenueCategory"]).count()

Unnamed: 0_level_0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude
VenueCategory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Accessories Store,1,1,1,1,1,1
Airport Gate,2,2,2,2,2,2
Airport Service,3,3,3,3,3,3
American Restaurant,29,29,29,29,29,29
Arcade,5,5,5,5,5,5
Asian Restaurant,58,58,58,58,58,58
Athletics & Sports,2,2,2,2,2,2
BBQ Joint,37,37,37,37,37,37
Bakery,79,79,79,79,79,79
Bank,2,2,2,2,2,2


**Let's find out how many unique categories can be curated from all the returned venues**

In [28]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 147 uniques categories.


In [29]:
# print out the list of categories
venues_df['VenueCategory'].unique()

array(['Shopping Mall', 'Bookstore', 'English Restaurant', 'Dessert Shop',
       'Coffee Shop', 'Jewelry Store', 'Indian Restaurant', 'Multiplex',
       'Gym', 'Donut Shop', 'Ice Cream Shop', 'Mexican Restaurant',
       'Lounge', 'Clothing Store', 'Bakery', 'Chinese Restaurant',
       'Chocolate Shop', 'Brewery', 'Malay Restaurant', 'BBQ Joint',
       'South Indian Restaurant', 'Hotel', 'Asian Restaurant',
       'Italian Restaurant', 'Breakfast Spot', 'Fast Food Restaurant',
       'Nightclub', 'Snack Place', 'Vegetarian / Vegan Restaurant',
       'Molecular Gastronomy Restaurant', 'Restaurant', 'Bistro', 'Café',
       'Hotel Bar', 'Pizza Place', 'Motorcycle Shop', 'Sandwich Place',
       'Juice Bar', 'Seafood Restaurant', 'Food Court',
       'Middle Eastern Restaurant', 'North Indian Restaurant',
       'Punjabi Restaurant', 'Beer Garden', 'Golf Course', 'Bar',
       'Chaat Place', 'Department Store', 'French Restaurant',
       'Electronics Store', 'Stadium', 'Cocktail Bar

### 6. Analyze Each Neighborhood

In [30]:
# one hot encoding
pune_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
pune_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [pune_onehot.columns[-1]] + list(pune_onehot.columns[:-1])
pune_onehot = pune_onehot[fixed_columns]

print(pune_onehot.shape)
pune_onehot.head()

(3132, 148)


Unnamed: 0,Neighborhoods,Accessories Store,Airport Gate,Airport Service,American Restaurant,Arcade,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bank,Bar,Beach Bar,Beer Garden,Bistro,Bookstore,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Buffet,Burger Joint,Bus Station,Café,Casino,Chaat Place,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Convenience Store,Cosmetics Shop,Cricket Ground,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Distillery,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,English Restaurant,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Food,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Garden,Garden Center,Gastropub,General Entertainment,Golf Course,Gourmet Shop,Grocery Store,Gun Shop,Gym,Gym / Fitness Center,Historic Site,History Museum,Hookah Bar,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Italian Restaurant,Jazz Club,Jewelry Store,Juice Bar,Kebab Restaurant,Korean Restaurant,Lake,Liquor Store,Lounge,Maharashtrian Restaurant,Malay Restaurant,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Molecular Gastronomy Restaurant,Motel,Motorcycle Shop,Mountain,Movie Theater,Moving Target,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Nightclub,North Indian Restaurant,Organic Grocery,Other Great Outdoors,Other Nightlife,Park,Parsi Restaurant,Pizza Place,Platform,Plaza,Pub,Punjabi Restaurant,Racetrack,Resort,Restaurant,Rock Club,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,South Indian Restaurant,Southern / Soul Food Restaurant,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Supermarket,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theater,Theme Park,Track Stadium,Trail,Train,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Shop,Yoga Studio,Zoo
0,Aundh,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Aundh,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Aundh,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Aundh,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Aundh,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


**Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category**

In [31]:
pune_grouped = pune_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(pune_grouped.shape)
pune_grouped

(47, 148)


Unnamed: 0,Neighborhoods,Accessories Store,Airport Gate,Airport Service,American Restaurant,Arcade,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bank,Bar,Beach Bar,Beer Garden,Bistro,Bookstore,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Buffet,Burger Joint,Bus Station,Café,Casino,Chaat Place,Cheese Shop,Chinese Restaurant,Chocolate Shop,Clothing Store,Cocktail Bar,Coffee Shop,Convenience Store,Cosmetics Shop,Cricket Ground,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Distillery,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,English Restaurant,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Food,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Garden,Garden Center,Gastropub,General Entertainment,Golf Course,Gourmet Shop,Grocery Store,Gun Shop,Gym,Gym / Fitness Center,Historic Site,History Museum,Hookah Bar,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Italian Restaurant,Jazz Club,Jewelry Store,Juice Bar,Kebab Restaurant,Korean Restaurant,Lake,Liquor Store,Lounge,Maharashtrian Restaurant,Malay Restaurant,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Molecular Gastronomy Restaurant,Motel,Motorcycle Shop,Mountain,Movie Theater,Moving Target,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Nightclub,North Indian Restaurant,Organic Grocery,Other Great Outdoors,Other Nightlife,Park,Parsi Restaurant,Pizza Place,Platform,Plaza,Pub,Punjabi Restaurant,Racetrack,Resort,Restaurant,Rock Club,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,South Indian Restaurant,Southern / Soul Food Restaurant,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Supermarket,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theater,Theme Park,Track Stadium,Trail,Train,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Shop,Yoga Studio,Zoo
0,Aundh,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.05,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.06,0.01,0.01,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.06,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.05,0.0,0.13,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.07,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.01,0.02,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
1,Balewadi,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.02,0.03,0.0,0.01,0.0,0.01,0.02,0.01,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.05,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.05,0.0,0.14,0.02,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.05,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.05,0.0,0.0,0.02,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
2,Baner,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.05,0.0,0.01,0.0,0.01,0.02,0.01,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.05,0.0,0.01,0.0,0.04,0.0,0.01,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.05,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.07,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.05,0.0,0.13,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.05,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.03,0.0,0.01,0.01,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
3,Bavdhan,0.0,0.0,0.0,0.0,0.0,0.031746,0.0,0.0,0.031746,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.126984,0.0,0.015873,0.0,0.015873,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.015873,0.047619,0.0,0.031746,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.063492,0.0,0.0,0.031746,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.015873,0.0,0.15873,0.015873,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.015873,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031746,0.0,0.0,0.015873,0.0,0.0,0.0,0.015873,0.0,0.031746,0.015873,0.047619,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.031746,0.0,0.0,0.0,0.0,0.0
4,Bhosari,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.083333,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.041667,0.041667,0.0,0.0,0.0,0.291667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Bibvewadi,0.0,0.0,0.0,0.0,0.011765,0.035294,0.0,0.011765,0.023529,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.047059,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.082353,0.011765,0.0,0.0,0.0,0.0,0.011765,0.011765,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.082353,0.0,0.011765,0.0,0.023529,0.0,0.0,0.0,0.023529,0.0,0.023529,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.023529,0.0,0.0,0.0,0.035294,0.0,0.094118,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.011765,0.0,0.023529,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0,0.011765,0.0,0.035294,0.023529,0.011765,0.035294,0.0,0.0,0.023529,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.047059,0.0,0.011765,0.0,0.0,0.011765
6,Chakan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.166667,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Charholi Budruk,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Chinchwad,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.0,0.0,0.0,0.015625,0.0625,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.015625,0.0,0.015625,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.078125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.015625,0.0,0.0,0.0,0.03125,0.015625,0.0,0.03125,0.0,0.203125,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.09375,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.0,0.0,0.0,0.015625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.015625,0.015625,0.0,0.0,0.0,0.0,0.0
9,Dehu Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
len(pune_grouped[pune_grouped["Indian Restaurant"] > 0])

45

**Create a new DataFrame for Indian Restaurant data only**

In [33]:
pune_res = pune_grouped[["Neighborhoods","Indian Restaurant"]]

In [35]:
pune_res.head()

Unnamed: 0,Neighborhoods,Indian Restaurant
0,Aundh,0.13
1,Balewadi,0.14
2,Baner,0.13
3,Bavdhan,0.15873
4,Bhosari,0.291667


### 7. Cluster Neighborhoods
Run k-means to cluster the neighborhoods in Pune into 3 clusters.

In [36]:
# set number of clusters
kclusters = 3

pune_clustering = pune_res.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(pune_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 2, 0, 0, 2, 2, 2])

In [37]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
pune_merged = pune_res.copy()

# add clustering labels
pune_merged["Cluster Labels"] = kmeans.labels_

In [38]:
pune_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
pune_merged.head()

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels
0,Aundh,0.13,0
1,Balewadi,0.14,0
2,Baner,0.13,0
3,Bavdhan,0.15873,0
4,Bhosari,0.291667,2


In [39]:
# merge pune_grouped with data to add latitude/longitude for each neighborhood
pune_merged = pune_merged.join(df.set_index("Neighborhood"), on="Neighborhood")

print(pune_merged.shape)
pune_merged.head() # check the last columns!

(47, 5)


Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,Aundh,0.13,0,18.56325,73.81229
1,Balewadi,0.14,0,18.57602,73.77983
2,Baner,0.13,0,18.5482,73.77317
3,Bavdhan,0.15873,0,18.51106,73.77741
4,Bhosari,0.291667,2,18.63873,73.83748


In [40]:
# sort the results by Cluster Labels
print(pune_merged.shape)
pune_merged.sort_values(["Cluster Labels"], inplace=True)
pune_merged

(47, 5)


Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,Aundh,0.13,0,18.56325,73.81229
45,Warje,0.135135,0,18.47212,73.80214
24,Koregaon Park,0.1,0,18.53533,73.89382
25,Kothrud,0.084507,0,18.50482,73.80239
26,Mohammedwadi,0.068966,0,18.47867,73.91594
27,Moshi,0.166667,0,18.65376,73.86195
28,Mundhwa,0.08,0,18.53029,73.92109
29,Parvati,0.16,0,18.48696,73.85006
30,Pashan,0.1,0,18.53686,73.7932
22,Kharadi,0.07,0,18.54463,73.93922


**Finally, let's visualize the resulting clusters**

In [41]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(pune_merged['Latitude'], pune_merged['Longitude'], pune_merged['Neighborhood'], pune_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [42]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

### 8. Examine Clusters

#### Cluster 0

In [43]:
pune_merged.loc[pune_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,Aundh,0.13,0,18.56325,73.81229
45,Warje,0.135135,0,18.47212,73.80214
24,Koregaon Park,0.1,0,18.53533,73.89382
25,Kothrud,0.084507,0,18.50482,73.80239
26,Mohammedwadi,0.068966,0,18.47867,73.91594
27,Moshi,0.166667,0,18.65376,73.86195
28,Mundhwa,0.08,0,18.53029,73.92109
29,Parvati,0.16,0,18.48696,73.85006
30,Pashan,0.1,0,18.53686,73.7932
22,Kharadi,0.07,0,18.54463,73.93922


#### Cluster 1

In [44]:
pune_merged.loc[pune_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
13,Dighi,0.8,1,18.61522,73.87241


#### Cluster 2

In [45]:
pune_merged.loc[pune_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
4,Bhosari,0.291667,2,18.63873,73.83748
19,Kasarwadi,0.282051,2,18.60263,73.82435
8,Chinchwad,0.203125,2,18.64744,73.80003
36,Ravet,0.277778,2,18.64513,73.73638
35,Rahatani,0.243243,2,18.59925,73.78632
32,Pimple Gurav,0.22973,2,18.58901,73.81815
31,Phugewadi,0.23913,2,18.59017,73.83033
7,Charholi Budruk,0.25,2,18.64073,73.90397
9,Dehu Road,0.375,2,18.67916,73.73255
