### 1. Import libraries

In [1]:
import numpy as np 
import pandas as pd 
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json 

from geopy.geocoders import Nominatim
from geopy.geocoders import Here #Using OpenStreetMap for co-ordinates
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import seaborn as sns #Using Seaborn for graphs instead of MatPlotLib

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

### 2. Scrape data from Wikipedia page into a DataFrame

In [2]:
data = requests.get("https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Mumbai").text

In [3]:
# parse data from the html into a beautifulsoup object
soup = BeautifulSoup(data, 'html.parser')

In [4]:
# create a list to store neighborhood data
neighborhoodList = []

In [5]:
#Let's check where is the neighbourhoods list
wiki_list0 = soup.find_all("ul")[0]
wiki_list0

<ul>
<li class="toclevel-1 tocsection-1"><a href="#Western_Suburbs"><span class="tocnumber">1</span> <span class="toctext">Western Suburbs</span></a>
<ul>
<li class="toclevel-2 tocsection-2"><a href="#Andheri"><span class="tocnumber">1.1</span> <span class="toctext">Andheri</span></a></li>
<li class="toclevel-2 tocsection-3"><a href="#Bhayandar"><span class="tocnumber">1.2</span> <span class="toctext">Bhayandar</span></a></li>
<li class="toclevel-2 tocsection-4"><a href="#Bandra"><span class="tocnumber">1.3</span> <span class="toctext">Bandra</span></a></li>
<li class="toclevel-2 tocsection-5"><a href="#Borivali"><span class="tocnumber">1.4</span> <span class="toctext">Borivali</span></a></li>
<li class="toclevel-2 tocsection-6"><a href="#Dahisar"><span class="tocnumber">1.5</span> <span class="toctext">Dahisar</span></a></li>
<li class="toclevel-2 tocsection-7"><a href="#Goregaon"><span class="tocnumber">1.6</span> <span class="toctext">Goregaon</span></a></li>
<li class="toclevel-2 t

In [6]:
wiki_list1 = soup.find_all("ul")[1]
wiki_list1

<ul>
<li class="toclevel-2 tocsection-2"><a href="#Andheri"><span class="tocnumber">1.1</span> <span class="toctext">Andheri</span></a></li>
<li class="toclevel-2 tocsection-3"><a href="#Bhayandar"><span class="tocnumber">1.2</span> <span class="toctext">Bhayandar</span></a></li>
<li class="toclevel-2 tocsection-4"><a href="#Bandra"><span class="tocnumber">1.3</span> <span class="toctext">Bandra</span></a></li>
<li class="toclevel-2 tocsection-5"><a href="#Borivali"><span class="tocnumber">1.4</span> <span class="toctext">Borivali</span></a></li>
<li class="toclevel-2 tocsection-6"><a href="#Dahisar"><span class="tocnumber">1.5</span> <span class="toctext">Dahisar</span></a></li>
<li class="toclevel-2 tocsection-7"><a href="#Goregaon"><span class="tocnumber">1.6</span> <span class="toctext">Goregaon</span></a></li>
<li class="toclevel-2 tocsection-8"><a href="#Jogeshwari"><span class="tocnumber">1.7</span> <span class="toctext">Jogeshwari</span></a></li>
<li class="toclevel-2 tocsectio

In [7]:
wiki_list2 = soup.find_all("ul")[2]
wiki_list2

<ul>
<li class="toclevel-2 tocsection-19"><a href="#Bhandup"><span class="tocnumber">2.1</span> <span class="toctext">Bhandup</span></a></li>
<li class="toclevel-2 tocsection-20"><a href="#Ghatkopar"><span class="tocnumber">2.2</span> <span class="toctext">Ghatkopar</span></a></li>
<li class="toclevel-2 tocsection-21"><a href="#Kanjurmarg"><span class="tocnumber">2.3</span> <span class="toctext">Kanjurmarg</span></a></li>
<li class="toclevel-2 tocsection-22"><a href="#Kurla"><span class="tocnumber">2.4</span> <span class="toctext">Kurla</span></a></li>
<li class="toclevel-2 tocsection-23"><a href="#Mulund"><span class="tocnumber">2.5</span> <span class="toctext">Mulund</span></a></li>
<li class="toclevel-2 tocsection-24"><a href="#Powai"><span class="tocnumber">2.6</span> <span class="toctext">Powai</span></a></li>
<li class="toclevel-2 tocsection-25"><a href="#Vidyavihar"><span class="tocnumber">2.7</span> <span class="toctext">Vidyavihar</span></a></li>
<li class="toclevel-2 tocsecti

In [8]:
wiki_list3 = soup.find_all("ul")[3]
wiki_list3

<ul>
<li class="toclevel-2 tocsection-28"><a href="#Chembur"><span class="tocnumber">3.1</span> <span class="toctext">Chembur</span></a></li>
<li class="toclevel-2 tocsection-29"><a href="#Govandi"><span class="tocnumber">3.2</span> <span class="toctext">Govandi</span></a></li>
<li class="toclevel-2 tocsection-30"><a href="#Mankhurd"><span class="tocnumber">3.3</span> <span class="toctext">Mankhurd</span></a></li>
<li class="toclevel-2 tocsection-31"><a href="#Trombay"><span class="tocnumber">3.4</span> <span class="toctext">Trombay</span></a></li>
</ul>

In [10]:
wiki_list4 = soup.find_all("ul")[4]
wiki_list4

<ul>
<li class="toclevel-2 tocsection-33"><a href="#Antop_Hill"><span class="tocnumber">4.1</span> <span class="toctext">Antop Hill</span></a></li>
<li class="toclevel-2 tocsection-34"><a href="#Byculla"><span class="tocnumber">4.2</span> <span class="toctext">Byculla</span></a></li>
<li class="toclevel-2 tocsection-35"><a href="#Colaba"><span class="tocnumber">4.3</span> <span class="toctext">Colaba</span></a></li>
<li class="toclevel-2 tocsection-36"><a href="#Dadar"><span class="tocnumber">4.4</span> <span class="toctext">Dadar</span></a></li>
<li class="toclevel-2 tocsection-37"><a href="#Fort"><span class="tocnumber">4.5</span> <span class="toctext">Fort</span></a></li>
<li class="toclevel-2 tocsection-38"><a href="#Girgaon"><span class="tocnumber">4.6</span> <span class="toctext">Girgaon</span></a></li>
<li class="toclevel-2 tocsection-39"><a href="#Kalbadevi"><span class="tocnumber">4.7</span> <span class="toctext">Kalbadevi</span></a></li>
<li class="toclevel-2 tocsection-40"><

#### Let's scrape a list from this web data

In [11]:
# append the data into the list
# For Western Suburbs
for row in wiki_list1.findAll("li"):
    neighborhoodList.append(row.text) 
#For Eastern Suburbs
for row in wiki_list2.findAll("li"):
    neighborhoodList.append(row.text)
#For Harbour Suburbs
for row in wiki_list3.findAll("li"):
    neighborhoodList.append(row.text)
#For South Mumbai
for row in wiki_list4.findAll("li"):
    neighborhoodList.append(row.text)

In [12]:
#Let's check the list
print(neighborhoodList)
print(len(neighborhoodList))

['1.1 Andheri', '1.2 Bhayandar', '1.3 Bandra', '1.4 Borivali', '1.5 Dahisar', '1.6 Goregaon', '1.7 Jogeshwari', '1.8 Juhu', '1.9 Kandivali west', '1.10 Kandivali east', '1.11 Khar', '1.12 Malad', '1.13 Santacruz', '1.14 Vasai', '1.15 Virar', '1.16 Vile Parle', '2.1 Bhandup', '2.2 Ghatkopar', '2.3 Kanjurmarg', '2.4 Kurla', '2.5 Mulund', '2.6 Powai', '2.7 Vidyavihar', '2.8 Vikhroli', '3.1 Chembur', '3.2 Govandi', '3.3 Mankhurd', '3.4 Trombay', '4.1 Antop Hill', '4.2 Byculla', '4.3 Colaba', '4.4 Dadar', '4.5 Fort', '4.6 Girgaon', '4.7 Kalbadevi', '4.8 Kamathipura', '4.9 Matunga', '4.10 Parel', '4.11 Tardeo']
39


In [13]:
neighborhoodList

['1.1 Andheri',
 '1.2 Bhayandar',
 '1.3 Bandra',
 '1.4 Borivali',
 '1.5 Dahisar',
 '1.6 Goregaon',
 '1.7 Jogeshwari',
 '1.8 Juhu',
 '1.9 Kandivali west',
 '1.10 Kandivali east',
 '1.11 Khar',
 '1.12 Malad',
 '1.13 Santacruz',
 '1.14 Vasai',
 '1.15 Virar',
 '1.16 Vile Parle',
 '2.1 Bhandup',
 '2.2 Ghatkopar',
 '2.3 Kanjurmarg',
 '2.4 Kurla',
 '2.5 Mulund',
 '2.6 Powai',
 '2.7 Vidyavihar',
 '2.8 Vikhroli',
 '3.1 Chembur',
 '3.2 Govandi',
 '3.3 Mankhurd',
 '3.4 Trombay',
 '4.1 Antop Hill',
 '4.2 Byculla',
 '4.3 Colaba',
 '4.4 Dadar',
 '4.5 Fort',
 '4.6 Girgaon',
 '4.7 Kalbadevi',
 '4.8 Kamathipura',
 '4.9 Matunga',
 '4.10 Parel',
 '4.11 Tardeo']

In [14]:
# create a new DataFrame from the list
df = pd.DataFrame({"Neighborhood": neighborhoodList})
df.head()

Unnamed: 0,Neighborhood
0,1.1 Andheri
1,1.2 Bhayandar
2,1.3 Bandra
3,1.4 Borivali
4,1.5 Dahisar


In [15]:
# print the number of rows of the dataframe
df.shape

(39, 1)

### 3. Get the geographical coordinates

In [20]:
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Mumbai, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [22]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in df["Neighborhood"].tolist() ]

In [23]:
coords

[[19.11848309908247, 72.84177419095158],
 [19.30746000000005, 72.85170000000005],
 [19.054220000000043, 72.84019000000006],
 [19.229360000000042, 72.85751000000005],
 [19.250030000000038, 72.85908000000006],
 [19.164550000000077, 72.84946000000008],
 [19.13790000000006, 72.84941000000003],
 [19.01493000000005, 72.84522000000004],
 [19.207110000000057, 72.83492000000007],
 [19.205750000000023, 72.86969000000005],
 [19.073447406518884, 72.83594856665043],
 [19.186550000000068, 72.84836000000007],
 [19.081770000000063, 72.84205000000003],
 [19.07934000000006, 72.83916000000005],
 [19.01657000000006, 72.85853000000003],
 [19.100580000000036, 72.84377000000006],
 [19.134991137657018, 72.94007198350238],
 [19.086476606699875, 72.9089562772808],
 [19.131400000000042, 72.93565000000007],
 [19.140931543000022, 72.88260426900007],
 [19.171850000000063, 72.95564000000007],
 [19.123110000000054, 72.90944000000007],
 [19.023261171244744, 72.84389992492353],
 [19.111090000000047, 72.92781000000008],

In [24]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
print(df_coords)
df_coords.shape

     Latitude  Longitude
0   19.118483  72.841774
1   19.307460  72.851700
2   19.054220  72.840190
3   19.229360  72.857510
4   19.250030  72.859080
5   19.164550  72.849460
6   19.137900  72.849410
7   19.014930  72.845220
8   19.207110  72.834920
9   19.205750  72.869690
10  19.073447  72.835949
11  19.186550  72.848360
12  19.081770  72.842050
13  19.079340  72.839160
14  19.016570  72.858530
15  19.100580  72.843770
16  19.134991  72.940072
17  19.086477  72.908956
18  19.131400  72.935650
19  19.140932  72.882604
20  19.171850  72.955640
21  19.123110  72.909440
22  19.023261  72.843900
23  19.111090  72.927810
24  19.062200  72.902420
25  19.056170  72.914830
26  19.048530  72.932200
27  19.019000  72.897990
28  19.023011  72.866059
29  18.980740  72.840750
30  18.915270  72.826140
31  19.019920  72.840870
32  18.932260  72.832880
33  18.956960  72.819450
34  18.950040  72.829950
35  18.961720  72.826270
36  19.140932  72.882604
37  18.995660  72.839070
38  18.972430  72.814830


(39, 2)

In [25]:
# merge the coordinates into the original dataframe
df['Latitude'] = df_coords['Latitude']
df['Longitude'] = df_coords['Longitude']
df.shape

(39, 3)

In [26]:
# check the neighborhoods and the coordinates
df

Unnamed: 0,Neighborhood,Latitude,Longitude
0,1.1 Andheri,19.118483,72.841774
1,1.2 Bhayandar,19.30746,72.8517
2,1.3 Bandra,19.05422,72.84019
3,1.4 Borivali,19.22936,72.85751
4,1.5 Dahisar,19.25003,72.85908
5,1.6 Goregaon,19.16455,72.84946
6,1.7 Jogeshwari,19.1379,72.84941
7,1.8 Juhu,19.01493,72.84522
8,1.9 Kandivali west,19.20711,72.83492
9,1.10 Kandivali east,19.20575,72.86969


In [27]:
# save the DataFrame as CSV file
df.to_csv("df.csv", index=False)

### 4. Create a map of Mumbai with neighborhoods superimposed on top

In [28]:
# get the coordinates of Kuala Lumpur
address = 'Mumbai, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Mumbai, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Mumbai, India 18.9387711, 72.8353355.


In [29]:
# create map of Pune using latitude and longitude values
map_mumbai = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_mumbai)  
    
map_mumbai

In [31]:
# save the map as HTML file
map_mumbai.save('map_mumbai.html')

### 5. Use the Foursquare API to explore the neighborhoods

In [32]:
# define Foursquare Credentials and Version
# get your Foursquare Client ID and Client Secret from Foursquare developer website
CLIENT_ID = 'VMFTAN2XGPZMOYYL1IKSADQXQQ3MZWV0VWXQCSPZGUXSYEK4' # your Foursquare ID
CLIENT_SECRET = 'FHMUBKT5ZJZABRFU2F43FCIR0WFOGR0G3Z0Q0P3ZORN52VAL' # your Foursquare Secret
VERSION = '20192911' # Foursquare API version 20180605

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: VMFTAN2XGPZMOYYL1IKSADQXQQ3MZWV0VWXQCSPZGUXSYEK4
CLIENT_SECRET:FHMUBKT5ZJZABRFU2F43FCIR0WFOGR0G3Z0Q0P3ZORN52VAL


#### Now, let's get the top 100 venues that are within a radius of 2000 meters

In [33]:
radius = 4000
LIMIT = 300

venues = []

for lat, long, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [34]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
venues_df.to_csv('venues_df.csv', index = False)
print(venues_df.shape)
venues_df.head()

(3661, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,1.1 Andheri,19.118483,72.841774,Merwans Cake shop,19.1193,72.845418,Bakery
1,1.1 Andheri,19.118483,72.841774,Radha Krishna Veg Restaurant,19.11513,72.84306,Indian Restaurant
2,1.1 Andheri,19.118483,72.841774,Naturals,19.111204,72.837255,Ice Cream Shop
3,1.1 Andheri,19.118483,72.841774,Joey's Pizza,19.126762,72.830001,Pizza Place
4,1.1 Andheri,19.118483,72.841774,Shawarma Factory,19.124591,72.840398,Falafel Restaurant


#### Let's check how many venues were returned for each neighorhood


In [35]:
venues_df.groupby(["VenueCategory"]).count()

Unnamed: 0_level_0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude
VenueCategory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Afghan Restaurant,3,3,3,3,3,3
Airport,1,1,1,1,1,1
Airport Service,2,2,2,2,2,2
American Restaurant,14,14,14,14,14,14
Arcade,11,11,11,11,11,11
Art Gallery,7,7,7,7,7,7
Asian Restaurant,55,55,55,55,55,55
Athletics & Sports,10,10,10,10,10,10
Australian Restaurant,4,4,4,4,4,4
BBQ Joint,17,17,17,17,17,17


#### Let's find out how many unique categories can be curated from all the returned venues

In [36]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 174 uniques categories.


In [37]:
# print out the list of categories
venues_df['VenueCategory'].unique()

array(['Bakery', 'Indian Restaurant', 'Ice Cream Shop', 'Pizza Place',
       'Falafel Restaurant', 'Coffee Shop', 'Multiplex', 'Sandwich Place',
       'Breakfast Spot', 'Theater', 'Seafood Restaurant',
       'American Restaurant', 'Café', 'Maharashtrian Restaurant', 'Beach',
       'Juice Bar', 'Food Truck', 'Brewery', 'Hotel', 'Bar',
       'Mediterranean Restaurant', 'Chinese Restaurant', 'Cocktail Bar',
       'Pub', 'Lounge', 'Mughlai Restaurant', 'Club House', 'Snack Place',
       'Italian Restaurant', 'Gym / Fitness Center', 'Dessert Shop',
       'BBQ Joint', 'Comfort Food Restaurant', "Women's Store",
       'Spanish Restaurant', 'Spa', 'Diner', 'Movie Theater',
       'South Indian Restaurant', 'Nightclub', 'Gym', 'Cupcake Shop',
       'Sculpture Garden', 'Fast Food Restaurant', 'Recreation Center',
       'Restaurant', 'Train Station', 'Lake', 'Department Store',
       'Soccer Field', 'Convenience Store', 'Playground',
       'Shop & Service', 'Shopping Mall', 'Electron

### 6. Analyze Each Neighborhood

In [38]:
# one hot encoding
mumbai_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
mumbai_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [mumbai_onehot.columns[-1]] + list(mumbai_onehot.columns[:-1])
mumbai_onehot = mumbai_onehot[fixed_columns]

print(mumbai_onehot.shape)
mumbai_onehot.head()

(3661, 175)


Unnamed: 0,Neighborhoods,Afghan Restaurant,Airport,Airport Service,American Restaurant,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Beach,Bed & Breakfast,Beer Bar,Beer Garden,Bengali Restaurant,Big Box Store,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridge,Buffet,Building,Burger Joint,Burrito Place,Bus Station,Café,Chaat Place,Cheese Shop,Chinese Restaurant,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Auditorium,College Gym,Comedy Club,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,General College & University,General Entertainment,German Restaurant,Gift Shop,Goan Restaurant,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Historic Site,History Museum,Hookah Bar,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Irani Cafe,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Lake,Lounge,Maharashtrian Restaurant,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Mountain,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Music Venue,Neighborhood,New American Restaurant,Nightclub,North Indian Restaurant,Office,Other Great Outdoors,Outdoors & Recreation,Park,Parsi Restaurant,Performing Arts Venue,Pizza Place,Planetarium,Playground,Plaza,Pool,Pub,Punjabi Restaurant,Recreation Center,Resort,Restaurant,Roof Deck,Salad Place,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,South Indian Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Toy / Game Store,Track,Train Station,Vegetarian / Vegan Restaurant,Water Park,Wine Bar,Women's Store
0,1.1 Andheri,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1.1 Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1.1 Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1.1 Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1.1 Andheri,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [39]:
mumbai_grouped = mumbai_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(mumbai_grouped.shape)
mumbai_grouped

(39, 175)


Unnamed: 0,Neighborhoods,Afghan Restaurant,Airport,Airport Service,American Restaurant,Arcade,Art Gallery,Asian Restaurant,Athletics & Sports,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Beach,Bed & Breakfast,Beer Bar,Beer Garden,Bengali Restaurant,Big Box Store,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridge,Buffet,Building,Burger Joint,Burrito Place,Bus Station,Café,Chaat Place,Cheese Shop,Chinese Restaurant,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Auditorium,College Gym,Comedy Club,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop,Coworking Space,Creperie,Cricket Ground,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,General College & University,General Entertainment,German Restaurant,Gift Shop,Goan Restaurant,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Historic Site,History Museum,Hookah Bar,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Irani Cafe,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Lake,Lounge,Maharashtrian Restaurant,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Mountain,Movie Theater,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Music Venue,Neighborhood,New American Restaurant,Nightclub,North Indian Restaurant,Office,Other Great Outdoors,Outdoors & Recreation,Park,Parsi Restaurant,Performing Arts Venue,Pizza Place,Planetarium,Playground,Plaza,Pool,Pub,Punjabi Restaurant,Recreation Center,Resort,Restaurant,Roof Deck,Salad Place,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,South Indian Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Toy / Game Store,Track,Train Station,Vegetarian / Vegan Restaurant,Water Park,Wine Bar,Women's Store
0,1.1 Andheri,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.05,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.03,0.0,0.01,0.02,0.04,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.09,0.0,0.05,0.11,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.05,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
1,1.10 Kandivali east,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.03,0.01,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.03,0.0,0.02,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.02,0.0,0.0,0.01,0.01,0.08,0.1,0.01,0.0,0.02,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.03,0.02,0.0,0.02,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0
2,1.11 Khar,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.01,0.06,0.0,0.05,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09,0.0,0.0,0.01,0.01,0.01,0.0,0.05,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.03,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.02,0.03,0.02,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
3,1.12 Malad,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.04,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.02,0.05,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.01,0.01,0.0,0.05,0.0,0.01,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.02,0.08,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.03,0.0,0.04,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0
4,1.13 Santacruz,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.04,0.0,0.07,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.02,0.01,0.01,0.01,0.04,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.06,0.0,0.03,0.07,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.01,0.0,0.05,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
5,1.14 Vasai,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.01,0.04,0.0,0.07,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.02,0.01,0.01,0.01,0.04,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.04,0.0,0.02,0.07,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
6,1.15 Virar,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.05,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.04,0.24,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.03,0.0,0.01,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0
7,1.16 Vile Parle,0.0,0.01,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.05,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.02,0.01,0.01,0.02,0.06,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.01,0.12,0.0,0.04,0.11,0.0,0.0,0.02,0.0,0.0,0.03,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.03,0.0,0.0,0.05,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
8,1.2 Bhayandar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.0,0.027778,0.0,0.027778,0.0,0.027778,0.027778,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0
9,1.3 Bandra,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.06,0.0,0.06,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.02,0.01,0.0,0.01,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.04,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.03,0.0,0.01,0.07,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.02,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.0,0.02,0.03,0.02,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0


In [40]:
len(mumbai_grouped[mumbai_grouped["Indian Restaurant"] > 0])

39

#### Create a new DataFrame for Indian Restaurant data only

In [41]:
mumbai_res = mumbai_grouped[["Neighborhoods","Indian Restaurant"]]

In [42]:
mumbai_res.head()

Unnamed: 0,Neighborhoods,Indian Restaurant
0,1.1 Andheri,0.11
1,1.10 Kandivali east,0.1
2,1.11 Khar,0.04
3,1.12 Malad,0.08
4,1.13 Santacruz,0.07


### 7. Cluster Neighborhoods
Run k-means to cluster the neighborhoods in Mumbai into 3 clusters.

In [43]:
# set number of clusters
kclusters = 3

mumbai_clustering = mumbai_res.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(mumbai_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 1, 1, 1, 1, 1, 2, 0, 1, 1])

In [44]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
mumbai_merged = mumbai_res.copy()

# add clustering labels
mumbai_merged["Cluster Labels"] = kmeans.labels_

In [45]:
mumbai_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
mumbai_merged.head()

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels
0,1.1 Andheri,0.11,0
1,1.10 Kandivali east,0.1,1
2,1.11 Khar,0.04,1
3,1.12 Malad,0.08,1
4,1.13 Santacruz,0.07,1


In [46]:
# merge pune_grouped with data to add latitude/longitude for each neighborhood
mumbai_merged = mumbai_merged.join(df.set_index("Neighborhood"), on="Neighborhood")

print(mumbai_merged.shape)
mumbai_merged.head() # check the last columns!

(39, 5)


Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,1.1 Andheri,0.11,0,19.118483,72.841774
1,1.10 Kandivali east,0.1,1,19.20575,72.86969
2,1.11 Khar,0.04,1,19.073447,72.835949
3,1.12 Malad,0.08,1,19.18655,72.84836
4,1.13 Santacruz,0.07,1,19.08177,72.84205


In [47]:
# sort the results by Cluster Labels
print(mumbai_merged.shape)
mumbai_merged.sort_values(["Cluster Labels"], inplace=True)
mumbai_merged

(39, 5)


Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,1.1 Andheri,0.11,0,19.118483,72.841774
29,4.10 Parel,0.14,0,18.99566,72.83907
27,3.4 Trombay,0.148936,0,19.019,72.89799
33,4.4 Dadar,0.15,0,19.01992,72.84087
25,3.2 Govandi,0.160494,0,19.05617,72.91483
22,2.7 Vidyavihar,0.17,0,19.023261,72.8439
21,2.6 Powai,0.146067,0,19.12311,72.90944
20,2.5 Mulund,0.14,0,19.17185,72.95564
37,4.8 Kamathipura,0.15,0,18.96172,72.82627
18,2.3 Kanjurmarg,0.109756,0,19.1314,72.93565


#### Finally, let's visualize the resulting clusters

In [48]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(mumbai_merged['Latitude'], mumbai_merged['Longitude'], mumbai_merged['Neighborhood'], mumbai_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [49]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

### 8. Examine Clusters
##### Cluster 0

In [50]:
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
0,1.1 Andheri,0.11,0,19.118483,72.841774
29,4.10 Parel,0.14,0,18.99566,72.83907
27,3.4 Trombay,0.148936,0,19.019,72.89799
33,4.4 Dadar,0.15,0,19.01992,72.84087
25,3.2 Govandi,0.160494,0,19.05617,72.91483
22,2.7 Vidyavihar,0.17,0,19.023261,72.8439
21,2.6 Powai,0.146067,0,19.12311,72.90944
20,2.5 Mulund,0.14,0,19.17185,72.95564
37,4.8 Kamathipura,0.15,0,18.96172,72.82627
18,2.3 Kanjurmarg,0.109756,0,19.1314,72.93565


##### Cluster 1

In [51]:
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
12,1.6 Goregaon,0.07,1,19.16455,72.84946
1,1.10 Kandivali east,0.1,1,19.20575,72.86969
32,4.3 Colaba,0.08,1,18.91527,72.82614
2,1.11 Khar,0.04,1,19.073447,72.835949
3,1.12 Malad,0.08,1,19.18655,72.84836
4,1.13 Santacruz,0.07,1,19.08177,72.84205
5,1.14 Vasai,0.07,1,19.07934,72.83916
34,4.5 Fort,0.1,1,18.93226,72.83288
8,1.2 Bhayandar,0.027778,1,19.30746,72.8517
9,1.3 Bandra,0.07,1,19.05422,72.84019


##### Cluster 2

In [52]:
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Indian Restaurant,Cluster Labels,Latitude,Longitude
19,2.4 Kurla,0.2,2,19.140932,72.882604
26,3.3 Mankhurd,0.186047,2,19.04853,72.9322
23,2.8 Vikhroli,0.184783,2,19.11109,72.92781
14,1.8 Juhu,0.21,2,19.01493,72.84522
6,1.15 Virar,0.24,2,19.01657,72.85853
28,4.1 Antop Hill,0.23,2,19.023011,72.866059
38,4.9 Matunga,0.2,2,19.140932,72.882604
