# Segmenting and Clustering Neighbourhoods in Mumbai

### Part 1: Importing libraries

In [148]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Libraries imported.


### Part 2: Scraping Neighbourhoods from mumbai7.com website

In [37]:
url = "https://mumbai7.com/postal-codes-in-mumbai/"

In [40]:
def scrape_table_bs4(cols):
    page  = urllib.request.urlopen(url).read()
    soup  = bs.BeautifulSoup(page,'lxml')
    table = soup.find("table")
    header = [head.findAll(text=True)[0].strip() for head in table.find_all("th")]
    data   = [[td.findAll(text=True)[0].strip() for td in tr.find_all("td")]
              for tr in table.find_all("tr")]
    data    = [row for row in data if len(row) == cols]
    # Store data to this temporary dataframe
    raw_df = pd.DataFrame(data,columns=header)
    return raw_df
df = scrape_table_bs4(3)
mumbai_data = df[df['City'].str.contains("Mumbai")]
mumbai_data = mumbai_data.drop(['City','Pin Code'], axis = 1)
mumbai_data.columns = ['Neighborhood']

In [41]:
print(mumbai_data.shape)
mumbai_data.head()

(106, 1)


Unnamed: 0,Neighborhood
0,August Kranti Marg
1,Aarey Milk Colony
2,Andheri (East)
3,Andheri (West)
4,Antop Hill


### Part 3: Get the geographical coordinates

In [42]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Mumbai, Maharashtra, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [44]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in mumbai_data["Neighborhood"].tolist() ]

In [45]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [46]:
# merge the coordinates into the original dataframe
mumbai_data['Latitude'] = df_coords['Latitude']
mumbai_data['Longitude'] = df_coords['Longitude']

In [47]:
# check the neighborhoods and the coordinates
print(mumbai_data.shape)
mumbai_data.head()

(106, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,August Kranti Marg,18.964379,72.805994
1,Aarey Milk Colony,19.166115,72.859403
2,Andheri (East),19.10393,72.86698
3,Andheri (West),19.12929,72.83149
4,Antop Hill,19.02614,72.86645


In [48]:
mumbai_data.to_csv('mumbai_data.csv',index = False)

### Part 4: Create a map of Mumbai with neighbourhoods superimposed on top

In [49]:
mumbai_data = pd.read_csv('mumbai_data.csv')

In [50]:
# get the coordinates of Mumbai
address = 'Mumbai, Maharashtra, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Mumbai, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Mumbai, India 18.9387711, 72.8353355.


In [51]:
# create map of Mumbai using latitude and longitude values
map_mumbai = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(mumbai_data['Latitude'], mumbai_data['Longitude'], mumbai_data['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_mumbai)  
    
map_mumbai

In [52]:
# save the map as HTML file
map_mumbai.save('map_mumbai.html')

### Part 5: Use the Foursquare API to explore the neighborhoods

In [119]:
CLIENT_ID = 'RM3JKWHSNFHQPYK13NRR440FSH2VR032HQYD3LROQ01CATN5' # my Foursquare ID
CLIENT_SECRET = 'A5UUUDDRTBY32X4EGOJ12BMOPIRQCI1KW5QFNYZVBSYHL3JC' # my Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 30

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RM3JKWHSNFHQPYK13NRR440FSH2VR032HQYD3LROQ01CATN5
CLIENT_SECRET:A5UUUDDRTBY32X4EGOJ12BMOPIRQCI1KW5QFNYZVBSYHL3JC


#### Now, let's get the top 100 venues that are within a radius of 500 meters.

In [120]:
radius = 500
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(mumbai_data['Latitude'], mumbai_data['Longitude'], mumbai_data['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [121]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(1253, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,August Kranti Marg,18.964379,72.805994,Doolally Taproom,18.963809,72.807695,Brewery
1,August Kranti Marg,18.964379,72.805994,6th Street Yogurt,18.96468,72.805145,Dessert Shop
2,August Kranti Marg,18.964379,72.805994,Crossword,18.963474,72.807773,Bookstore
3,August Kranti Marg,18.964379,72.805994,Sushi and More,18.965137,72.803952,Japanese Restaurant
4,August Kranti Marg,18.964379,72.805994,Francesco's Pizzeria,18.96478,72.80463,Pizza Place


In [122]:
venues_df.to_csv('mumbai_venues.csv',index = False)

#### Let's check how many venues were returned for each neighorhood

In [123]:
venues_df = pd.read_csv('mumbai_venues.csv')
venues_df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,August Kranti Marg,18.964379,72.805994,Doolally Taproom,18.963809,72.807695,Brewery
1,August Kranti Marg,18.964379,72.805994,6th Street Yogurt,18.96468,72.805145,Dessert Shop
2,August Kranti Marg,18.964379,72.805994,Crossword,18.963474,72.807773,Bookstore
3,August Kranti Marg,18.964379,72.805994,Sushi and More,18.965137,72.803952,Japanese Restaurant
4,August Kranti Marg,18.964379,72.805994,Francesco's Pizzeria,18.96478,72.80463,Pizza Place


#### Let's find out how many unique categories can be curated from all the returned venues

In [124]:
# print out the list of categories
uniq_val = np.unique(venues_df['VenueCategory'])
print('Total unique categories',len(uniq_val))
uniq_val[:10]

Total unique categories 164


array(['ATM', 'Accessories Store', 'Afghan Restaurant',
       'Airport Terminal', 'American Restaurant', 'Antique Shop',
       'Arcade', 'Art Gallery', 'Arts & Crafts Store', 'Asian Restaurant'],
      dtype=object)

#### List all the Venue Category which have 'restaurant' in it.

In [125]:
unique_rest = set(venues_df[venues_df['VenueCategory'].str.contains("Restaurant")]['VenueCategory'])
print('There are',len(unique_rest),'different restaurant categories')
unique_rest

There are 29 different restaurant categories


{'Afghan Restaurant',
 'American Restaurant',
 'Asian Restaurant',
 'Bengali Restaurant',
 'Brazilian Restaurant',
 'Chinese Restaurant',
 'Comfort Food Restaurant',
 'Dumpling Restaurant',
 'Falafel Restaurant',
 'Fast Food Restaurant',
 'French Restaurant',
 'German Restaurant',
 'Indian Restaurant',
 'Italian Restaurant',
 'Japanese Restaurant',
 'Maharashtrian Restaurant',
 'Mediterranean Restaurant',
 'Middle Eastern Restaurant',
 'Mughlai Restaurant',
 'Multicuisine Indian Restaurant',
 'New American Restaurant',
 'North Indian Restaurant',
 'Parsi Restaurant',
 'Punjabi Restaurant',
 'Restaurant',
 'Seafood Restaurant',
 'South Indian Restaurant',
 'Thai Restaurant',
 'Vegetarian / Vegan Restaurant'}

#### Rename all the different restaurant categories into one single name

In [128]:
list_rest = list(unique_rest)
venues_df = venues_df.replace(to_replace = list_rest, value = 'Restaurant')

### Part 6: Analysing Each Neighbourhood

In [129]:
# one hot encoding
mumbai_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
mumbai_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [mumbai_onehot.columns[-1]] + list(mumbai_onehot.columns[:-1])
mumbai_onehot = mumbai_onehot[fixed_columns]

print(mumbai_onehot.shape)
mumbai_onehot.head()

(1253, 137)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Airport Terminal,Antique Shop,Arcade,Art Gallery,Arts & Crafts Store,Athletics & Sports,Automotive Shop,BBQ Joint,Bakery,Bank,Bar,Beach,Bed & Breakfast,Bistro,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Building,Burger Joint,Bus Station,Cafeteria,Café,Chaat Place,Cheese Shop,Clothing Store,Cocktail Bar,Coffee Shop,College Academic Building,Concert Hall,Convenience Store,Cricket Ground,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Event Space,Farm,Farmers Market,Field,Flea Market,Flower Shop,Food,Food Court,Food Truck,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,Gay Bar,General Entertainment,Gift Shop,Government Building,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Historic Site,History Museum,Hockey Arena,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indie Movie Theater,Irani Cafe,Irish Pub,Jewelry Store,Juice Bar,Light Rail Station,Lighthouse,Liquor Store,Lounge,Market,Men's Store,Miscellaneous Shop,Mobile Phone Shop,Monument / Landmark,Motorcycle Shop,Movie Theater,Multiplex,Music Venue,Neighborhood,Nightclub,Office,Optical Shop,Other Great Outdoors,Outdoors & Recreation,Paper / Office Supplies Store,Park,Performing Arts Venue,Pharmacy,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Racetrack,Residential Building (Apartment / Condo),Restaurant,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Shoe Store,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Spa,Sporting Goods Shop,Stadium,Steakhouse,Tea Room,Theater,Tourist Information Center,Train Station,Whisky Bar,Wine Bar,Women's Store,Yoga Studio
0,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [130]:
mumbai_grouped = mumbai_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(mumbai_grouped.shape)
mumbai_grouped

(104, 137)


Unnamed: 0,Neighborhoods,ATM,Accessories Store,Airport Terminal,Antique Shop,Arcade,Art Gallery,Arts & Crafts Store,Athletics & Sports,Automotive Shop,BBQ Joint,Bakery,Bank,Bar,Beach,Bed & Breakfast,Bistro,Boat or Ferry,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Building,Burger Joint,Bus Station,Cafeteria,Café,Chaat Place,Cheese Shop,Clothing Store,Cocktail Bar,Coffee Shop,College Academic Building,Concert Hall,Convenience Store,Cricket Ground,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Donut Shop,Electronics Store,Event Space,Farm,Farmers Market,Field,Flea Market,Flower Shop,Food,Food Court,Food Truck,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Gastropub,Gay Bar,General Entertainment,Gift Shop,Government Building,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Historic Site,History Museum,Hockey Arena,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indie Movie Theater,Irani Cafe,Irish Pub,Jewelry Store,Juice Bar,Light Rail Station,Lighthouse,Liquor Store,Lounge,Market,Men's Store,Miscellaneous Shop,Mobile Phone Shop,Monument / Landmark,Motorcycle Shop,Movie Theater,Multiplex,Music Venue,Neighborhood,Nightclub,Office,Optical Shop,Other Great Outdoors,Outdoors & Recreation,Paper / Office Supplies Store,Park,Performing Arts Venue,Pharmacy,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Racetrack,Residential Building (Apartment / Condo),Restaurant,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Shoe Store,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Spa,Sporting Goods Shop,Stadium,Steakhouse,Tea Room,Theater,Tourist Information Center,Train Station,Whisky Bar,Wine Bar,Women's Store,Yoga Studio
0,Aarey Milk Colony,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Airoli Mode,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Andheri (East),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.176471,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.352941,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Andheri (West),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.0,0.0,0.153846,0.0,0.076923,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0
4,Antop Hill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Anu Shakti Nagar,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,August Kranti Marg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.088235,0.0,0.029412,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.029412,0.029412,0.088235,0.0,0.0,0.0,0.0,0.058824,0.0,0.029412,0.0,0.0,0.0,0.0,0.029412,0.058824,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.205882,0.0,0.029412,0.029412,0.029412,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,B A R C,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Ballard Estate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.058824,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.294118,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Bandra (East),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0


In [131]:
len(mumbai_grouped[mumbai_grouped["Restaurant"] > 0])

83

#### Create a new DataFrame for Restaurants data only

In [132]:
mumbai_rest = mumbai_grouped[["Neighborhoods","Restaurant"]]
mumbai_rest.describe()

Unnamed: 0,Restaurant
count,104.0
mean,0.28153
std,0.197051
min,0.0
25%,0.142857
50%,0.285714
75%,0.403448
max,0.75


In [135]:
mumbai_rest.head()

Unnamed: 0,Neighborhoods,Restaurant
0,Aarey Milk Colony,0.5
1,Airoli Mode,0.6
2,Andheri (East),0.352941
3,Andheri (West),0.076923
4,Antop Hill,0.0


### Part 7: Cluster Neighborhoods

#### Run k-means to cluster the neighborhoods in Mumbai into 4 clusters.

In [153]:
# set number of clusters
kclusters = 4

mumbai_clustering = mumbai_rest.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(mumbai_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 3, 0, 1, 1, 2, 2, 0, 2, 0], dtype=int32)

In [154]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
mumbai_merged = mumbai_rest.copy()

# add clustering labels
mumbai_merged["Cluster Labels"] = kmeans.labels_

In [155]:
mumbai_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
mumbai_merged.head()

Unnamed: 0,Neighborhood,Restaurant,Cluster Labels
0,Aarey Milk Colony,0.5,0
1,Airoli Mode,0.6,3
2,Andheri (East),0.352941,0
3,Andheri (West),0.076923,1
4,Antop Hill,0.0,1


In [156]:
# merge mumbai_grouped with mumbai_data to add latitude/longitude for each neighborhood
mumbai_merged = mumbai_merged.join(mumbai_data.set_index("Neighborhood"), on="Neighborhood")

print(mumbai_merged.shape)
mumbai_merged.head() # check the last columns!

(104, 5)


Unnamed: 0,Neighborhood,Restaurant,Cluster Labels,Latitude,Longitude
0,Aarey Milk Colony,0.5,0,19.166115,72.859403
1,Airoli Mode,0.6,3,19.15558,72.99815
2,Andheri (East),0.352941,0,19.10393,72.86698
3,Andheri (West),0.076923,1,19.12929,72.83149
4,Antop Hill,0.0,1,19.02614,72.86645


In [157]:
# sort the results by Cluster Labels
print(mumbai_merged.shape)
mumbai_merged.sort_values(["Cluster Labels"], inplace=True)
mumbai_merged

(104, 5)


Unnamed: 0,Neighborhood,Restaurant,Cluster Labels,Latitude,Longitude
0,Aarey Milk Colony,0.5,0,19.166115,72.859403
36,Grant Road,0.466667,0,18.95929,72.83108
37,Hutatma Chowk,0.375,0,18.93261,72.83159
39,J B Nagar,0.368421,0,19.11051,72.86641
40,Jacob Circle,0.333333,0,18.98007,72.82753
43,Juhu,0.444444,0,19.01492,72.84522
45,Kandivli (East),0.4,0,19.20576,72.86953
49,Konkan Bhawan,0.4,0,19.02287,73.03981
52,Mahim,0.428571,0,19.04073,72.84305
58,Mantralaya,0.324324,0,18.92817,72.82633


#### Finally, let's visualize the resulting clusters

In [158]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(mumbai_merged['Latitude'], mumbai_merged['Longitude'], mumbai_merged['Neighborhood'], mumbai_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [159]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

### Part 8: Examine Clusters

#### Cluster 0

In [161]:
print(mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 0].describe())
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 0]

       Restaurant  Cluster Labels   Latitude  Longitude
count   37.000000            37.0  37.000000  37.000000
mean     0.406881             0.0  19.051649  72.866280
std      0.057129             0.0   0.089672   0.050977
min      0.324324             0.0  18.915420  72.815070
25%      0.368421             0.0  18.980070  72.831590
50%      0.400000             0.0  19.044638  72.851700
75%      0.444444             0.0  19.110510  72.874040
max      0.523810             0.0  19.250030  73.039810


Unnamed: 0,Neighborhood,Restaurant,Cluster Labels,Latitude,Longitude
0,Aarey Milk Colony,0.5,0,19.166115,72.859403
36,Grant Road,0.466667,0,18.95929,72.83108
37,Hutatma Chowk,0.375,0,18.93261,72.83159
39,J B Nagar,0.368421,0,19.11051,72.86641
40,Jacob Circle,0.333333,0,18.98007,72.82753
43,Juhu,0.444444,0,19.01492,72.84522
45,Kandivli (East),0.4,0,19.20576,72.86953
49,Konkan Bhawan,0.4,0,19.02287,73.03981
52,Mahim,0.428571,0,19.04073,72.84305
58,Mantralaya,0.324324,0,18.92817,72.82633


#### Cluster 1

In [162]:
print(mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 1].describe())
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 1]

       Restaurant  Cluster Labels   Latitude  Longitude
count   25.000000            25.0  25.000000  25.000000
mean     0.015363             1.0  19.076788  72.871853
std      0.036226             0.0   0.080822   0.053673
min      0.000000             1.0  18.957330  72.805850
25%      0.000000             1.0  19.007440  72.838180
50%      0.000000             1.0  19.087426  72.855080
75%      0.000000             1.0  19.130302  72.882230
max      0.107143             1.0  19.234960  73.024930


Unnamed: 0,Neighborhood,Restaurant,Cluster Labels,Latitude,Longitude
103,Worli,0.0,1,19.00744,72.81688
86,Shivaji Nagar (Kurla),0.0,1,19.09428,72.88223
85,Sewri,0.0,1,18.99636,72.85387
84,Santacruz P&T Colony,0.0,1,19.101982,72.862979
18,Borivli (West),0.0,1,19.23496,72.85508
80,Sahar,0.0,1,19.102711,72.862546
54,Malad (East),0.0,1,19.18229,72.86398
65,Mulund Colony,0.0,1,19.17505,72.93752
29,Dharavi,0.0,1,19.04674,72.85461
12,Barve Nagar,0.0,1,19.09709,72.90105


#### Cluster 2

In [163]:
print(mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 2].describe())
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 2]

       Restaurant  Cluster Labels   Latitude  Longitude
count   33.000000            33.0  33.000000  33.000000
mean     0.237531             2.0  19.074015  72.862815
std      0.042854             0.0   0.097358   0.057072
min      0.142857             2.0  18.924900  72.795760
25%      0.210526             2.0  18.969230  72.823190
50%      0.238095             2.0  19.085050  72.842260
75%      0.279070             2.0  19.156120  72.879749
max      0.307692             2.0  19.247970  73.017502


Unnamed: 0,Neighborhood,Restaurant,Cluster Labels,Latitude,Longitude
83,Santacruz (West),0.227273,2,19.08303,72.83895
93,Turbhe,0.25,2,19.072907,73.017502
81,Saki Naka,0.25,2,19.126853,72.894312
5,Anu Shakti Nagar,0.2,2,19.04283,72.92734
88,Tagore Nagar,0.25,2,19.11393,72.93267
96,Veer Jijamata Bhosle Udyan,0.222222,2,18.99264,72.81908
8,Ballard Estate,0.294118,2,18.93541,72.83979
97,Vesava (Versova),0.238095,2,19.13769,72.81348
98,Vidyanagari,0.166667,2,19.176687,72.879749
100,Vile Parle (East),0.210526,2,19.09744,72.84942


#### Cluster 3

In [164]:
print(mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 3].describe())
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 3]

       Restaurant  Cluster Labels   Latitude  Longitude
count    9.000000             9.0   9.000000   9.000000
mean     0.666885             3.0  19.073293  72.912267
std      0.069604             0.0   0.076321   0.075037
min      0.583333             3.0  18.949960  72.829940
25%      0.600000             3.0  19.015064  72.840860
50%      0.666667             3.0  19.078380  72.901910
75%      0.750000             3.0  19.145530  72.948570
max      0.750000             3.0  19.155580  73.036695


Unnamed: 0,Neighborhood,Restaurant,Cluster Labels,Latitude,Longitude
1,Airoli Mode,0.6,3,19.15558,72.99815
13,Belapur,0.647059,3,19.011955,73.036695
44,Kalbadevi,0.75,3,18.94996,72.82994
16,Bhavani Shankar Road,0.666667,3,19.015064,72.836912
78,Rajawadi,0.588235,3,19.07838,72.90191
14,Bhandup,0.75,3,19.14556,72.94856
76,Poonam Ngr Jogeshwari (E),0.666667,3,19.1377,72.86881
26,Dadar,0.583333,3,19.01991,72.84086
15,Bhandup (East),0.75,3,19.14553,72.94857
